{ "best_metric": 0.08489208633093526, "best_model_checkpoint": "resnet-50-finetuned-pokemon-finetuned-pokemon/checkpoint-2486", "epoch": 99.34640522875817, "eval_steps": 500, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 1.3157894736842106e-06, "loss": 1.1424, "step": 10 }, { "epoch": 0.52, "learning_rate": 2.631578947368421e-06, "loss": 1.1313, "step": 20 }, { "epoch": 0.78, "learning_rate": 3.9473684210526315e-06, "loss": 1.1894, "step": 30 }, { "epoch": 0.99, "eval_accuracy": 0.013669064748201438, "eval_loss": 9.211542129516602, "eval_runtime": 5.8886, "eval_samples_per_second": 236.048, "eval_steps_per_second": 7.472, "step": 38 }, { "epoch": 1.05, "learning_rate": 5.263157894736842e-06, "loss": 1.1934, "step": 40 }, { "epoch": 1.31, "learning_rate": 6.578947368421053e-06, "loss": 1.1333, "step": 50 }, { "epoch": 1.57, "learning_rate": 7.894736842105263e-06, "loss": 1.0844, "step": 60 }, { "epoch": 1.83, "learning_rate": 9.210526315789474e-06, "loss": 1.1389, "step": 70 }, { "epoch": 1.99, "eval_accuracy": 0.012949640287769784, "eval_loss": 9.252067565917969, "eval_runtime": 5.8088, "eval_samples_per_second": 239.291, "eval_steps_per_second": 7.575, "step": 76 }, { "epoch": 2.09, "learning_rate": 1.0526315789473684e-05, "loss": 1.1388, "step": 80 }, { "epoch": 2.35, "learning_rate": 1.1842105263157895e-05, "loss": 1.1188, "step": 90 }, { "epoch": 2.61, "learning_rate": 1.3157894736842106e-05, "loss": 1.1029, "step": 100 }, { "epoch": 2.88, "learning_rate": 1.4473684210526317e-05, "loss": 1.0432, "step": 110 }, { "epoch": 2.98, "eval_accuracy": 0.014388489208633094, "eval_loss": 9.476502418518066, "eval_runtime": 5.8873, "eval_samples_per_second": 236.102, "eval_steps_per_second": 7.474, "step": 114 }, { "epoch": 3.14, "learning_rate": 1.5789473684210526e-05, "loss": 1.124, "step": 120 }, { "epoch": 3.4, "learning_rate": 1.7105263157894737e-05, "loss": 1.062, "step": 130 }, { "epoch": 3.66, "learning_rate": 1.8421052631578947e-05, "loss": 1.1051, "step": 140 }, { "epoch": 3.92, "learning_rate": 1.9736842105263158e-05, "loss": 1.0625, "step": 150 }, { "epoch": 4.0, "eval_accuracy": 0.013669064748201438, "eval_loss": 9.7667818069458, "eval_runtime": 5.875, "eval_samples_per_second": 236.595, "eval_steps_per_second": 7.489, "step": 153 }, { "epoch": 4.18, "learning_rate": 2.105263157894737e-05, "loss": 1.1224, "step": 160 }, { "epoch": 4.44, "learning_rate": 2.236842105263158e-05, "loss": 1.0557, "step": 170 }, { "epoch": 4.71, "learning_rate": 2.368421052631579e-05, "loss": 1.0433, "step": 180 }, { "epoch": 4.97, "learning_rate": 2.5e-05, "loss": 1.0805, "step": 190 }, { "epoch": 4.99, "eval_accuracy": 0.013669064748201438, "eval_loss": 10.252641677856445, "eval_runtime": 5.9184, "eval_samples_per_second": 234.862, "eval_steps_per_second": 7.434, "step": 191 }, { "epoch": 5.23, "learning_rate": 2.6315789473684212e-05, "loss": 1.0663, "step": 200 }, { "epoch": 5.49, "learning_rate": 2.7631578947368426e-05, "loss": 1.0406, "step": 210 }, { "epoch": 5.75, "learning_rate": 2.8947368421052634e-05, "loss": 1.0353, "step": 220 }, { "epoch": 5.99, "eval_accuracy": 0.012949640287769784, "eval_loss": 10.323753356933594, "eval_runtime": 5.9005, "eval_samples_per_second": 235.572, "eval_steps_per_second": 7.457, "step": 229 }, { "epoch": 6.01, "learning_rate": 3.0263157894736844e-05, "loss": 1.0401, "step": 230 }, { "epoch": 6.27, "learning_rate": 3.157894736842105e-05, "loss": 1.0144, "step": 240 }, { "epoch": 6.54, "learning_rate": 3.289473684210527e-05, "loss": 1.0139, "step": 250 }, { "epoch": 6.8, "learning_rate": 3.421052631578947e-05, "loss": 0.9747, "step": 260 }, { "epoch": 6.98, "eval_accuracy": 0.016546762589928057, "eval_loss": 10.577895164489746, "eval_runtime": 6.0278, "eval_samples_per_second": 230.598, "eval_steps_per_second": 7.299, "step": 267 }, { "epoch": 7.06, "learning_rate": 3.5526315789473684e-05, "loss": 1.0199, "step": 270 }, { "epoch": 7.32, "learning_rate": 3.6842105263157895e-05, "loss": 0.9931, "step": 280 }, { "epoch": 7.58, "learning_rate": 3.815789473684211e-05, "loss": 0.9694, "step": 290 }, { "epoch": 7.84, "learning_rate": 3.9473684210526316e-05, "loss": 0.9708, "step": 300 }, { "epoch": 8.0, "eval_accuracy": 0.017985611510791366, "eval_loss": 10.745814323425293, "eval_runtime": 6.0966, "eval_samples_per_second": 227.997, "eval_steps_per_second": 7.217, "step": 306 }, { "epoch": 8.1, "learning_rate": 4.078947368421053e-05, "loss": 0.9497, "step": 310 }, { "epoch": 8.37, "learning_rate": 4.210526315789474e-05, "loss": 0.8812, "step": 320 }, { "epoch": 8.63, "learning_rate": 4.342105263157895e-05, "loss": 0.8991, "step": 330 }, { "epoch": 8.89, "learning_rate": 4.473684210526316e-05, "loss": 0.8886, "step": 340 }, { "epoch": 8.99, "eval_accuracy": 0.019424460431654675, "eval_loss": 11.007186889648438, "eval_runtime": 6.1583, "eval_samples_per_second": 225.713, "eval_steps_per_second": 7.145, "step": 344 }, { "epoch": 9.15, "learning_rate": 4.605263157894737e-05, "loss": 0.8914, "step": 350 }, { "epoch": 9.41, "learning_rate": 4.736842105263158e-05, "loss": 0.8727, "step": 360 }, { "epoch": 9.67, "learning_rate": 4.868421052631579e-05, "loss": 0.8327, "step": 370 }, { "epoch": 9.93, "learning_rate": 5e-05, "loss": 0.8408, "step": 380 }, { "epoch": 9.99, "eval_accuracy": 0.022302158273381296, "eval_loss": 11.317117691040039, "eval_runtime": 6.1694, "eval_samples_per_second": 225.305, "eval_steps_per_second": 7.132, "step": 382 }, { "epoch": 10.2, "learning_rate": 4.985380116959065e-05, "loss": 0.8767, "step": 390 }, { "epoch": 10.46, "learning_rate": 4.970760233918128e-05, "loss": 0.801, "step": 400 }, { "epoch": 10.72, "learning_rate": 4.956140350877193e-05, "loss": 0.804, "step": 410 }, { "epoch": 10.98, "learning_rate": 4.941520467836258e-05, "loss": 0.802, "step": 420 }, { "epoch": 10.98, "eval_accuracy": 0.02446043165467626, "eval_loss": 11.554548263549805, "eval_runtime": 6.1954, "eval_samples_per_second": 224.36, "eval_steps_per_second": 7.102, "step": 420 }, { "epoch": 11.24, "learning_rate": 4.926900584795322e-05, "loss": 0.8308, "step": 430 }, { "epoch": 11.5, "learning_rate": 4.912280701754386e-05, "loss": 0.7814, "step": 440 }, { "epoch": 11.76, "learning_rate": 4.8976608187134504e-05, "loss": 0.7903, "step": 450 }, { "epoch": 12.0, "eval_accuracy": 0.02877697841726619, "eval_loss": 11.77219295501709, "eval_runtime": 6.2667, "eval_samples_per_second": 221.808, "eval_steps_per_second": 7.021, "step": 459 }, { "epoch": 12.03, "learning_rate": 4.883040935672515e-05, "loss": 0.8129, "step": 460 }, { "epoch": 12.29, "learning_rate": 4.868421052631579e-05, "loss": 0.7376, "step": 470 }, { "epoch": 12.55, "learning_rate": 4.853801169590643e-05, "loss": 0.7731, "step": 480 }, { "epoch": 12.81, "learning_rate": 4.839181286549708e-05, "loss": 0.7553, "step": 490 }, { "epoch": 12.99, "eval_accuracy": 0.03525179856115108, "eval_loss": 11.983402252197266, "eval_runtime": 6.562, "eval_samples_per_second": 211.826, "eval_steps_per_second": 6.705, "step": 497 }, { "epoch": 13.07, "learning_rate": 4.824561403508772e-05, "loss": 0.744, "step": 500 }, { "epoch": 13.33, "learning_rate": 4.8099415204678366e-05, "loss": 0.7221, "step": 510 }, { "epoch": 13.59, "learning_rate": 4.7953216374269006e-05, "loss": 0.728, "step": 520 }, { "epoch": 13.86, "learning_rate": 4.780701754385965e-05, "loss": 0.7413, "step": 530 }, { "epoch": 13.99, "eval_accuracy": 0.04460431654676259, "eval_loss": 11.981474876403809, "eval_runtime": 6.1295, "eval_samples_per_second": 226.771, "eval_steps_per_second": 7.178, "step": 535 }, { "epoch": 14.12, "learning_rate": 4.7660818713450294e-05, "loss": 0.6931, "step": 540 }, { "epoch": 14.38, "learning_rate": 4.751461988304094e-05, "loss": 0.6729, "step": 550 }, { "epoch": 14.64, "learning_rate": 4.736842105263158e-05, "loss": 0.6931, "step": 560 }, { "epoch": 14.9, "learning_rate": 4.722222222222222e-05, "loss": 0.6272, "step": 570 }, { "epoch": 14.98, "eval_accuracy": 0.04964028776978417, "eval_loss": 12.08712387084961, "eval_runtime": 6.1408, "eval_samples_per_second": 226.354, "eval_steps_per_second": 7.165, "step": 573 }, { "epoch": 15.16, "learning_rate": 4.707602339181287e-05, "loss": 0.6859, "step": 580 }, { "epoch": 15.42, "learning_rate": 4.6929824561403515e-05, "loss": 0.6595, "step": 590 }, { "epoch": 15.69, "learning_rate": 4.678362573099415e-05, "loss": 0.6183, "step": 600 }, { "epoch": 15.95, "learning_rate": 4.6637426900584796e-05, "loss": 0.6944, "step": 610 }, { "epoch": 16.0, "eval_accuracy": 0.058992805755395686, "eval_loss": 12.371334075927734, "eval_runtime": 6.0154, "eval_samples_per_second": 231.073, "eval_steps_per_second": 7.315, "step": 612 }, { "epoch": 16.21, "learning_rate": 4.649122807017544e-05, "loss": 0.6187, "step": 620 }, { "epoch": 16.47, "learning_rate": 4.634502923976608e-05, "loss": 0.6033, "step": 630 }, { "epoch": 16.73, "learning_rate": 4.619883040935672e-05, "loss": 0.6347, "step": 640 }, { "epoch": 16.99, "learning_rate": 4.605263157894737e-05, "loss": 0.6322, "step": 650 }, { "epoch": 16.99, "eval_accuracy": 0.05539568345323741, "eval_loss": 12.682552337646484, "eval_runtime": 5.924, "eval_samples_per_second": 234.639, "eval_steps_per_second": 7.427, "step": 650 }, { "epoch": 17.25, "learning_rate": 4.590643274853802e-05, "loss": 0.6833, "step": 660 }, { "epoch": 17.52, "learning_rate": 4.576023391812866e-05, "loss": 0.6246, "step": 670 }, { "epoch": 17.78, "learning_rate": 4.56140350877193e-05, "loss": 0.6131, "step": 680 }, { "epoch": 17.99, "eval_accuracy": 0.06115107913669065, "eval_loss": 12.481949806213379, "eval_runtime": 5.8554, "eval_samples_per_second": 237.388, "eval_steps_per_second": 7.514, "step": 688 }, { "epoch": 18.04, "learning_rate": 4.5467836257309945e-05, "loss": 0.5705, "step": 690 }, { "epoch": 18.3, "learning_rate": 4.5321637426900585e-05, "loss": 0.5652, "step": 700 }, { "epoch": 18.56, "learning_rate": 4.517543859649123e-05, "loss": 0.5502, "step": 710 }, { "epoch": 18.82, "learning_rate": 4.502923976608187e-05, "loss": 0.5916, "step": 720 }, { "epoch": 18.98, "eval_accuracy": 0.06474820143884892, "eval_loss": 12.624631881713867, "eval_runtime": 5.8769, "eval_samples_per_second": 236.519, "eval_steps_per_second": 7.487, "step": 726 }, { "epoch": 19.08, "learning_rate": 4.488304093567251e-05, "loss": 0.6098, "step": 730 }, { "epoch": 19.35, "learning_rate": 4.473684210526316e-05, "loss": 0.5802, "step": 740 }, { "epoch": 19.61, "learning_rate": 4.4590643274853806e-05, "loss": 0.4978, "step": 750 }, { "epoch": 19.87, "learning_rate": 4.4444444444444447e-05, "loss": 0.5094, "step": 760 }, { "epoch": 20.0, "eval_accuracy": 0.06690647482014389, "eval_loss": 12.664143562316895, "eval_runtime": 5.9348, "eval_samples_per_second": 234.211, "eval_steps_per_second": 7.414, "step": 765 }, { "epoch": 20.13, "learning_rate": 4.429824561403509e-05, "loss": 0.5806, "step": 770 }, { "epoch": 20.39, "learning_rate": 4.4152046783625734e-05, "loss": 0.5324, "step": 780 }, { "epoch": 20.65, "learning_rate": 4.400584795321638e-05, "loss": 0.5366, "step": 790 }, { "epoch": 20.92, "learning_rate": 4.3859649122807014e-05, "loss": 0.5201, "step": 800 }, { "epoch": 20.99, "eval_accuracy": 0.06618705035971223, "eval_loss": 12.886053085327148, "eval_runtime": 5.9484, "eval_samples_per_second": 233.676, "eval_steps_per_second": 7.397, "step": 803 }, { "epoch": 21.18, "learning_rate": 4.371345029239766e-05, "loss": 0.5913, "step": 810 }, { "epoch": 21.44, "learning_rate": 4.356725146198831e-05, "loss": 0.5353, "step": 820 }, { "epoch": 21.7, "learning_rate": 4.342105263157895e-05, "loss": 0.542, "step": 830 }, { "epoch": 21.96, "learning_rate": 4.327485380116959e-05, "loss": 0.4731, "step": 840 }, { "epoch": 21.99, "eval_accuracy": 0.06546762589928058, "eval_loss": 12.743083000183105, "eval_runtime": 5.8758, "eval_samples_per_second": 236.564, "eval_steps_per_second": 7.488, "step": 841 }, { "epoch": 22.22, "learning_rate": 4.3128654970760236e-05, "loss": 0.4812, "step": 850 }, { "epoch": 22.48, "learning_rate": 4.298245614035088e-05, "loss": 0.5204, "step": 860 }, { "epoch": 22.75, "learning_rate": 4.283625730994152e-05, "loss": 0.5132, "step": 870 }, { "epoch": 22.98, "eval_accuracy": 0.07050359712230216, "eval_loss": 12.778566360473633, "eval_runtime": 5.856, "eval_samples_per_second": 237.364, "eval_steps_per_second": 7.514, "step": 879 }, { "epoch": 23.01, "learning_rate": 4.269005847953216e-05, "loss": 0.5084, "step": 880 }, { "epoch": 23.27, "learning_rate": 4.254385964912281e-05, "loss": 0.5183, "step": 890 }, { "epoch": 23.53, "learning_rate": 4.239766081871345e-05, "loss": 0.5286, "step": 900 }, { "epoch": 23.79, "learning_rate": 4.22514619883041e-05, "loss": 0.5036, "step": 910 }, { "epoch": 24.0, "eval_accuracy": 0.07266187050359713, "eval_loss": 12.998967170715332, "eval_runtime": 5.8521, "eval_samples_per_second": 237.522, "eval_steps_per_second": 7.519, "step": 918 }, { "epoch": 24.05, "learning_rate": 4.210526315789474e-05, "loss": 0.4826, "step": 920 }, { "epoch": 24.31, "learning_rate": 4.195906432748538e-05, "loss": 0.5105, "step": 930 }, { "epoch": 24.58, "learning_rate": 4.1812865497076025e-05, "loss": 0.5133, "step": 940 }, { "epoch": 24.84, "learning_rate": 4.166666666666667e-05, "loss": 0.4863, "step": 950 }, { "epoch": 24.99, "eval_accuracy": 0.07266187050359713, "eval_loss": 13.041873931884766, "eval_runtime": 7.3809, "eval_samples_per_second": 188.323, "eval_steps_per_second": 5.961, "step": 956 }, { "epoch": 25.1, "learning_rate": 4.152046783625731e-05, "loss": 0.5028, "step": 960 }, { "epoch": 25.36, "learning_rate": 4.137426900584795e-05, "loss": 0.5185, "step": 970 }, { "epoch": 25.62, "learning_rate": 4.12280701754386e-05, "loss": 0.484, "step": 980 }, { "epoch": 25.88, "learning_rate": 4.1081871345029247e-05, "loss": 0.4852, "step": 990 }, { "epoch": 25.99, "eval_accuracy": 0.07338129496402877, "eval_loss": 13.057332038879395, "eval_runtime": 6.099, "eval_samples_per_second": 227.908, "eval_steps_per_second": 7.214, "step": 994 }, { "epoch": 26.14, "learning_rate": 4.093567251461988e-05, "loss": 0.5227, "step": 1000 }, { "epoch": 26.41, "learning_rate": 4.078947368421053e-05, "loss": 0.4827, "step": 1010 }, { "epoch": 26.67, "learning_rate": 4.0643274853801174e-05, "loss": 0.5091, "step": 1020 }, { "epoch": 26.93, "learning_rate": 4.0497076023391814e-05, "loss": 0.4983, "step": 1030 }, { "epoch": 26.98, "eval_accuracy": 0.07194244604316546, "eval_loss": 13.131017684936523, "eval_runtime": 6.2465, "eval_samples_per_second": 222.525, "eval_steps_per_second": 7.044, "step": 1032 }, { "epoch": 27.19, "learning_rate": 4.0350877192982455e-05, "loss": 0.4918, "step": 1040 }, { "epoch": 27.45, "learning_rate": 4.02046783625731e-05, "loss": 0.4687, "step": 1050 }, { "epoch": 27.71, "learning_rate": 4.005847953216375e-05, "loss": 0.4687, "step": 1060 }, { "epoch": 27.97, "learning_rate": 3.991228070175439e-05, "loss": 0.459, "step": 1070 }, { "epoch": 28.0, "eval_accuracy": 0.07482014388489208, "eval_loss": 13.068760871887207, "eval_runtime": 6.2511, "eval_samples_per_second": 222.36, "eval_steps_per_second": 7.039, "step": 1071 }, { "epoch": 28.24, "learning_rate": 3.976608187134503e-05, "loss": 0.4987, "step": 1080 }, { "epoch": 28.5, "learning_rate": 3.9619883040935676e-05, "loss": 0.4874, "step": 1090 }, { "epoch": 28.76, "learning_rate": 3.9473684210526316e-05, "loss": 0.4556, "step": 1100 }, { "epoch": 28.99, "eval_accuracy": 0.07482014388489208, "eval_loss": 13.41281509399414, "eval_runtime": 6.1781, "eval_samples_per_second": 224.989, "eval_steps_per_second": 7.122, "step": 1109 }, { "epoch": 29.02, "learning_rate": 3.932748538011696e-05, "loss": 0.47, "step": 1110 }, { "epoch": 29.28, "learning_rate": 3.9181286549707604e-05, "loss": 0.4709, "step": 1120 }, { "epoch": 29.54, "learning_rate": 3.9035087719298244e-05, "loss": 0.4499, "step": 1130 }, { "epoch": 29.8, "learning_rate": 3.888888888888889e-05, "loss": 0.4729, "step": 1140 }, { "epoch": 29.99, "eval_accuracy": 0.07410071942446043, "eval_loss": 13.353046417236328, "eval_runtime": 6.179, "eval_samples_per_second": 224.956, "eval_steps_per_second": 7.121, "step": 1147 }, { "epoch": 30.07, "learning_rate": 3.874269005847954e-05, "loss": 0.4329, "step": 1150 }, { "epoch": 30.33, "learning_rate": 3.859649122807018e-05, "loss": 0.4418, "step": 1160 }, { "epoch": 30.59, "learning_rate": 3.845029239766082e-05, "loss": 0.4428, "step": 1170 }, { "epoch": 30.85, "learning_rate": 3.8304093567251465e-05, "loss": 0.4659, "step": 1180 }, { "epoch": 30.98, "eval_accuracy": 0.07625899280575539, "eval_loss": 13.230795860290527, "eval_runtime": 6.1942, "eval_samples_per_second": 224.404, "eval_steps_per_second": 7.103, "step": 1185 }, { "epoch": 31.11, "learning_rate": 3.815789473684211e-05, "loss": 0.436, "step": 1190 }, { "epoch": 31.37, "learning_rate": 3.8011695906432746e-05, "loss": 0.4484, "step": 1200 }, { "epoch": 31.63, "learning_rate": 3.786549707602339e-05, "loss": 0.4099, "step": 1210 }, { "epoch": 31.9, "learning_rate": 3.771929824561404e-05, "loss": 0.4337, "step": 1220 }, { "epoch": 32.0, "eval_accuracy": 0.07482014388489208, "eval_loss": 13.32636833190918, "eval_runtime": 6.1367, "eval_samples_per_second": 226.505, "eval_steps_per_second": 7.17, "step": 1224 }, { "epoch": 32.16, "learning_rate": 3.757309941520468e-05, "loss": 0.4737, "step": 1230 }, { "epoch": 32.42, "learning_rate": 3.742690058479532e-05, "loss": 0.4133, "step": 1240 }, { "epoch": 32.68, "learning_rate": 3.728070175438597e-05, "loss": 0.4175, "step": 1250 }, { "epoch": 32.94, "learning_rate": 3.713450292397661e-05, "loss": 0.456, "step": 1260 }, { "epoch": 32.99, "eval_accuracy": 0.07410071942446043, "eval_loss": 13.350556373596191, "eval_runtime": 6.1533, "eval_samples_per_second": 225.896, "eval_steps_per_second": 7.151, "step": 1262 }, { "epoch": 33.2, "learning_rate": 3.6988304093567254e-05, "loss": 0.4446, "step": 1270 }, { "epoch": 33.46, "learning_rate": 3.6842105263157895e-05, "loss": 0.4216, "step": 1280 }, { "epoch": 33.73, "learning_rate": 3.669590643274854e-05, "loss": 0.4576, "step": 1290 }, { "epoch": 33.99, "learning_rate": 3.654970760233918e-05, "loss": 0.4423, "step": 1300 }, { "epoch": 33.99, "eval_accuracy": 0.07841726618705036, "eval_loss": 13.360701560974121, "eval_runtime": 5.9537, "eval_samples_per_second": 233.467, "eval_steps_per_second": 7.39, "step": 1300 }, { "epoch": 34.25, "learning_rate": 3.640350877192983e-05, "loss": 0.4469, "step": 1310 }, { "epoch": 34.51, "learning_rate": 3.625730994152047e-05, "loss": 0.4226, "step": 1320 }, { "epoch": 34.77, "learning_rate": 3.611111111111111e-05, "loss": 0.4037, "step": 1330 }, { "epoch": 34.98, "eval_accuracy": 0.07338129496402877, "eval_loss": 13.252113342285156, "eval_runtime": 6.1434, "eval_samples_per_second": 226.26, "eval_steps_per_second": 7.162, "step": 1338 }, { "epoch": 35.03, "learning_rate": 3.5964912280701756e-05, "loss": 0.4277, "step": 1340 }, { "epoch": 35.29, "learning_rate": 3.5818713450292403e-05, "loss": 0.3596, "step": 1350 }, { "epoch": 35.56, "learning_rate": 3.5672514619883044e-05, "loss": 0.4047, "step": 1360 }, { "epoch": 35.82, "learning_rate": 3.5526315789473684e-05, "loss": 0.3891, "step": 1370 }, { "epoch": 36.0, "eval_accuracy": 0.0776978417266187, "eval_loss": 13.370182991027832, "eval_runtime": 6.0896, "eval_samples_per_second": 228.258, "eval_steps_per_second": 7.225, "step": 1377 }, { "epoch": 36.08, "learning_rate": 3.538011695906433e-05, "loss": 0.4298, "step": 1380 }, { "epoch": 36.34, "learning_rate": 3.523391812865498e-05, "loss": 0.366, "step": 1390 }, { "epoch": 36.6, "learning_rate": 3.508771929824561e-05, "loss": 0.4485, "step": 1400 }, { "epoch": 36.86, "learning_rate": 3.494152046783626e-05, "loss": 0.3992, "step": 1410 }, { "epoch": 36.99, "eval_accuracy": 0.0776978417266187, "eval_loss": 13.476212501525879, "eval_runtime": 6.0276, "eval_samples_per_second": 230.606, "eval_steps_per_second": 7.3, "step": 1415 }, { "epoch": 37.12, "learning_rate": 3.4795321637426905e-05, "loss": 0.3786, "step": 1420 }, { "epoch": 37.39, "learning_rate": 3.4649122807017546e-05, "loss": 0.3805, "step": 1430 }, { "epoch": 37.65, "learning_rate": 3.4502923976608186e-05, "loss": 0.398, "step": 1440 }, { "epoch": 37.91, "learning_rate": 3.435672514619883e-05, "loss": 0.4014, "step": 1450 }, { "epoch": 37.99, "eval_accuracy": 0.07913669064748201, "eval_loss": 13.538166999816895, "eval_runtime": 5.9007, "eval_samples_per_second": 235.564, "eval_steps_per_second": 7.457, "step": 1453 }, { "epoch": 38.17, "learning_rate": 3.421052631578947e-05, "loss": 0.4176, "step": 1460 }, { "epoch": 38.43, "learning_rate": 3.406432748538012e-05, "loss": 0.3663, "step": 1470 }, { "epoch": 38.69, "learning_rate": 3.391812865497076e-05, "loss": 0.3658, "step": 1480 }, { "epoch": 38.95, "learning_rate": 3.377192982456141e-05, "loss": 0.3549, "step": 1490 }, { "epoch": 38.98, "eval_accuracy": 0.07913669064748201, "eval_loss": 13.555007934570312, "eval_runtime": 5.8979, "eval_samples_per_second": 235.677, "eval_steps_per_second": 7.46, "step": 1491 }, { "epoch": 39.22, "learning_rate": 3.362573099415205e-05, "loss": 0.4037, "step": 1500 }, { "epoch": 39.48, "learning_rate": 3.3479532163742695e-05, "loss": 0.3612, "step": 1510 }, { "epoch": 39.74, "learning_rate": 3.3333333333333335e-05, "loss": 0.3879, "step": 1520 }, { "epoch": 40.0, "learning_rate": 3.3187134502923975e-05, "loss": 0.4048, "step": 1530 }, { "epoch": 40.0, "eval_accuracy": 0.07985611510791367, "eval_loss": 13.640563011169434, "eval_runtime": 5.9909, "eval_samples_per_second": 232.02, "eval_steps_per_second": 7.345, "step": 1530 }, { "epoch": 40.26, "learning_rate": 3.304093567251462e-05, "loss": 0.3897, "step": 1540 }, { "epoch": 40.52, "learning_rate": 3.289473684210527e-05, "loss": 0.3554, "step": 1550 }, { "epoch": 40.78, "learning_rate": 3.274853801169591e-05, "loss": 0.3711, "step": 1560 }, { "epoch": 40.99, "eval_accuracy": 0.0776978417266187, "eval_loss": 13.51199722290039, "eval_runtime": 5.8868, "eval_samples_per_second": 236.12, "eval_steps_per_second": 7.474, "step": 1568 }, { "epoch": 41.05, "learning_rate": 3.260233918128655e-05, "loss": 0.4245, "step": 1570 }, { "epoch": 41.31, "learning_rate": 3.24561403508772e-05, "loss": 0.4058, "step": 1580 }, { "epoch": 41.57, "learning_rate": 3.230994152046784e-05, "loss": 0.3726, "step": 1590 }, { "epoch": 41.83, "learning_rate": 3.216374269005848e-05, "loss": 0.3834, "step": 1600 }, { "epoch": 41.99, "eval_accuracy": 0.07985611510791367, "eval_loss": 13.923029899597168, "eval_runtime": 5.9072, "eval_samples_per_second": 235.305, "eval_steps_per_second": 7.448, "step": 1606 }, { "epoch": 42.09, "learning_rate": 3.2017543859649124e-05, "loss": 0.4307, "step": 1610 }, { "epoch": 42.35, "learning_rate": 3.187134502923977e-05, "loss": 0.3423, "step": 1620 }, { "epoch": 42.61, "learning_rate": 3.172514619883041e-05, "loss": 0.3652, "step": 1630 }, { "epoch": 42.88, "learning_rate": 3.157894736842105e-05, "loss": 0.3475, "step": 1640 }, { "epoch": 42.98, "eval_accuracy": 0.07913669064748201, "eval_loss": 13.860240936279297, "eval_runtime": 5.9793, "eval_samples_per_second": 232.468, "eval_steps_per_second": 7.359, "step": 1644 }, { "epoch": 43.14, "learning_rate": 3.14327485380117e-05, "loss": 0.3636, "step": 1650 }, { "epoch": 43.4, "learning_rate": 3.128654970760234e-05, "loss": 0.3586, "step": 1660 }, { "epoch": 43.66, "learning_rate": 3.1140350877192986e-05, "loss": 0.3643, "step": 1670 }, { "epoch": 43.92, "learning_rate": 3.0994152046783626e-05, "loss": 0.3465, "step": 1680 }, { "epoch": 44.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 13.69305419921875, "eval_runtime": 5.9121, "eval_samples_per_second": 235.109, "eval_steps_per_second": 7.442, "step": 1683 }, { "epoch": 44.18, "learning_rate": 3.084795321637427e-05, "loss": 0.412, "step": 1690 }, { "epoch": 44.44, "learning_rate": 3.0701754385964913e-05, "loss": 0.3521, "step": 1700 }, { "epoch": 44.71, "learning_rate": 3.055555555555556e-05, "loss": 0.3847, "step": 1710 }, { "epoch": 44.97, "learning_rate": 3.0409356725146197e-05, "loss": 0.3682, "step": 1720 }, { "epoch": 44.99, "eval_accuracy": 0.07841726618705036, "eval_loss": 13.777403831481934, "eval_runtime": 6.1334, "eval_samples_per_second": 226.629, "eval_steps_per_second": 7.174, "step": 1721 }, { "epoch": 45.23, "learning_rate": 3.0263157894736844e-05, "loss": 0.4067, "step": 1730 }, { "epoch": 45.49, "learning_rate": 3.0116959064327488e-05, "loss": 0.3724, "step": 1740 }, { "epoch": 45.75, "learning_rate": 2.997076023391813e-05, "loss": 0.3613, "step": 1750 }, { "epoch": 45.99, "eval_accuracy": 0.07913669064748201, "eval_loss": 14.02348518371582, "eval_runtime": 6.1306, "eval_samples_per_second": 226.73, "eval_steps_per_second": 7.177, "step": 1759 }, { "epoch": 46.01, "learning_rate": 2.9824561403508772e-05, "loss": 0.3766, "step": 1760 }, { "epoch": 46.27, "learning_rate": 2.9678362573099415e-05, "loss": 0.4555, "step": 1770 }, { "epoch": 46.54, "learning_rate": 2.9532163742690062e-05, "loss": 0.3324, "step": 1780 }, { "epoch": 46.8, "learning_rate": 2.9385964912280706e-05, "loss": 0.368, "step": 1790 }, { "epoch": 46.98, "eval_accuracy": 0.08129496402877698, "eval_loss": 13.92888355255127, "eval_runtime": 6.1887, "eval_samples_per_second": 224.602, "eval_steps_per_second": 7.11, "step": 1797 }, { "epoch": 47.06, "learning_rate": 2.9239766081871346e-05, "loss": 0.2888, "step": 1800 }, { "epoch": 47.32, "learning_rate": 2.909356725146199e-05, "loss": 0.3538, "step": 1810 }, { "epoch": 47.58, "learning_rate": 2.8947368421052634e-05, "loss": 0.3298, "step": 1820 }, { "epoch": 47.84, "learning_rate": 2.8801169590643277e-05, "loss": 0.3961, "step": 1830 }, { "epoch": 48.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.254853248596191, "eval_runtime": 6.2277, "eval_samples_per_second": 223.197, "eval_steps_per_second": 7.065, "step": 1836 }, { "epoch": 48.1, "learning_rate": 2.8654970760233917e-05, "loss": 0.3433, "step": 1840 }, { "epoch": 48.37, "learning_rate": 2.850877192982456e-05, "loss": 0.366, "step": 1850 }, { "epoch": 48.63, "learning_rate": 2.8362573099415208e-05, "loss": 0.3278, "step": 1860 }, { "epoch": 48.89, "learning_rate": 2.821637426900585e-05, "loss": 0.365, "step": 1870 }, { "epoch": 48.99, "eval_accuracy": 0.08129496402877698, "eval_loss": 14.111425399780273, "eval_runtime": 6.2596, "eval_samples_per_second": 222.059, "eval_steps_per_second": 7.029, "step": 1874 }, { "epoch": 49.15, "learning_rate": 2.8070175438596492e-05, "loss": 0.3721, "step": 1880 }, { "epoch": 49.41, "learning_rate": 2.7923976608187135e-05, "loss": 0.3683, "step": 1890 }, { "epoch": 49.67, "learning_rate": 2.777777777777778e-05, "loss": 0.3506, "step": 1900 }, { "epoch": 49.93, "learning_rate": 2.7631578947368426e-05, "loss": 0.3259, "step": 1910 }, { "epoch": 49.99, "eval_accuracy": 0.08057553956834532, "eval_loss": 13.97095012664795, "eval_runtime": 6.1784, "eval_samples_per_second": 224.977, "eval_steps_per_second": 7.122, "step": 1912 }, { "epoch": 50.2, "learning_rate": 2.7485380116959063e-05, "loss": 0.3456, "step": 1920 }, { "epoch": 50.46, "learning_rate": 2.733918128654971e-05, "loss": 0.3499, "step": 1930 }, { "epoch": 50.72, "learning_rate": 2.7192982456140354e-05, "loss": 0.3255, "step": 1940 }, { "epoch": 50.98, "learning_rate": 2.7046783625730997e-05, "loss": 0.2998, "step": 1950 }, { "epoch": 50.98, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.028827667236328, "eval_runtime": 6.2989, "eval_samples_per_second": 220.674, "eval_steps_per_second": 6.985, "step": 1950 }, { "epoch": 51.24, "learning_rate": 2.6900584795321637e-05, "loss": 0.3892, "step": 1960 }, { "epoch": 51.5, "learning_rate": 2.675438596491228e-05, "loss": 0.336, "step": 1970 }, { "epoch": 51.76, "learning_rate": 2.6608187134502928e-05, "loss": 0.3203, "step": 1980 }, { "epoch": 52.0, "eval_accuracy": 0.08129496402877698, "eval_loss": 13.939803123474121, "eval_runtime": 6.0691, "eval_samples_per_second": 229.028, "eval_steps_per_second": 7.25, "step": 1989 }, { "epoch": 52.03, "learning_rate": 2.6461988304093572e-05, "loss": 0.3512, "step": 1990 }, { "epoch": 52.29, "learning_rate": 2.6315789473684212e-05, "loss": 0.3178, "step": 2000 }, { "epoch": 52.55, "learning_rate": 2.6169590643274856e-05, "loss": 0.3224, "step": 2010 }, { "epoch": 52.81, "learning_rate": 2.60233918128655e-05, "loss": 0.3104, "step": 2020 }, { "epoch": 52.99, "eval_accuracy": 0.08201438848920864, "eval_loss": 14.025542259216309, "eval_runtime": 5.9734, "eval_samples_per_second": 232.696, "eval_steps_per_second": 7.366, "step": 2027 }, { "epoch": 53.07, "learning_rate": 2.5877192982456143e-05, "loss": 0.3159, "step": 2030 }, { "epoch": 53.33, "learning_rate": 2.5730994152046783e-05, "loss": 0.3305, "step": 2040 }, { "epoch": 53.59, "learning_rate": 2.5584795321637427e-05, "loss": 0.3085, "step": 2050 }, { "epoch": 53.86, "learning_rate": 2.5438596491228074e-05, "loss": 0.3232, "step": 2060 }, { "epoch": 53.99, "eval_accuracy": 0.08273381294964029, "eval_loss": 13.93545150756836, "eval_runtime": 6.1249, "eval_samples_per_second": 226.943, "eval_steps_per_second": 7.184, "step": 2065 }, { "epoch": 54.12, "learning_rate": 2.5292397660818717e-05, "loss": 0.3035, "step": 2070 }, { "epoch": 54.38, "learning_rate": 2.5146198830409358e-05, "loss": 0.3235, "step": 2080 }, { "epoch": 54.64, "learning_rate": 2.5e-05, "loss": 0.3359, "step": 2090 }, { "epoch": 54.9, "learning_rate": 2.485380116959064e-05, "loss": 0.3521, "step": 2100 }, { "epoch": 54.98, "eval_accuracy": 0.08057553956834532, "eval_loss": 13.862702369689941, "eval_runtime": 5.97, "eval_samples_per_second": 232.832, "eval_steps_per_second": 7.37, "step": 2103 }, { "epoch": 55.16, "learning_rate": 2.470760233918129e-05, "loss": 0.3049, "step": 2110 }, { "epoch": 55.42, "learning_rate": 2.456140350877193e-05, "loss": 0.3011, "step": 2120 }, { "epoch": 55.69, "learning_rate": 2.4415204678362576e-05, "loss": 0.2945, "step": 2130 }, { "epoch": 55.95, "learning_rate": 2.4269005847953216e-05, "loss": 0.3322, "step": 2140 }, { "epoch": 56.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.01791000366211, "eval_runtime": 5.9414, "eval_samples_per_second": 233.951, "eval_steps_per_second": 7.406, "step": 2142 }, { "epoch": 56.21, "learning_rate": 2.412280701754386e-05, "loss": 0.2962, "step": 2150 }, { "epoch": 56.47, "learning_rate": 2.3976608187134503e-05, "loss": 0.3282, "step": 2160 }, { "epoch": 56.73, "learning_rate": 2.3830409356725147e-05, "loss": 0.2965, "step": 2170 }, { "epoch": 56.99, "learning_rate": 2.368421052631579e-05, "loss": 0.3129, "step": 2180 }, { "epoch": 56.99, "eval_accuracy": 0.08201438848920864, "eval_loss": 13.96402359008789, "eval_runtime": 5.8814, "eval_samples_per_second": 236.34, "eval_steps_per_second": 7.481, "step": 2180 }, { "epoch": 57.25, "learning_rate": 2.3538011695906434e-05, "loss": 0.3487, "step": 2190 }, { "epoch": 57.52, "learning_rate": 2.3391812865497074e-05, "loss": 0.3089, "step": 2200 }, { "epoch": 57.78, "learning_rate": 2.324561403508772e-05, "loss": 0.3159, "step": 2210 }, { "epoch": 57.99, "eval_accuracy": 0.07985611510791367, "eval_loss": 14.199702262878418, "eval_runtime": 6.0594, "eval_samples_per_second": 229.395, "eval_steps_per_second": 7.261, "step": 2218 }, { "epoch": 58.04, "learning_rate": 2.309941520467836e-05, "loss": 0.34, "step": 2220 }, { "epoch": 58.3, "learning_rate": 2.295321637426901e-05, "loss": 0.3108, "step": 2230 }, { "epoch": 58.56, "learning_rate": 2.280701754385965e-05, "loss": 0.3316, "step": 2240 }, { "epoch": 58.82, "learning_rate": 2.2660818713450292e-05, "loss": 0.3118, "step": 2250 }, { "epoch": 58.98, "eval_accuracy": 0.08201438848920864, "eval_loss": 14.163899421691895, "eval_runtime": 5.9877, "eval_samples_per_second": 232.144, "eval_steps_per_second": 7.348, "step": 2256 }, { "epoch": 59.08, "learning_rate": 2.2514619883040936e-05, "loss": 0.321, "step": 2260 }, { "epoch": 59.35, "learning_rate": 2.236842105263158e-05, "loss": 0.2972, "step": 2270 }, { "epoch": 59.61, "learning_rate": 2.2222222222222223e-05, "loss": 0.2783, "step": 2280 }, { "epoch": 59.87, "learning_rate": 2.2076023391812867e-05, "loss": 0.3196, "step": 2290 }, { "epoch": 60.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.033405303955078, "eval_runtime": 6.077, "eval_samples_per_second": 228.732, "eval_steps_per_second": 7.24, "step": 2295 }, { "epoch": 60.13, "learning_rate": 2.1929824561403507e-05, "loss": 0.3396, "step": 2300 }, { "epoch": 60.39, "learning_rate": 2.1783625730994154e-05, "loss": 0.3065, "step": 2310 }, { "epoch": 60.65, "learning_rate": 2.1637426900584794e-05, "loss": 0.3169, "step": 2320 }, { "epoch": 60.92, "learning_rate": 2.149122807017544e-05, "loss": 0.301, "step": 2330 }, { "epoch": 60.99, "eval_accuracy": 0.08201438848920864, "eval_loss": 13.995382308959961, "eval_runtime": 6.1902, "eval_samples_per_second": 224.549, "eval_steps_per_second": 7.108, "step": 2333 }, { "epoch": 61.18, "learning_rate": 2.134502923976608e-05, "loss": 0.3011, "step": 2340 }, { "epoch": 61.44, "learning_rate": 2.1198830409356725e-05, "loss": 0.2741, "step": 2350 }, { "epoch": 61.7, "learning_rate": 2.105263157894737e-05, "loss": 0.2713, "step": 2360 }, { "epoch": 61.96, "learning_rate": 2.0906432748538013e-05, "loss": 0.3142, "step": 2370 }, { "epoch": 61.99, "eval_accuracy": 0.07985611510791367, "eval_loss": 14.143210411071777, "eval_runtime": 6.2246, "eval_samples_per_second": 223.307, "eval_steps_per_second": 7.069, "step": 2371 }, { "epoch": 62.22, "learning_rate": 2.0760233918128656e-05, "loss": 0.2731, "step": 2380 }, { "epoch": 62.48, "learning_rate": 2.06140350877193e-05, "loss": 0.2847, "step": 2390 }, { "epoch": 62.75, "learning_rate": 2.046783625730994e-05, "loss": 0.3192, "step": 2400 }, { "epoch": 62.98, "eval_accuracy": 0.07841726618705036, "eval_loss": 14.026898384094238, "eval_runtime": 6.2695, "eval_samples_per_second": 221.708, "eval_steps_per_second": 7.018, "step": 2409 }, { "epoch": 63.01, "learning_rate": 2.0321637426900587e-05, "loss": 0.3586, "step": 2410 }, { "epoch": 63.27, "learning_rate": 2.0175438596491227e-05, "loss": 0.3255, "step": 2420 }, { "epoch": 63.53, "learning_rate": 2.0029239766081874e-05, "loss": 0.2821, "step": 2430 }, { "epoch": 63.79, "learning_rate": 1.9883040935672515e-05, "loss": 0.3342, "step": 2440 }, { "epoch": 64.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.044992446899414, "eval_runtime": 6.2504, "eval_samples_per_second": 222.386, "eval_steps_per_second": 7.04, "step": 2448 }, { "epoch": 64.05, "learning_rate": 1.9736842105263158e-05, "loss": 0.3163, "step": 2450 }, { "epoch": 64.31, "learning_rate": 1.9590643274853802e-05, "loss": 0.3105, "step": 2460 }, { "epoch": 64.58, "learning_rate": 1.9444444444444445e-05, "loss": 0.32, "step": 2470 }, { "epoch": 64.84, "learning_rate": 1.929824561403509e-05, "loss": 0.3045, "step": 2480 }, { "epoch": 64.99, "eval_accuracy": 0.08489208633093526, "eval_loss": 14.174620628356934, "eval_runtime": 6.2508, "eval_samples_per_second": 222.372, "eval_steps_per_second": 7.039, "step": 2486 }, { "epoch": 65.1, "learning_rate": 1.9152046783625733e-05, "loss": 0.3044, "step": 2490 }, { "epoch": 65.36, "learning_rate": 1.9005847953216373e-05, "loss": 0.3328, "step": 2500 }, { "epoch": 65.62, "learning_rate": 1.885964912280702e-05, "loss": 0.2961, "step": 2510 }, { "epoch": 65.88, "learning_rate": 1.871345029239766e-05, "loss": 0.2991, "step": 2520 }, { "epoch": 65.99, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.319219589233398, "eval_runtime": 6.2401, "eval_samples_per_second": 222.753, "eval_steps_per_second": 7.051, "step": 2524 }, { "epoch": 66.14, "learning_rate": 1.8567251461988304e-05, "loss": 0.3191, "step": 2530 }, { "epoch": 66.41, "learning_rate": 1.8421052631578947e-05, "loss": 0.3111, "step": 2540 }, { "epoch": 66.67, "learning_rate": 1.827485380116959e-05, "loss": 0.3201, "step": 2550 }, { "epoch": 66.93, "learning_rate": 1.8128654970760235e-05, "loss": 0.3228, "step": 2560 }, { "epoch": 66.98, "eval_accuracy": 0.07841726618705036, "eval_loss": 14.178192138671875, "eval_runtime": 6.2603, "eval_samples_per_second": 222.035, "eval_steps_per_second": 7.028, "step": 2562 }, { "epoch": 67.19, "learning_rate": 1.7982456140350878e-05, "loss": 0.2959, "step": 2570 }, { "epoch": 67.45, "learning_rate": 1.7836257309941522e-05, "loss": 0.2927, "step": 2580 }, { "epoch": 67.71, "learning_rate": 1.7690058479532165e-05, "loss": 0.2991, "step": 2590 }, { "epoch": 67.97, "learning_rate": 1.7543859649122806e-05, "loss": 0.2711, "step": 2600 }, { "epoch": 68.0, "eval_accuracy": 0.08489208633093526, "eval_loss": 14.426069259643555, "eval_runtime": 6.1828, "eval_samples_per_second": 224.816, "eval_steps_per_second": 7.116, "step": 2601 }, { "epoch": 68.24, "learning_rate": 1.7397660818713453e-05, "loss": 0.2786, "step": 2610 }, { "epoch": 68.5, "learning_rate": 1.7251461988304093e-05, "loss": 0.2746, "step": 2620 }, { "epoch": 68.76, "learning_rate": 1.7105263157894737e-05, "loss": 0.2473, "step": 2630 }, { "epoch": 68.99, "eval_accuracy": 0.08273381294964029, "eval_loss": 14.23031997680664, "eval_runtime": 6.0443, "eval_samples_per_second": 229.97, "eval_steps_per_second": 7.28, "step": 2639 }, { "epoch": 69.02, "learning_rate": 1.695906432748538e-05, "loss": 0.3484, "step": 2640 }, { "epoch": 69.28, "learning_rate": 1.6812865497076024e-05, "loss": 0.2993, "step": 2650 }, { "epoch": 69.54, "learning_rate": 1.6666666666666667e-05, "loss": 0.3016, "step": 2660 }, { "epoch": 69.8, "learning_rate": 1.652046783625731e-05, "loss": 0.3287, "step": 2670 }, { "epoch": 69.99, "eval_accuracy": 0.08273381294964029, "eval_loss": 14.274971961975098, "eval_runtime": 5.9713, "eval_samples_per_second": 232.779, "eval_steps_per_second": 7.369, "step": 2677 }, { "epoch": 70.07, "learning_rate": 1.6374269005847955e-05, "loss": 0.2662, "step": 2680 }, { "epoch": 70.33, "learning_rate": 1.62280701754386e-05, "loss": 0.3035, "step": 2690 }, { "epoch": 70.59, "learning_rate": 1.608187134502924e-05, "loss": 0.2923, "step": 2700 }, { "epoch": 70.85, "learning_rate": 1.5935672514619886e-05, "loss": 0.2673, "step": 2710 }, { "epoch": 70.98, "eval_accuracy": 0.08201438848920864, "eval_loss": 14.23031234741211, "eval_runtime": 5.945, "eval_samples_per_second": 233.809, "eval_steps_per_second": 7.401, "step": 2715 }, { "epoch": 71.11, "learning_rate": 1.5789473684210526e-05, "loss": 0.2952, "step": 2720 }, { "epoch": 71.37, "learning_rate": 1.564327485380117e-05, "loss": 0.3177, "step": 2730 }, { "epoch": 71.63, "learning_rate": 1.5497076023391813e-05, "loss": 0.2803, "step": 2740 }, { "epoch": 71.9, "learning_rate": 1.5350877192982457e-05, "loss": 0.2843, "step": 2750 }, { "epoch": 72.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.408571243286133, "eval_runtime": 5.9636, "eval_samples_per_second": 233.082, "eval_steps_per_second": 7.378, "step": 2754 }, { "epoch": 72.16, "learning_rate": 1.5204678362573099e-05, "loss": 0.3007, "step": 2760 }, { "epoch": 72.42, "learning_rate": 1.5058479532163744e-05, "loss": 0.3098, "step": 2770 }, { "epoch": 72.68, "learning_rate": 1.4912280701754386e-05, "loss": 0.272, "step": 2780 }, { "epoch": 72.94, "learning_rate": 1.4766081871345031e-05, "loss": 0.3099, "step": 2790 }, { "epoch": 72.99, "eval_accuracy": 0.08273381294964029, "eval_loss": 14.518421173095703, "eval_runtime": 5.9749, "eval_samples_per_second": 232.641, "eval_steps_per_second": 7.364, "step": 2792 }, { "epoch": 73.2, "learning_rate": 1.4619883040935673e-05, "loss": 0.2839, "step": 2800 }, { "epoch": 73.46, "learning_rate": 1.4473684210526317e-05, "loss": 0.3161, "step": 2810 }, { "epoch": 73.73, "learning_rate": 1.4327485380116959e-05, "loss": 0.3241, "step": 2820 }, { "epoch": 73.99, "learning_rate": 1.4181286549707604e-05, "loss": 0.3102, "step": 2830 }, { "epoch": 73.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.276801109313965, "eval_runtime": 5.8868, "eval_samples_per_second": 236.121, "eval_steps_per_second": 7.474, "step": 2830 }, { "epoch": 74.25, "learning_rate": 1.4035087719298246e-05, "loss": 0.3081, "step": 2840 }, { "epoch": 74.51, "learning_rate": 1.388888888888889e-05, "loss": 0.2935, "step": 2850 }, { "epoch": 74.77, "learning_rate": 1.3742690058479531e-05, "loss": 0.2911, "step": 2860 }, { "epoch": 74.98, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.100983619689941, "eval_runtime": 6.0333, "eval_samples_per_second": 230.387, "eval_steps_per_second": 7.293, "step": 2868 }, { "epoch": 75.03, "learning_rate": 1.3596491228070177e-05, "loss": 0.3481, "step": 2870 }, { "epoch": 75.29, "learning_rate": 1.3450292397660819e-05, "loss": 0.298, "step": 2880 }, { "epoch": 75.56, "learning_rate": 1.3304093567251464e-05, "loss": 0.282, "step": 2890 }, { "epoch": 75.82, "learning_rate": 1.3157894736842106e-05, "loss": 0.2927, "step": 2900 }, { "epoch": 76.0, "eval_accuracy": 0.08129496402877698, "eval_loss": 14.461828231811523, "eval_runtime": 6.3066, "eval_samples_per_second": 220.403, "eval_steps_per_second": 6.977, "step": 2907 }, { "epoch": 76.08, "learning_rate": 1.301169590643275e-05, "loss": 0.2929, "step": 2910 }, { "epoch": 76.34, "learning_rate": 1.2865497076023392e-05, "loss": 0.3012, "step": 2920 }, { "epoch": 76.6, "learning_rate": 1.2719298245614037e-05, "loss": 0.2694, "step": 2930 }, { "epoch": 76.86, "learning_rate": 1.2573099415204679e-05, "loss": 0.2967, "step": 2940 }, { "epoch": 76.99, "eval_accuracy": 0.08201438848920864, "eval_loss": 14.358075141906738, "eval_runtime": 6.119, "eval_samples_per_second": 227.162, "eval_steps_per_second": 7.191, "step": 2945 }, { "epoch": 77.12, "learning_rate": 1.242690058479532e-05, "loss": 0.2748, "step": 2950 }, { "epoch": 77.39, "learning_rate": 1.2280701754385964e-05, "loss": 0.281, "step": 2960 }, { "epoch": 77.65, "learning_rate": 1.2134502923976608e-05, "loss": 0.2897, "step": 2970 }, { "epoch": 77.91, "learning_rate": 1.1988304093567252e-05, "loss": 0.2446, "step": 2980 }, { "epoch": 77.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.456214904785156, "eval_runtime": 6.1336, "eval_samples_per_second": 226.62, "eval_steps_per_second": 7.174, "step": 2983 }, { "epoch": 78.17, "learning_rate": 1.1842105263157895e-05, "loss": 0.3009, "step": 2990 }, { "epoch": 78.43, "learning_rate": 1.1695906432748537e-05, "loss": 0.2901, "step": 3000 }, { "epoch": 78.69, "learning_rate": 1.154970760233918e-05, "loss": 0.2656, "step": 3010 }, { "epoch": 78.95, "learning_rate": 1.1403508771929824e-05, "loss": 0.3035, "step": 3020 }, { "epoch": 78.98, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.268139839172363, "eval_runtime": 6.2555, "eval_samples_per_second": 222.203, "eval_steps_per_second": 7.034, "step": 3021 }, { "epoch": 79.22, "learning_rate": 1.1257309941520468e-05, "loss": 0.2897, "step": 3030 }, { "epoch": 79.48, "learning_rate": 1.1111111111111112e-05, "loss": 0.3152, "step": 3040 }, { "epoch": 79.74, "learning_rate": 1.0964912280701754e-05, "loss": 0.3034, "step": 3050 }, { "epoch": 80.0, "learning_rate": 1.0818713450292397e-05, "loss": 0.2989, "step": 3060 }, { "epoch": 80.0, "eval_accuracy": 0.08273381294964029, "eval_loss": 14.276779174804688, "eval_runtime": 6.208, "eval_samples_per_second": 223.906, "eval_steps_per_second": 7.088, "step": 3060 }, { "epoch": 80.26, "learning_rate": 1.067251461988304e-05, "loss": 0.3268, "step": 3070 }, { "epoch": 80.52, "learning_rate": 1.0526315789473684e-05, "loss": 0.2827, "step": 3080 }, { "epoch": 80.78, "learning_rate": 1.0380116959064328e-05, "loss": 0.2486, "step": 3090 }, { "epoch": 80.99, "eval_accuracy": 0.08201438848920864, "eval_loss": 14.424173355102539, "eval_runtime": 6.2731, "eval_samples_per_second": 221.581, "eval_steps_per_second": 7.014, "step": 3098 }, { "epoch": 81.05, "learning_rate": 1.023391812865497e-05, "loss": 0.2711, "step": 3100 }, { "epoch": 81.31, "learning_rate": 1.0087719298245614e-05, "loss": 0.2918, "step": 3110 }, { "epoch": 81.57, "learning_rate": 9.941520467836257e-06, "loss": 0.3038, "step": 3120 }, { "epoch": 81.83, "learning_rate": 9.795321637426901e-06, "loss": 0.2622, "step": 3130 }, { "epoch": 81.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.381049156188965, "eval_runtime": 6.1831, "eval_samples_per_second": 224.805, "eval_steps_per_second": 7.116, "step": 3136 }, { "epoch": 82.09, "learning_rate": 9.649122807017545e-06, "loss": 0.295, "step": 3140 }, { "epoch": 82.35, "learning_rate": 9.502923976608186e-06, "loss": 0.2911, "step": 3150 }, { "epoch": 82.61, "learning_rate": 9.35672514619883e-06, "loss": 0.2637, "step": 3160 }, { "epoch": 82.88, "learning_rate": 9.210526315789474e-06, "loss": 0.2892, "step": 3170 }, { "epoch": 82.98, "eval_accuracy": 0.08273381294964029, "eval_loss": 14.46367073059082, "eval_runtime": 5.9176, "eval_samples_per_second": 234.894, "eval_steps_per_second": 7.435, "step": 3174 }, { "epoch": 83.14, "learning_rate": 9.064327485380117e-06, "loss": 0.3097, "step": 3180 }, { "epoch": 83.4, "learning_rate": 8.918128654970761e-06, "loss": 0.2677, "step": 3190 }, { "epoch": 83.66, "learning_rate": 8.771929824561403e-06, "loss": 0.2416, "step": 3200 }, { "epoch": 83.92, "learning_rate": 8.625730994152046e-06, "loss": 0.2668, "step": 3210 }, { "epoch": 84.0, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.459735870361328, "eval_runtime": 5.9202, "eval_samples_per_second": 234.79, "eval_steps_per_second": 7.432, "step": 3213 }, { "epoch": 84.18, "learning_rate": 8.47953216374269e-06, "loss": 0.286, "step": 3220 }, { "epoch": 84.44, "learning_rate": 8.333333333333334e-06, "loss": 0.2743, "step": 3230 }, { "epoch": 84.71, "learning_rate": 8.187134502923977e-06, "loss": 0.2709, "step": 3240 }, { "epoch": 84.97, "learning_rate": 8.04093567251462e-06, "loss": 0.2527, "step": 3250 }, { "epoch": 84.99, "eval_accuracy": 0.08201438848920864, "eval_loss": 14.30977725982666, "eval_runtime": 5.9216, "eval_samples_per_second": 234.734, "eval_steps_per_second": 7.43, "step": 3251 }, { "epoch": 85.23, "learning_rate": 7.894736842105263e-06, "loss": 0.3011, "step": 3260 }, { "epoch": 85.49, "learning_rate": 7.748538011695907e-06, "loss": 0.2647, "step": 3270 }, { "epoch": 85.75, "learning_rate": 7.602339181286549e-06, "loss": 0.2636, "step": 3280 }, { "epoch": 85.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.37409782409668, "eval_runtime": 5.948, "eval_samples_per_second": 233.693, "eval_steps_per_second": 7.397, "step": 3289 }, { "epoch": 86.01, "learning_rate": 7.456140350877193e-06, "loss": 0.2907, "step": 3290 }, { "epoch": 86.27, "learning_rate": 7.3099415204678366e-06, "loss": 0.2792, "step": 3300 }, { "epoch": 86.54, "learning_rate": 7.163742690058479e-06, "loss": 0.3177, "step": 3310 }, { "epoch": 86.8, "learning_rate": 7.017543859649123e-06, "loss": 0.247, "step": 3320 }, { "epoch": 86.98, "eval_accuracy": 0.0841726618705036, "eval_loss": 14.536933898925781, "eval_runtime": 5.8678, "eval_samples_per_second": 236.885, "eval_steps_per_second": 7.499, "step": 3327 }, { "epoch": 87.06, "learning_rate": 6.871345029239766e-06, "loss": 0.2813, "step": 3330 }, { "epoch": 87.32, "learning_rate": 6.725146198830409e-06, "loss": 0.2539, "step": 3340 }, { "epoch": 87.58, "learning_rate": 6.578947368421053e-06, "loss": 0.2976, "step": 3350 }, { "epoch": 87.84, "learning_rate": 6.432748538011696e-06, "loss": 0.2693, "step": 3360 }, { "epoch": 88.0, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.403931617736816, "eval_runtime": 5.9739, "eval_samples_per_second": 232.68, "eval_steps_per_second": 7.365, "step": 3366 }, { "epoch": 88.1, "learning_rate": 6.286549707602339e-06, "loss": 0.3025, "step": 3370 }, { "epoch": 88.37, "learning_rate": 6.140350877192982e-06, "loss": 0.2483, "step": 3380 }, { "epoch": 88.63, "learning_rate": 5.994152046783626e-06, "loss": 0.2826, "step": 3390 }, { "epoch": 88.89, "learning_rate": 5.8479532163742686e-06, "loss": 0.2692, "step": 3400 }, { "epoch": 88.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.616063117980957, "eval_runtime": 5.9896, "eval_samples_per_second": 232.068, "eval_steps_per_second": 7.346, "step": 3404 }, { "epoch": 89.15, "learning_rate": 5.701754385964912e-06, "loss": 0.294, "step": 3410 }, { "epoch": 89.41, "learning_rate": 5.555555555555556e-06, "loss": 0.3021, "step": 3420 }, { "epoch": 89.67, "learning_rate": 5.409356725146199e-06, "loss": 0.2924, "step": 3430 }, { "epoch": 89.93, "learning_rate": 5.263157894736842e-06, "loss": 0.28, "step": 3440 }, { "epoch": 89.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.524378776550293, "eval_runtime": 5.9099, "eval_samples_per_second": 235.197, "eval_steps_per_second": 7.445, "step": 3442 }, { "epoch": 90.2, "learning_rate": 5.116959064327485e-06, "loss": 0.3174, "step": 3450 }, { "epoch": 90.46, "learning_rate": 4.970760233918129e-06, "loss": 0.2556, "step": 3460 }, { "epoch": 90.72, "learning_rate": 4.824561403508772e-06, "loss": 0.2672, "step": 3470 }, { "epoch": 90.98, "learning_rate": 4.678362573099415e-06, "loss": 0.2535, "step": 3480 }, { "epoch": 90.98, "eval_accuracy": 0.0841726618705036, "eval_loss": 14.40617847442627, "eval_runtime": 5.9735, "eval_samples_per_second": 232.695, "eval_steps_per_second": 7.366, "step": 3480 }, { "epoch": 91.24, "learning_rate": 4.532163742690059e-06, "loss": 0.3035, "step": 3490 }, { "epoch": 91.5, "learning_rate": 4.3859649122807014e-06, "loss": 0.2757, "step": 3500 }, { "epoch": 91.76, "learning_rate": 4.239766081871345e-06, "loss": 0.2887, "step": 3510 }, { "epoch": 92.0, "eval_accuracy": 0.08057553956834532, "eval_loss": 14.41130256652832, "eval_runtime": 5.9481, "eval_samples_per_second": 233.689, "eval_steps_per_second": 7.397, "step": 3519 }, { "epoch": 92.03, "learning_rate": 4.093567251461989e-06, "loss": 0.2733, "step": 3520 }, { "epoch": 92.29, "learning_rate": 3.9473684210526315e-06, "loss": 0.2809, "step": 3530 }, { "epoch": 92.55, "learning_rate": 3.8011695906432747e-06, "loss": 0.2788, "step": 3540 }, { "epoch": 92.81, "learning_rate": 3.6549707602339183e-06, "loss": 0.257, "step": 3550 }, { "epoch": 92.99, "eval_accuracy": 0.0841726618705036, "eval_loss": 14.344242095947266, "eval_runtime": 5.9071, "eval_samples_per_second": 235.31, "eval_steps_per_second": 7.449, "step": 3557 }, { "epoch": 93.07, "learning_rate": 3.5087719298245615e-06, "loss": 0.3108, "step": 3560 }, { "epoch": 93.33, "learning_rate": 3.3625730994152047e-06, "loss": 0.2408, "step": 3570 }, { "epoch": 93.59, "learning_rate": 3.216374269005848e-06, "loss": 0.2597, "step": 3580 }, { "epoch": 93.86, "learning_rate": 3.070175438596491e-06, "loss": 0.2627, "step": 3590 }, { "epoch": 93.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.469304084777832, "eval_runtime": 6.0553, "eval_samples_per_second": 229.553, "eval_steps_per_second": 7.266, "step": 3595 }, { "epoch": 94.12, "learning_rate": 2.9239766081871343e-06, "loss": 0.2628, "step": 3600 }, { "epoch": 94.38, "learning_rate": 2.777777777777778e-06, "loss": 0.283, "step": 3610 }, { "epoch": 94.64, "learning_rate": 2.631578947368421e-06, "loss": 0.27, "step": 3620 }, { "epoch": 94.9, "learning_rate": 2.4853801169590643e-06, "loss": 0.2804, "step": 3630 }, { "epoch": 94.98, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.322315216064453, "eval_runtime": 6.0881, "eval_samples_per_second": 228.315, "eval_steps_per_second": 7.227, "step": 3633 }, { "epoch": 95.16, "learning_rate": 2.3391812865497075e-06, "loss": 0.3117, "step": 3640 }, { "epoch": 95.42, "learning_rate": 2.1929824561403507e-06, "loss": 0.2948, "step": 3650 }, { "epoch": 95.69, "learning_rate": 2.0467836257309943e-06, "loss": 0.2926, "step": 3660 }, { "epoch": 95.95, "learning_rate": 1.9005847953216373e-06, "loss": 0.2529, "step": 3670 }, { "epoch": 96.0, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.384366989135742, "eval_runtime": 6.1884, "eval_samples_per_second": 224.614, "eval_steps_per_second": 7.11, "step": 3672 }, { "epoch": 96.21, "learning_rate": 1.7543859649122807e-06, "loss": 0.2694, "step": 3680 }, { "epoch": 96.47, "learning_rate": 1.608187134502924e-06, "loss": 0.2869, "step": 3690 }, { "epoch": 96.73, "learning_rate": 1.4619883040935671e-06, "loss": 0.2634, "step": 3700 }, { "epoch": 96.99, "learning_rate": 1.3157894736842106e-06, "loss": 0.2327, "step": 3710 }, { "epoch": 96.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.428401947021484, "eval_runtime": 6.2696, "eval_samples_per_second": 221.706, "eval_steps_per_second": 7.018, "step": 3710 }, { "epoch": 97.25, "learning_rate": 1.1695906432748538e-06, "loss": 0.2786, "step": 3720 }, { "epoch": 97.52, "learning_rate": 1.0233918128654972e-06, "loss": 0.2893, "step": 3730 }, { "epoch": 97.78, "learning_rate": 8.771929824561404e-07, "loss": 0.2643, "step": 3740 }, { "epoch": 97.99, "eval_accuracy": 0.08345323741007195, "eval_loss": 14.556747436523438, "eval_runtime": 6.288, "eval_samples_per_second": 221.057, "eval_steps_per_second": 6.997, "step": 3748 }, { "epoch": 98.04, "learning_rate": 7.309941520467836e-07, "loss": 0.2726, "step": 3750 }, { "epoch": 98.3, "learning_rate": 5.847953216374269e-07, "loss": 0.2169, "step": 3760 }, { "epoch": 98.56, "learning_rate": 4.385964912280702e-07, "loss": 0.2801, "step": 3770 }, { "epoch": 98.82, "learning_rate": 2.9239766081871344e-07, "loss": 0.284, "step": 3780 }, { "epoch": 98.98, "eval_accuracy": 0.08129496402877698, "eval_loss": 14.673846244812012, "eval_runtime": 6.2773, "eval_samples_per_second": 221.432, "eval_steps_per_second": 7.009, "step": 3786 }, { "epoch": 99.08, "learning_rate": 1.4619883040935672e-07, "loss": 0.2767, "step": 3790 }, { "epoch": 99.35, "learning_rate": 0.0, "loss": 0.2503, "step": 3800 }, { "epoch": 99.35, "eval_accuracy": 0.0841726618705036, "eval_loss": 14.536262512207031, "eval_runtime": 6.1763, "eval_samples_per_second": 225.053, "eval_steps_per_second": 7.124, "step": 3800 }, { "epoch": 99.35, "step": 3800, "total_flos": 1.0404624773781246e+19, "train_loss": 0.44781778210087825, "train_runtime": 6473.2407, "train_samples_per_second": 75.217, "train_steps_per_second": 0.587 } ], "logging_steps": 10, "max_steps": 3800, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 1.0404624773781246e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }