{ "best_metric": 0.04137137532234192, "best_model_checkpoint": "./vit-base-beans/checkpoint-1480", "epoch": 20.0, "global_step": 1840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 9.945652173913043e-05, "loss": 3.3812, "step": 10 }, { "epoch": 0.22, "learning_rate": 9.891304347826087e-05, "loss": 3.3288, "step": 20 }, { "epoch": 0.33, "learning_rate": 9.836956521739132e-05, "loss": 3.3101, "step": 30 }, { "epoch": 0.43, "learning_rate": 9.782608695652174e-05, "loss": 3.2579, "step": 40 }, { "epoch": 0.43, "eval_accuracy": 0.17882919005613473, "eval_loss": 3.1847527027130127, "eval_runtime": 13.4342, "eval_samples_per_second": 92.823, "eval_steps_per_second": 11.612, "step": 40 }, { "epoch": 0.54, "learning_rate": 9.728260869565217e-05, "loss": 3.1255, "step": 50 }, { "epoch": 0.65, "learning_rate": 9.673913043478261e-05, "loss": 3.0162, "step": 60 }, { "epoch": 0.76, "learning_rate": 9.619565217391306e-05, "loss": 2.8706, "step": 70 }, { "epoch": 0.87, "learning_rate": 9.565217391304348e-05, "loss": 2.7157, "step": 80 }, { "epoch": 0.87, "eval_accuracy": 0.43785084202085006, "eval_loss": 2.5922651290893555, "eval_runtime": 13.9416, "eval_samples_per_second": 89.445, "eval_steps_per_second": 11.19, "step": 80 }, { "epoch": 0.98, "learning_rate": 9.510869565217391e-05, "loss": 2.5322, "step": 90 }, { "epoch": 1.09, "learning_rate": 9.456521739130435e-05, "loss": 2.3504, "step": 100 }, { "epoch": 1.2, "learning_rate": 9.402173913043478e-05, "loss": 2.1887, "step": 110 }, { "epoch": 1.3, "learning_rate": 9.347826086956522e-05, "loss": 2.0664, "step": 120 }, { "epoch": 1.3, "eval_accuracy": 0.6696070569366479, "eval_loss": 1.9748882055282593, "eval_runtime": 13.4691, "eval_samples_per_second": 92.582, "eval_steps_per_second": 11.582, "step": 120 }, { "epoch": 1.41, "learning_rate": 9.293478260869566e-05, "loss": 1.8705, "step": 130 }, { "epoch": 1.52, "learning_rate": 9.239130434782609e-05, "loss": 1.688, "step": 140 }, { "epoch": 1.63, "learning_rate": 9.184782608695652e-05, "loss": 1.5939, "step": 150 }, { "epoch": 1.74, "learning_rate": 9.130434782608696e-05, "loss": 1.4765, "step": 160 }, { "epoch": 1.74, "eval_accuracy": 0.917401764234162, "eval_loss": 1.3413872718811035, "eval_runtime": 13.9419, "eval_samples_per_second": 89.442, "eval_steps_per_second": 11.189, "step": 160 }, { "epoch": 1.85, "learning_rate": 9.07608695652174e-05, "loss": 1.3014, "step": 170 }, { "epoch": 1.96, "learning_rate": 9.021739130434783e-05, "loss": 1.201, "step": 180 }, { "epoch": 2.07, "learning_rate": 8.967391304347826e-05, "loss": 1.0165, "step": 190 }, { "epoch": 2.17, "learning_rate": 8.91304347826087e-05, "loss": 0.965, "step": 200 }, { "epoch": 2.17, "eval_accuracy": 0.9615076182838813, "eval_loss": 0.9264132380485535, "eval_runtime": 13.1542, "eval_samples_per_second": 94.798, "eval_steps_per_second": 11.859, "step": 200 }, { "epoch": 2.28, "learning_rate": 8.858695652173914e-05, "loss": 0.9096, "step": 210 }, { "epoch": 2.39, "learning_rate": 8.804347826086957e-05, "loss": 0.8527, "step": 220 }, { "epoch": 2.5, "learning_rate": 8.75e-05, "loss": 0.759, "step": 230 }, { "epoch": 2.61, "learning_rate": 8.695652173913044e-05, "loss": 0.7163, "step": 240 }, { "epoch": 2.61, "eval_accuracy": 0.9647153167602245, "eval_loss": 0.6652109622955322, "eval_runtime": 14.0687, "eval_samples_per_second": 88.637, "eval_steps_per_second": 11.088, "step": 240 }, { "epoch": 2.72, "learning_rate": 8.641304347826087e-05, "loss": 0.6403, "step": 250 }, { "epoch": 2.83, "learning_rate": 8.586956521739131e-05, "loss": 0.5857, "step": 260 }, { "epoch": 2.93, "learning_rate": 8.532608695652174e-05, "loss": 0.5406, "step": 270 }, { "epoch": 3.04, "learning_rate": 8.478260869565218e-05, "loss": 0.5061, "step": 280 }, { "epoch": 3.04, "eval_accuracy": 0.9687249398556536, "eval_loss": 0.5080122947692871, "eval_runtime": 13.0785, "eval_samples_per_second": 95.347, "eval_steps_per_second": 11.928, "step": 280 }, { "epoch": 3.15, "learning_rate": 8.423913043478261e-05, "loss": 0.4622, "step": 290 }, { "epoch": 3.26, "learning_rate": 8.369565217391305e-05, "loss": 0.4919, "step": 300 }, { "epoch": 3.37, "learning_rate": 8.315217391304349e-05, "loss": 0.371, "step": 310 }, { "epoch": 3.48, "learning_rate": 8.260869565217392e-05, "loss": 0.3883, "step": 320 }, { "epoch": 3.48, "eval_accuracy": 0.9759422614274258, "eval_loss": 0.3574630916118622, "eval_runtime": 13.9479, "eval_samples_per_second": 89.404, "eval_steps_per_second": 11.184, "step": 320 }, { "epoch": 3.59, "learning_rate": 8.206521739130435e-05, "loss": 0.3831, "step": 330 }, { "epoch": 3.7, "learning_rate": 8.152173913043478e-05, "loss": 0.3329, "step": 340 }, { "epoch": 3.8, "learning_rate": 8.097826086956523e-05, "loss": 0.3383, "step": 350 }, { "epoch": 3.91, "learning_rate": 8.043478260869566e-05, "loss": 0.3328, "step": 360 }, { "epoch": 3.91, "eval_accuracy": 0.9839615076182838, "eval_loss": 0.27629122138023376, "eval_runtime": 13.7308, "eval_samples_per_second": 90.818, "eval_steps_per_second": 11.361, "step": 360 }, { "epoch": 4.02, "learning_rate": 7.989130434782609e-05, "loss": 0.2387, "step": 370 }, { "epoch": 4.13, "learning_rate": 7.934782608695653e-05, "loss": 0.2509, "step": 380 }, { "epoch": 4.24, "learning_rate": 7.880434782608696e-05, "loss": 0.2259, "step": 390 }, { "epoch": 4.35, "learning_rate": 7.82608695652174e-05, "loss": 0.2049, "step": 400 }, { "epoch": 4.35, "eval_accuracy": 0.9855653568564555, "eval_loss": 0.2094665914773941, "eval_runtime": 13.8999, "eval_samples_per_second": 89.713, "eval_steps_per_second": 11.223, "step": 400 }, { "epoch": 4.46, "learning_rate": 7.771739130434783e-05, "loss": 0.1979, "step": 410 }, { "epoch": 4.57, "learning_rate": 7.717391304347827e-05, "loss": 0.1703, "step": 420 }, { "epoch": 4.67, "learning_rate": 7.66304347826087e-05, "loss": 0.1771, "step": 430 }, { "epoch": 4.78, "learning_rate": 7.608695652173914e-05, "loss": 0.2078, "step": 440 }, { "epoch": 4.78, "eval_accuracy": 0.9871692060946271, "eval_loss": 0.19693857431411743, "eval_runtime": 13.7091, "eval_samples_per_second": 90.961, "eval_steps_per_second": 11.379, "step": 440 }, { "epoch": 4.89, "learning_rate": 7.554347826086957e-05, "loss": 0.1564, "step": 450 }, { "epoch": 5.0, "learning_rate": 7.500000000000001e-05, "loss": 0.1512, "step": 460 }, { "epoch": 5.11, "learning_rate": 7.445652173913044e-05, "loss": 0.1339, "step": 470 }, { "epoch": 5.22, "learning_rate": 7.391304347826086e-05, "loss": 0.1447, "step": 480 }, { "epoch": 5.22, "eval_accuracy": 0.9871692060946271, "eval_loss": 0.14835722744464874, "eval_runtime": 13.9008, "eval_samples_per_second": 89.707, "eval_steps_per_second": 11.222, "step": 480 }, { "epoch": 5.33, "learning_rate": 7.336956521739132e-05, "loss": 0.1207, "step": 490 }, { "epoch": 5.43, "learning_rate": 7.282608695652175e-05, "loss": 0.1294, "step": 500 }, { "epoch": 5.54, "learning_rate": 7.228260869565217e-05, "loss": 0.116, "step": 510 }, { "epoch": 5.65, "learning_rate": 7.17391304347826e-05, "loss": 0.1401, "step": 520 }, { "epoch": 5.65, "eval_accuracy": 0.9839615076182838, "eval_loss": 0.14811548590660095, "eval_runtime": 13.5209, "eval_samples_per_second": 92.228, "eval_steps_per_second": 11.538, "step": 520 }, { "epoch": 5.76, "learning_rate": 7.119565217391306e-05, "loss": 0.0904, "step": 530 }, { "epoch": 5.87, "learning_rate": 7.065217391304349e-05, "loss": 0.1099, "step": 540 }, { "epoch": 5.98, "learning_rate": 7.010869565217391e-05, "loss": 0.1599, "step": 550 }, { "epoch": 6.09, "learning_rate": 6.956521739130436e-05, "loss": 0.1232, "step": 560 }, { "epoch": 6.09, "eval_accuracy": 0.991980753809142, "eval_loss": 0.11416751146316528, "eval_runtime": 13.9361, "eval_samples_per_second": 89.48, "eval_steps_per_second": 11.194, "step": 560 }, { "epoch": 6.2, "learning_rate": 6.902173913043478e-05, "loss": 0.1381, "step": 570 }, { "epoch": 6.3, "learning_rate": 6.847826086956522e-05, "loss": 0.1001, "step": 580 }, { "epoch": 6.41, "learning_rate": 6.793478260869565e-05, "loss": 0.0823, "step": 590 }, { "epoch": 6.52, "learning_rate": 6.73913043478261e-05, "loss": 0.0725, "step": 600 }, { "epoch": 6.52, "eval_accuracy": 0.9879711307137129, "eval_loss": 0.10076911747455597, "eval_runtime": 13.8114, "eval_samples_per_second": 90.288, "eval_steps_per_second": 11.295, "step": 600 }, { "epoch": 6.63, "learning_rate": 6.684782608695652e-05, "loss": 0.0852, "step": 610 }, { "epoch": 6.74, "learning_rate": 6.630434782608695e-05, "loss": 0.0723, "step": 620 }, { "epoch": 6.85, "learning_rate": 6.576086956521739e-05, "loss": 0.0881, "step": 630 }, { "epoch": 6.96, "learning_rate": 6.521739130434783e-05, "loss": 0.0934, "step": 640 }, { "epoch": 6.96, "eval_accuracy": 0.9895749799518845, "eval_loss": 0.09398525953292847, "eval_runtime": 14.2025, "eval_samples_per_second": 87.801, "eval_steps_per_second": 10.984, "step": 640 }, { "epoch": 7.07, "learning_rate": 6.467391304347826e-05, "loss": 0.0668, "step": 650 }, { "epoch": 7.17, "learning_rate": 6.413043478260869e-05, "loss": 0.0586, "step": 660 }, { "epoch": 7.28, "learning_rate": 6.358695652173913e-05, "loss": 0.0543, "step": 670 }, { "epoch": 7.39, "learning_rate": 6.304347826086957e-05, "loss": 0.053, "step": 680 }, { "epoch": 7.39, "eval_accuracy": 0.9895749799518845, "eval_loss": 0.08539092540740967, "eval_runtime": 13.9817, "eval_samples_per_second": 89.188, "eval_steps_per_second": 11.157, "step": 680 }, { "epoch": 7.5, "learning_rate": 6.25e-05, "loss": 0.0514, "step": 690 }, { "epoch": 7.61, "learning_rate": 6.195652173913043e-05, "loss": 0.0491, "step": 700 }, { "epoch": 7.72, "learning_rate": 6.141304347826087e-05, "loss": 0.0481, "step": 710 }, { "epoch": 7.83, "learning_rate": 6.086956521739131e-05, "loss": 0.0469, "step": 720 }, { "epoch": 7.83, "eval_accuracy": 0.9903769045709703, "eval_loss": 0.06862174719572067, "eval_runtime": 14.4287, "eval_samples_per_second": 86.425, "eval_steps_per_second": 10.812, "step": 720 }, { "epoch": 7.93, "learning_rate": 6.032608695652174e-05, "loss": 0.0693, "step": 730 }, { "epoch": 8.04, "learning_rate": 5.9782608695652175e-05, "loss": 0.0664, "step": 740 }, { "epoch": 8.15, "learning_rate": 5.923913043478261e-05, "loss": 0.0502, "step": 750 }, { "epoch": 8.26, "learning_rate": 5.869565217391305e-05, "loss": 0.0429, "step": 760 }, { "epoch": 8.26, "eval_accuracy": 0.9863672814755413, "eval_loss": 0.0824466422200203, "eval_runtime": 13.8977, "eval_samples_per_second": 89.727, "eval_steps_per_second": 11.225, "step": 760 }, { "epoch": 8.37, "learning_rate": 5.815217391304349e-05, "loss": 0.0622, "step": 770 }, { "epoch": 8.48, "learning_rate": 5.7608695652173915e-05, "loss": 0.0394, "step": 780 }, { "epoch": 8.59, "learning_rate": 5.706521739130435e-05, "loss": 0.0375, "step": 790 }, { "epoch": 8.7, "learning_rate": 5.652173913043478e-05, "loss": 0.0371, "step": 800 }, { "epoch": 8.7, "eval_accuracy": 0.991980753809142, "eval_loss": 0.07010400295257568, "eval_runtime": 13.4894, "eval_samples_per_second": 92.443, "eval_steps_per_second": 11.565, "step": 800 }, { "epoch": 8.8, "learning_rate": 5.5978260869565226e-05, "loss": 0.036, "step": 810 }, { "epoch": 8.91, "learning_rate": 5.5434782608695654e-05, "loss": 0.0352, "step": 820 }, { "epoch": 9.02, "learning_rate": 5.489130434782609e-05, "loss": 0.0344, "step": 830 }, { "epoch": 9.13, "learning_rate": 5.4347826086956524e-05, "loss": 0.033, "step": 840 }, { "epoch": 9.13, "eval_accuracy": 0.991980753809142, "eval_loss": 0.06847481429576874, "eval_runtime": 13.9465, "eval_samples_per_second": 89.413, "eval_steps_per_second": 11.186, "step": 840 }, { "epoch": 9.24, "learning_rate": 5.380434782608695e-05, "loss": 0.0327, "step": 850 }, { "epoch": 9.35, "learning_rate": 5.32608695652174e-05, "loss": 0.0318, "step": 860 }, { "epoch": 9.46, "learning_rate": 5.271739130434783e-05, "loss": 0.0315, "step": 870 }, { "epoch": 9.57, "learning_rate": 5.217391304347826e-05, "loss": 0.0308, "step": 880 }, { "epoch": 9.57, "eval_accuracy": 0.991980753809142, "eval_loss": 0.06314855068922043, "eval_runtime": 13.4895, "eval_samples_per_second": 92.442, "eval_steps_per_second": 11.565, "step": 880 }, { "epoch": 9.67, "learning_rate": 5.163043478260869e-05, "loss": 0.0502, "step": 890 }, { "epoch": 9.78, "learning_rate": 5.108695652173914e-05, "loss": 0.03, "step": 900 }, { "epoch": 9.89, "learning_rate": 5.054347826086957e-05, "loss": 0.0294, "step": 910 }, { "epoch": 10.0, "learning_rate": 5e-05, "loss": 0.0398, "step": 920 }, { "epoch": 10.0, "eval_accuracy": 0.9927826784282278, "eval_loss": 0.05900084227323532, "eval_runtime": 14.0073, "eval_samples_per_second": 89.025, "eval_steps_per_second": 11.137, "step": 920 }, { "epoch": 10.11, "learning_rate": 4.945652173913044e-05, "loss": 0.03, "step": 930 }, { "epoch": 10.22, "learning_rate": 4.891304347826087e-05, "loss": 0.029, "step": 940 }, { "epoch": 10.33, "learning_rate": 4.836956521739131e-05, "loss": 0.0273, "step": 950 }, { "epoch": 10.43, "learning_rate": 4.782608695652174e-05, "loss": 0.0453, "step": 960 }, { "epoch": 10.43, "eval_accuracy": 0.9895749799518845, "eval_loss": 0.062146905809640884, "eval_runtime": 14.1053, "eval_samples_per_second": 88.406, "eval_steps_per_second": 11.06, "step": 960 }, { "epoch": 10.54, "learning_rate": 4.7282608695652177e-05, "loss": 0.0415, "step": 970 }, { "epoch": 10.65, "learning_rate": 4.673913043478261e-05, "loss": 0.0268, "step": 980 }, { "epoch": 10.76, "learning_rate": 4.6195652173913046e-05, "loss": 0.0282, "step": 990 }, { "epoch": 10.87, "learning_rate": 4.565217391304348e-05, "loss": 0.026, "step": 1000 }, { "epoch": 10.87, "eval_accuracy": 0.9855653568564555, "eval_loss": 0.0649920180439949, "eval_runtime": 13.8769, "eval_samples_per_second": 89.861, "eval_steps_per_second": 11.242, "step": 1000 }, { "epoch": 10.98, "learning_rate": 4.5108695652173916e-05, "loss": 0.0255, "step": 1010 }, { "epoch": 11.09, "learning_rate": 4.456521739130435e-05, "loss": 0.0246, "step": 1020 }, { "epoch": 11.2, "learning_rate": 4.4021739130434786e-05, "loss": 0.0264, "step": 1030 }, { "epoch": 11.3, "learning_rate": 4.347826086956522e-05, "loss": 0.0257, "step": 1040 }, { "epoch": 11.3, "eval_accuracy": 0.9927826784282278, "eval_loss": 0.04654848575592041, "eval_runtime": 13.6877, "eval_samples_per_second": 91.103, "eval_steps_per_second": 11.397, "step": 1040 }, { "epoch": 11.41, "learning_rate": 4.2934782608695655e-05, "loss": 0.0237, "step": 1050 }, { "epoch": 11.52, "learning_rate": 4.239130434782609e-05, "loss": 0.0233, "step": 1060 }, { "epoch": 11.63, "learning_rate": 4.1847826086956525e-05, "loss": 0.0231, "step": 1070 }, { "epoch": 11.74, "learning_rate": 4.130434782608696e-05, "loss": 0.041, "step": 1080 }, { "epoch": 11.74, "eval_accuracy": 0.9927826784282278, "eval_loss": 0.04421408474445343, "eval_runtime": 14.1229, "eval_samples_per_second": 88.296, "eval_steps_per_second": 11.046, "step": 1080 }, { "epoch": 11.85, "learning_rate": 4.076086956521739e-05, "loss": 0.0234, "step": 1090 }, { "epoch": 11.96, "learning_rate": 4.021739130434783e-05, "loss": 0.0221, "step": 1100 }, { "epoch": 12.07, "learning_rate": 3.9673913043478264e-05, "loss": 0.0251, "step": 1110 }, { "epoch": 12.17, "learning_rate": 3.91304347826087e-05, "loss": 0.0223, "step": 1120 }, { "epoch": 12.17, "eval_accuracy": 0.9863672814755413, "eval_loss": 0.06379802525043488, "eval_runtime": 13.3726, "eval_samples_per_second": 93.25, "eval_steps_per_second": 11.666, "step": 1120 }, { "epoch": 12.28, "learning_rate": 3.8586956521739134e-05, "loss": 0.0222, "step": 1130 }, { "epoch": 12.39, "learning_rate": 3.804347826086957e-05, "loss": 0.0208, "step": 1140 }, { "epoch": 12.5, "learning_rate": 3.7500000000000003e-05, "loss": 0.0207, "step": 1150 }, { "epoch": 12.61, "learning_rate": 3.695652173913043e-05, "loss": 0.0205, "step": 1160 }, { "epoch": 12.61, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.050300538539886475, "eval_runtime": 14.0669, "eval_samples_per_second": 88.648, "eval_steps_per_second": 11.09, "step": 1160 }, { "epoch": 12.72, "learning_rate": 3.641304347826087e-05, "loss": 0.0331, "step": 1170 }, { "epoch": 12.83, "learning_rate": 3.58695652173913e-05, "loss": 0.021, "step": 1180 }, { "epoch": 12.93, "learning_rate": 3.532608695652174e-05, "loss": 0.0203, "step": 1190 }, { "epoch": 13.04, "learning_rate": 3.478260869565218e-05, "loss": 0.0221, "step": 1200 }, { "epoch": 13.04, "eval_accuracy": 0.991980753809142, "eval_loss": 0.047799013555049896, "eval_runtime": 13.3712, "eval_samples_per_second": 93.26, "eval_steps_per_second": 11.667, "step": 1200 }, { "epoch": 13.15, "learning_rate": 3.423913043478261e-05, "loss": 0.0191, "step": 1210 }, { "epoch": 13.26, "learning_rate": 3.369565217391305e-05, "loss": 0.0195, "step": 1220 }, { "epoch": 13.37, "learning_rate": 3.3152173913043475e-05, "loss": 0.0188, "step": 1230 }, { "epoch": 13.48, "learning_rate": 3.260869565217392e-05, "loss": 0.0188, "step": 1240 }, { "epoch": 13.48, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.04699365794658661, "eval_runtime": 13.8942, "eval_samples_per_second": 89.75, "eval_steps_per_second": 11.228, "step": 1240 }, { "epoch": 13.59, "learning_rate": 3.2065217391304345e-05, "loss": 0.019, "step": 1250 }, { "epoch": 13.7, "learning_rate": 3.152173913043479e-05, "loss": 0.0184, "step": 1260 }, { "epoch": 13.8, "learning_rate": 3.0978260869565215e-05, "loss": 0.0179, "step": 1270 }, { "epoch": 13.91, "learning_rate": 3.0434782608695656e-05, "loss": 0.0302, "step": 1280 }, { "epoch": 13.91, "eval_accuracy": 0.9927826784282278, "eval_loss": 0.04419828951358795, "eval_runtime": 13.9931, "eval_samples_per_second": 89.115, "eval_steps_per_second": 11.148, "step": 1280 }, { "epoch": 14.02, "learning_rate": 2.9891304347826088e-05, "loss": 0.0182, "step": 1290 }, { "epoch": 14.13, "learning_rate": 2.9347826086956526e-05, "loss": 0.0216, "step": 1300 }, { "epoch": 14.24, "learning_rate": 2.8804347826086957e-05, "loss": 0.0174, "step": 1310 }, { "epoch": 14.35, "learning_rate": 2.826086956521739e-05, "loss": 0.0171, "step": 1320 }, { "epoch": 14.35, "eval_accuracy": 0.9935846030473136, "eval_loss": 0.04177280142903328, "eval_runtime": 13.9993, "eval_samples_per_second": 89.076, "eval_steps_per_second": 11.143, "step": 1320 }, { "epoch": 14.46, "learning_rate": 2.7717391304347827e-05, "loss": 0.0172, "step": 1330 }, { "epoch": 14.57, "learning_rate": 2.7173913043478262e-05, "loss": 0.0173, "step": 1340 }, { "epoch": 14.67, "learning_rate": 2.66304347826087e-05, "loss": 0.0259, "step": 1350 }, { "epoch": 14.78, "learning_rate": 2.608695652173913e-05, "loss": 0.0197, "step": 1360 }, { "epoch": 14.78, "eval_accuracy": 0.991980753809142, "eval_loss": 0.04225374758243561, "eval_runtime": 14.4748, "eval_samples_per_second": 86.15, "eval_steps_per_second": 10.777, "step": 1360 }, { "epoch": 14.89, "learning_rate": 2.554347826086957e-05, "loss": 0.0166, "step": 1370 }, { "epoch": 15.0, "learning_rate": 2.5e-05, "loss": 0.0163, "step": 1380 }, { "epoch": 15.11, "learning_rate": 2.4456521739130436e-05, "loss": 0.0164, "step": 1390 }, { "epoch": 15.22, "learning_rate": 2.391304347826087e-05, "loss": 0.0162, "step": 1400 }, { "epoch": 15.22, "eval_accuracy": 0.9927826784282278, "eval_loss": 0.04216426983475685, "eval_runtime": 14.0671, "eval_samples_per_second": 88.646, "eval_steps_per_second": 11.09, "step": 1400 }, { "epoch": 15.33, "learning_rate": 2.3369565217391306e-05, "loss": 0.0172, "step": 1410 }, { "epoch": 15.43, "learning_rate": 2.282608695652174e-05, "loss": 0.016, "step": 1420 }, { "epoch": 15.54, "learning_rate": 2.2282608695652175e-05, "loss": 0.0158, "step": 1430 }, { "epoch": 15.65, "learning_rate": 2.173913043478261e-05, "loss": 0.0159, "step": 1440 }, { "epoch": 15.65, "eval_accuracy": 0.991980753809142, "eval_loss": 0.043235816061496735, "eval_runtime": 13.435, "eval_samples_per_second": 92.817, "eval_steps_per_second": 11.611, "step": 1440 }, { "epoch": 15.76, "learning_rate": 2.1195652173913045e-05, "loss": 0.0158, "step": 1450 }, { "epoch": 15.87, "learning_rate": 2.065217391304348e-05, "loss": 0.0252, "step": 1460 }, { "epoch": 15.98, "learning_rate": 2.0108695652173915e-05, "loss": 0.0156, "step": 1470 }, { "epoch": 16.09, "learning_rate": 1.956521739130435e-05, "loss": 0.0155, "step": 1480 }, { "epoch": 16.09, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.04137137532234192, "eval_runtime": 13.9815, "eval_samples_per_second": 89.189, "eval_steps_per_second": 11.158, "step": 1480 }, { "epoch": 16.2, "learning_rate": 1.9021739130434784e-05, "loss": 0.0175, "step": 1490 }, { "epoch": 16.3, "learning_rate": 1.8478260869565216e-05, "loss": 0.0155, "step": 1500 }, { "epoch": 16.41, "learning_rate": 1.793478260869565e-05, "loss": 0.0258, "step": 1510 }, { "epoch": 16.52, "learning_rate": 1.739130434782609e-05, "loss": 0.015, "step": 1520 }, { "epoch": 16.52, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.0487416572868824, "eval_runtime": 13.4779, "eval_samples_per_second": 92.522, "eval_steps_per_second": 11.575, "step": 1520 }, { "epoch": 16.63, "learning_rate": 1.6847826086956524e-05, "loss": 0.0152, "step": 1530 }, { "epoch": 16.74, "learning_rate": 1.630434782608696e-05, "loss": 0.0174, "step": 1540 }, { "epoch": 16.85, "learning_rate": 1.5760869565217393e-05, "loss": 0.0147, "step": 1550 }, { "epoch": 16.96, "learning_rate": 1.5217391304347828e-05, "loss": 0.015, "step": 1560 }, { "epoch": 16.96, "eval_accuracy": 0.991980753809142, "eval_loss": 0.04399973526597023, "eval_runtime": 14.0057, "eval_samples_per_second": 89.035, "eval_steps_per_second": 11.138, "step": 1560 }, { "epoch": 17.07, "learning_rate": 1.4673913043478263e-05, "loss": 0.0148, "step": 1570 }, { "epoch": 17.17, "learning_rate": 1.4130434782608694e-05, "loss": 0.0147, "step": 1580 }, { "epoch": 17.28, "learning_rate": 1.3586956521739131e-05, "loss": 0.0148, "step": 1590 }, { "epoch": 17.39, "learning_rate": 1.3043478260869566e-05, "loss": 0.0146, "step": 1600 }, { "epoch": 17.39, "eval_accuracy": 0.991980753809142, "eval_loss": 0.04343697056174278, "eval_runtime": 14.4989, "eval_samples_per_second": 86.007, "eval_steps_per_second": 10.759, "step": 1600 }, { "epoch": 17.5, "learning_rate": 1.25e-05, "loss": 0.0145, "step": 1610 }, { "epoch": 17.61, "learning_rate": 1.1956521739130435e-05, "loss": 0.0144, "step": 1620 }, { "epoch": 17.72, "learning_rate": 1.141304347826087e-05, "loss": 0.0149, "step": 1630 }, { "epoch": 17.83, "learning_rate": 1.0869565217391305e-05, "loss": 0.0143, "step": 1640 }, { "epoch": 17.83, "eval_accuracy": 0.991980753809142, "eval_loss": 0.042883455753326416, "eval_runtime": 14.0877, "eval_samples_per_second": 88.517, "eval_steps_per_second": 11.073, "step": 1640 }, { "epoch": 17.93, "learning_rate": 1.032608695652174e-05, "loss": 0.0142, "step": 1650 }, { "epoch": 18.04, "learning_rate": 9.782608695652175e-06, "loss": 0.0225, "step": 1660 }, { "epoch": 18.15, "learning_rate": 9.239130434782608e-06, "loss": 0.0146, "step": 1670 }, { "epoch": 18.26, "learning_rate": 8.695652173913044e-06, "loss": 0.0143, "step": 1680 }, { "epoch": 18.26, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.04524253308773041, "eval_runtime": 14.4999, "eval_samples_per_second": 86.001, "eval_steps_per_second": 10.759, "step": 1680 }, { "epoch": 18.37, "learning_rate": 8.15217391304348e-06, "loss": 0.0154, "step": 1690 }, { "epoch": 18.48, "learning_rate": 7.608695652173914e-06, "loss": 0.0144, "step": 1700 }, { "epoch": 18.59, "learning_rate": 7.065217391304347e-06, "loss": 0.014, "step": 1710 }, { "epoch": 18.7, "learning_rate": 6.521739130434783e-06, "loss": 0.014, "step": 1720 }, { "epoch": 18.7, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.04453733563423157, "eval_runtime": 14.0735, "eval_samples_per_second": 88.606, "eval_steps_per_second": 11.085, "step": 1720 }, { "epoch": 18.8, "learning_rate": 5.978260869565218e-06, "loss": 0.018, "step": 1730 }, { "epoch": 18.91, "learning_rate": 5.4347826086956525e-06, "loss": 0.014, "step": 1740 }, { "epoch": 19.02, "learning_rate": 4.891304347826087e-06, "loss": 0.0155, "step": 1750 }, { "epoch": 19.13, "learning_rate": 4.347826086956522e-06, "loss": 0.0141, "step": 1760 }, { "epoch": 19.13, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.048826370388269424, "eval_runtime": 14.2162, "eval_samples_per_second": 87.717, "eval_steps_per_second": 10.973, "step": 1760 }, { "epoch": 19.24, "learning_rate": 3.804347826086957e-06, "loss": 0.0139, "step": 1770 }, { "epoch": 19.35, "learning_rate": 3.2608695652173914e-06, "loss": 0.0139, "step": 1780 }, { "epoch": 19.46, "learning_rate": 2.7173913043478263e-06, "loss": 0.0139, "step": 1790 }, { "epoch": 19.57, "learning_rate": 2.173913043478261e-06, "loss": 0.0138, "step": 1800 }, { "epoch": 19.57, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.048504043370485306, "eval_runtime": 13.6564, "eval_samples_per_second": 91.312, "eval_steps_per_second": 11.423, "step": 1800 }, { "epoch": 19.67, "learning_rate": 1.6304347826086957e-06, "loss": 0.0144, "step": 1810 }, { "epoch": 19.78, "learning_rate": 1.0869565217391306e-06, "loss": 0.0155, "step": 1820 }, { "epoch": 19.89, "learning_rate": 5.434782608695653e-07, "loss": 0.0141, "step": 1830 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.0138, "step": 1840 }, { "epoch": 20.0, "eval_accuracy": 0.9911788291900562, "eval_loss": 0.0495075099170208, "eval_runtime": 14.095, "eval_samples_per_second": 88.471, "eval_steps_per_second": 11.068, "step": 1840 }, { "epoch": 20.0, "step": 1840, "total_flos": 3.419773941089157e+18, "train_loss": 0.33101742866894474, "train_runtime": 1771.2395, "train_samples_per_second": 24.909, "train_steps_per_second": 1.039 } ], "max_steps": 1840, "num_train_epochs": 20, "total_flos": 3.419773941089157e+18, "trial_name": null, "trial_params": null }