|
{ |
|
"best_metric": 0.04137137532234192, |
|
"best_model_checkpoint": "./vit-base-beans/checkpoint-1480", |
|
"epoch": 20.0, |
|
"global_step": 1840, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.945652173913043e-05, |
|
"loss": 3.3812, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.891304347826087e-05, |
|
"loss": 3.3288, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.836956521739132e-05, |
|
"loss": 3.3101, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.782608695652174e-05, |
|
"loss": 3.2579, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.17882919005613473, |
|
"eval_loss": 3.1847527027130127, |
|
"eval_runtime": 13.4342, |
|
"eval_samples_per_second": 92.823, |
|
"eval_steps_per_second": 11.612, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.728260869565217e-05, |
|
"loss": 3.1255, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.673913043478261e-05, |
|
"loss": 3.0162, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.619565217391306e-05, |
|
"loss": 2.8706, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.565217391304348e-05, |
|
"loss": 2.7157, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.43785084202085006, |
|
"eval_loss": 2.5922651290893555, |
|
"eval_runtime": 13.9416, |
|
"eval_samples_per_second": 89.445, |
|
"eval_steps_per_second": 11.19, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.510869565217391e-05, |
|
"loss": 2.5322, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.456521739130435e-05, |
|
"loss": 2.3504, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 9.402173913043478e-05, |
|
"loss": 2.1887, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 9.347826086956522e-05, |
|
"loss": 2.0664, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.6696070569366479, |
|
"eval_loss": 1.9748882055282593, |
|
"eval_runtime": 13.4691, |
|
"eval_samples_per_second": 92.582, |
|
"eval_steps_per_second": 11.582, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 9.293478260869566e-05, |
|
"loss": 1.8705, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.239130434782609e-05, |
|
"loss": 1.688, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.184782608695652e-05, |
|
"loss": 1.5939, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 9.130434782608696e-05, |
|
"loss": 1.4765, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.917401764234162, |
|
"eval_loss": 1.3413872718811035, |
|
"eval_runtime": 13.9419, |
|
"eval_samples_per_second": 89.442, |
|
"eval_steps_per_second": 11.189, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 9.07608695652174e-05, |
|
"loss": 1.3014, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 9.021739130434783e-05, |
|
"loss": 1.201, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 8.967391304347826e-05, |
|
"loss": 1.0165, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.91304347826087e-05, |
|
"loss": 0.965, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.9615076182838813, |
|
"eval_loss": 0.9264132380485535, |
|
"eval_runtime": 13.1542, |
|
"eval_samples_per_second": 94.798, |
|
"eval_steps_per_second": 11.859, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.858695652173914e-05, |
|
"loss": 0.9096, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 8.804347826086957e-05, |
|
"loss": 0.8527, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.75e-05, |
|
"loss": 0.759, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.695652173913044e-05, |
|
"loss": 0.7163, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.9647153167602245, |
|
"eval_loss": 0.6652109622955322, |
|
"eval_runtime": 14.0687, |
|
"eval_samples_per_second": 88.637, |
|
"eval_steps_per_second": 11.088, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 8.641304347826087e-05, |
|
"loss": 0.6403, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 8.586956521739131e-05, |
|
"loss": 0.5857, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 8.532608695652174e-05, |
|
"loss": 0.5406, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.478260869565218e-05, |
|
"loss": 0.5061, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.9687249398556536, |
|
"eval_loss": 0.5080122947692871, |
|
"eval_runtime": 13.0785, |
|
"eval_samples_per_second": 95.347, |
|
"eval_steps_per_second": 11.928, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 8.423913043478261e-05, |
|
"loss": 0.4622, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 8.369565217391305e-05, |
|
"loss": 0.4919, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 8.315217391304349e-05, |
|
"loss": 0.371, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.260869565217392e-05, |
|
"loss": 0.3883, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_accuracy": 0.9759422614274258, |
|
"eval_loss": 0.3574630916118622, |
|
"eval_runtime": 13.9479, |
|
"eval_samples_per_second": 89.404, |
|
"eval_steps_per_second": 11.184, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 8.206521739130435e-05, |
|
"loss": 0.3831, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 8.152173913043478e-05, |
|
"loss": 0.3329, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 8.097826086956523e-05, |
|
"loss": 0.3383, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.043478260869566e-05, |
|
"loss": 0.3328, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_accuracy": 0.9839615076182838, |
|
"eval_loss": 0.27629122138023376, |
|
"eval_runtime": 13.7308, |
|
"eval_samples_per_second": 90.818, |
|
"eval_steps_per_second": 11.361, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 7.989130434782609e-05, |
|
"loss": 0.2387, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 7.934782608695653e-05, |
|
"loss": 0.2509, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.880434782608696e-05, |
|
"loss": 0.2259, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 7.82608695652174e-05, |
|
"loss": 0.2049, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_accuracy": 0.9855653568564555, |
|
"eval_loss": 0.2094665914773941, |
|
"eval_runtime": 13.8999, |
|
"eval_samples_per_second": 89.713, |
|
"eval_steps_per_second": 11.223, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 7.771739130434783e-05, |
|
"loss": 0.1979, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 7.717391304347827e-05, |
|
"loss": 0.1703, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 7.66304347826087e-05, |
|
"loss": 0.1771, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 7.608695652173914e-05, |
|
"loss": 0.2078, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_accuracy": 0.9871692060946271, |
|
"eval_loss": 0.19693857431411743, |
|
"eval_runtime": 13.7091, |
|
"eval_samples_per_second": 90.961, |
|
"eval_steps_per_second": 11.379, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 7.554347826086957e-05, |
|
"loss": 0.1564, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.1512, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 7.445652173913044e-05, |
|
"loss": 0.1339, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 7.391304347826086e-05, |
|
"loss": 0.1447, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_accuracy": 0.9871692060946271, |
|
"eval_loss": 0.14835722744464874, |
|
"eval_runtime": 13.9008, |
|
"eval_samples_per_second": 89.707, |
|
"eval_steps_per_second": 11.222, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 7.336956521739132e-05, |
|
"loss": 0.1207, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 7.282608695652175e-05, |
|
"loss": 0.1294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 7.228260869565217e-05, |
|
"loss": 0.116, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 7.17391304347826e-05, |
|
"loss": 0.1401, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_accuracy": 0.9839615076182838, |
|
"eval_loss": 0.14811548590660095, |
|
"eval_runtime": 13.5209, |
|
"eval_samples_per_second": 92.228, |
|
"eval_steps_per_second": 11.538, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 7.119565217391306e-05, |
|
"loss": 0.0904, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 7.065217391304349e-05, |
|
"loss": 0.1099, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 7.010869565217391e-05, |
|
"loss": 0.1599, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"learning_rate": 6.956521739130436e-05, |
|
"loss": 0.1232, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.11416751146316528, |
|
"eval_runtime": 13.9361, |
|
"eval_samples_per_second": 89.48, |
|
"eval_steps_per_second": 11.194, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 6.902173913043478e-05, |
|
"loss": 0.1381, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 6.847826086956522e-05, |
|
"loss": 0.1001, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 6.793478260869565e-05, |
|
"loss": 0.0823, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 6.73913043478261e-05, |
|
"loss": 0.0725, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_accuracy": 0.9879711307137129, |
|
"eval_loss": 0.10076911747455597, |
|
"eval_runtime": 13.8114, |
|
"eval_samples_per_second": 90.288, |
|
"eval_steps_per_second": 11.295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 6.684782608695652e-05, |
|
"loss": 0.0852, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 6.630434782608695e-05, |
|
"loss": 0.0723, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.576086956521739e-05, |
|
"loss": 0.0881, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 6.521739130434783e-05, |
|
"loss": 0.0934, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"eval_accuracy": 0.9895749799518845, |
|
"eval_loss": 0.09398525953292847, |
|
"eval_runtime": 14.2025, |
|
"eval_samples_per_second": 87.801, |
|
"eval_steps_per_second": 10.984, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 6.467391304347826e-05, |
|
"loss": 0.0668, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 6.413043478260869e-05, |
|
"loss": 0.0586, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 6.358695652173913e-05, |
|
"loss": 0.0543, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 6.304347826086957e-05, |
|
"loss": 0.053, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_accuracy": 0.9895749799518845, |
|
"eval_loss": 0.08539092540740967, |
|
"eval_runtime": 13.9817, |
|
"eval_samples_per_second": 89.188, |
|
"eval_steps_per_second": 11.157, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.0514, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 6.195652173913043e-05, |
|
"loss": 0.0491, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 6.141304347826087e-05, |
|
"loss": 0.0481, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 6.086956521739131e-05, |
|
"loss": 0.0469, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"eval_accuracy": 0.9903769045709703, |
|
"eval_loss": 0.06862174719572067, |
|
"eval_runtime": 14.4287, |
|
"eval_samples_per_second": 86.425, |
|
"eval_steps_per_second": 10.812, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 6.032608695652174e-05, |
|
"loss": 0.0693, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 5.9782608695652175e-05, |
|
"loss": 0.0664, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 5.923913043478261e-05, |
|
"loss": 0.0502, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 5.869565217391305e-05, |
|
"loss": 0.0429, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_accuracy": 0.9863672814755413, |
|
"eval_loss": 0.0824466422200203, |
|
"eval_runtime": 13.8977, |
|
"eval_samples_per_second": 89.727, |
|
"eval_steps_per_second": 11.225, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 5.815217391304349e-05, |
|
"loss": 0.0622, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 5.7608695652173915e-05, |
|
"loss": 0.0394, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 5.706521739130435e-05, |
|
"loss": 0.0375, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 5.652173913043478e-05, |
|
"loss": 0.0371, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.07010400295257568, |
|
"eval_runtime": 13.4894, |
|
"eval_samples_per_second": 92.443, |
|
"eval_steps_per_second": 11.565, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 5.5978260869565226e-05, |
|
"loss": 0.036, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 5.5434782608695654e-05, |
|
"loss": 0.0352, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 5.489130434782609e-05, |
|
"loss": 0.0344, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 5.4347826086956524e-05, |
|
"loss": 0.033, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.06847481429576874, |
|
"eval_runtime": 13.9465, |
|
"eval_samples_per_second": 89.413, |
|
"eval_steps_per_second": 11.186, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 5.380434782608695e-05, |
|
"loss": 0.0327, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 5.32608695652174e-05, |
|
"loss": 0.0318, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 5.271739130434783e-05, |
|
"loss": 0.0315, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 5.217391304347826e-05, |
|
"loss": 0.0308, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.06314855068922043, |
|
"eval_runtime": 13.4895, |
|
"eval_samples_per_second": 92.442, |
|
"eval_steps_per_second": 11.565, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 5.163043478260869e-05, |
|
"loss": 0.0502, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 5.108695652173914e-05, |
|
"loss": 0.03, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 5.054347826086957e-05, |
|
"loss": 0.0294, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0398, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9927826784282278, |
|
"eval_loss": 0.05900084227323532, |
|
"eval_runtime": 14.0073, |
|
"eval_samples_per_second": 89.025, |
|
"eval_steps_per_second": 11.137, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 4.945652173913044e-05, |
|
"loss": 0.03, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 4.891304347826087e-05, |
|
"loss": 0.029, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 4.836956521739131e-05, |
|
"loss": 0.0273, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 4.782608695652174e-05, |
|
"loss": 0.0453, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"eval_accuracy": 0.9895749799518845, |
|
"eval_loss": 0.062146905809640884, |
|
"eval_runtime": 14.1053, |
|
"eval_samples_per_second": 88.406, |
|
"eval_steps_per_second": 11.06, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 4.7282608695652177e-05, |
|
"loss": 0.0415, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 4.673913043478261e-05, |
|
"loss": 0.0268, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 10.76, |
|
"learning_rate": 4.6195652173913046e-05, |
|
"loss": 0.0282, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 4.565217391304348e-05, |
|
"loss": 0.026, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_accuracy": 0.9855653568564555, |
|
"eval_loss": 0.0649920180439949, |
|
"eval_runtime": 13.8769, |
|
"eval_samples_per_second": 89.861, |
|
"eval_steps_per_second": 11.242, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 4.5108695652173916e-05, |
|
"loss": 0.0255, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 4.456521739130435e-05, |
|
"loss": 0.0246, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.4021739130434786e-05, |
|
"loss": 0.0264, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 4.347826086956522e-05, |
|
"loss": 0.0257, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"eval_accuracy": 0.9927826784282278, |
|
"eval_loss": 0.04654848575592041, |
|
"eval_runtime": 13.6877, |
|
"eval_samples_per_second": 91.103, |
|
"eval_steps_per_second": 11.397, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 4.2934782608695655e-05, |
|
"loss": 0.0237, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 4.239130434782609e-05, |
|
"loss": 0.0233, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 4.1847826086956525e-05, |
|
"loss": 0.0231, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 4.130434782608696e-05, |
|
"loss": 0.041, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"eval_accuracy": 0.9927826784282278, |
|
"eval_loss": 0.04421408474445343, |
|
"eval_runtime": 14.1229, |
|
"eval_samples_per_second": 88.296, |
|
"eval_steps_per_second": 11.046, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 4.076086956521739e-05, |
|
"loss": 0.0234, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 4.021739130434783e-05, |
|
"loss": 0.0221, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 12.07, |
|
"learning_rate": 3.9673913043478264e-05, |
|
"loss": 0.0251, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 3.91304347826087e-05, |
|
"loss": 0.0223, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"eval_accuracy": 0.9863672814755413, |
|
"eval_loss": 0.06379802525043488, |
|
"eval_runtime": 13.3726, |
|
"eval_samples_per_second": 93.25, |
|
"eval_steps_per_second": 11.666, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 12.28, |
|
"learning_rate": 3.8586956521739134e-05, |
|
"loss": 0.0222, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 3.804347826086957e-05, |
|
"loss": 0.0208, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0207, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 3.695652173913043e-05, |
|
"loss": 0.0205, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.050300538539886475, |
|
"eval_runtime": 14.0669, |
|
"eval_samples_per_second": 88.648, |
|
"eval_steps_per_second": 11.09, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 3.641304347826087e-05, |
|
"loss": 0.0331, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 3.58695652173913e-05, |
|
"loss": 0.021, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 3.532608695652174e-05, |
|
"loss": 0.0203, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 3.478260869565218e-05, |
|
"loss": 0.0221, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.047799013555049896, |
|
"eval_runtime": 13.3712, |
|
"eval_samples_per_second": 93.26, |
|
"eval_steps_per_second": 11.667, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 13.15, |
|
"learning_rate": 3.423913043478261e-05, |
|
"loss": 0.0191, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 3.369565217391305e-05, |
|
"loss": 0.0195, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 3.3152173913043475e-05, |
|
"loss": 0.0188, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"learning_rate": 3.260869565217392e-05, |
|
"loss": 0.0188, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.04699365794658661, |
|
"eval_runtime": 13.8942, |
|
"eval_samples_per_second": 89.75, |
|
"eval_steps_per_second": 11.228, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 3.2065217391304345e-05, |
|
"loss": 0.019, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 13.7, |
|
"learning_rate": 3.152173913043479e-05, |
|
"loss": 0.0184, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 3.0978260869565215e-05, |
|
"loss": 0.0179, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 3.0434782608695656e-05, |
|
"loss": 0.0302, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"eval_accuracy": 0.9927826784282278, |
|
"eval_loss": 0.04419828951358795, |
|
"eval_runtime": 13.9931, |
|
"eval_samples_per_second": 89.115, |
|
"eval_steps_per_second": 11.148, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.9891304347826088e-05, |
|
"loss": 0.0182, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 2.9347826086956526e-05, |
|
"loss": 0.0216, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 2.8804347826086957e-05, |
|
"loss": 0.0174, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"learning_rate": 2.826086956521739e-05, |
|
"loss": 0.0171, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_accuracy": 0.9935846030473136, |
|
"eval_loss": 0.04177280142903328, |
|
"eval_runtime": 13.9993, |
|
"eval_samples_per_second": 89.076, |
|
"eval_steps_per_second": 11.143, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 2.7717391304347827e-05, |
|
"loss": 0.0172, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 14.57, |
|
"learning_rate": 2.7173913043478262e-05, |
|
"loss": 0.0173, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 14.67, |
|
"learning_rate": 2.66304347826087e-05, |
|
"loss": 0.0259, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 2.608695652173913e-05, |
|
"loss": 0.0197, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.04225374758243561, |
|
"eval_runtime": 14.4748, |
|
"eval_samples_per_second": 86.15, |
|
"eval_steps_per_second": 10.777, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 2.554347826086957e-05, |
|
"loss": 0.0166, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0163, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 2.4456521739130436e-05, |
|
"loss": 0.0164, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 2.391304347826087e-05, |
|
"loss": 0.0162, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"eval_accuracy": 0.9927826784282278, |
|
"eval_loss": 0.04216426983475685, |
|
"eval_runtime": 14.0671, |
|
"eval_samples_per_second": 88.646, |
|
"eval_steps_per_second": 11.09, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 2.3369565217391306e-05, |
|
"loss": 0.0172, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 2.282608695652174e-05, |
|
"loss": 0.016, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 2.2282608695652175e-05, |
|
"loss": 0.0158, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 2.173913043478261e-05, |
|
"loss": 0.0159, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.043235816061496735, |
|
"eval_runtime": 13.435, |
|
"eval_samples_per_second": 92.817, |
|
"eval_steps_per_second": 11.611, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 2.1195652173913045e-05, |
|
"loss": 0.0158, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 2.065217391304348e-05, |
|
"loss": 0.0252, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 2.0108695652173915e-05, |
|
"loss": 0.0156, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 1.956521739130435e-05, |
|
"loss": 0.0155, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.04137137532234192, |
|
"eval_runtime": 13.9815, |
|
"eval_samples_per_second": 89.189, |
|
"eval_steps_per_second": 11.158, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 1.9021739130434784e-05, |
|
"loss": 0.0175, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 1.8478260869565216e-05, |
|
"loss": 0.0155, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 1.793478260869565e-05, |
|
"loss": 0.0258, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.015, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.0487416572868824, |
|
"eval_runtime": 13.4779, |
|
"eval_samples_per_second": 92.522, |
|
"eval_steps_per_second": 11.575, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 1.6847826086956524e-05, |
|
"loss": 0.0152, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 1.630434782608696e-05, |
|
"loss": 0.0174, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 16.85, |
|
"learning_rate": 1.5760869565217393e-05, |
|
"loss": 0.0147, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 1.5217391304347828e-05, |
|
"loss": 0.015, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.04399973526597023, |
|
"eval_runtime": 14.0057, |
|
"eval_samples_per_second": 89.035, |
|
"eval_steps_per_second": 11.138, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 1.4673913043478263e-05, |
|
"loss": 0.0148, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 17.17, |
|
"learning_rate": 1.4130434782608694e-05, |
|
"loss": 0.0147, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 1.3586956521739131e-05, |
|
"loss": 0.0148, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 1.3043478260869566e-05, |
|
"loss": 0.0146, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.04343697056174278, |
|
"eval_runtime": 14.4989, |
|
"eval_samples_per_second": 86.007, |
|
"eval_steps_per_second": 10.759, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0145, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 1.1956521739130435e-05, |
|
"loss": 0.0144, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 1.141304347826087e-05, |
|
"loss": 0.0149, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"loss": 0.0143, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 17.83, |
|
"eval_accuracy": 0.991980753809142, |
|
"eval_loss": 0.042883455753326416, |
|
"eval_runtime": 14.0877, |
|
"eval_samples_per_second": 88.517, |
|
"eval_steps_per_second": 11.073, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 1.032608695652174e-05, |
|
"loss": 0.0142, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 9.782608695652175e-06, |
|
"loss": 0.0225, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 18.15, |
|
"learning_rate": 9.239130434782608e-06, |
|
"loss": 0.0146, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 0.0143, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.04524253308773041, |
|
"eval_runtime": 14.4999, |
|
"eval_samples_per_second": 86.001, |
|
"eval_steps_per_second": 10.759, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 8.15217391304348e-06, |
|
"loss": 0.0154, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 7.608695652173914e-06, |
|
"loss": 0.0144, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 18.59, |
|
"learning_rate": 7.065217391304347e-06, |
|
"loss": 0.014, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 6.521739130434783e-06, |
|
"loss": 0.014, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.04453733563423157, |
|
"eval_runtime": 14.0735, |
|
"eval_samples_per_second": 88.606, |
|
"eval_steps_per_second": 11.085, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 5.978260869565218e-06, |
|
"loss": 0.018, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 5.4347826086956525e-06, |
|
"loss": 0.014, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 19.02, |
|
"learning_rate": 4.891304347826087e-06, |
|
"loss": 0.0155, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.0141, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.048826370388269424, |
|
"eval_runtime": 14.2162, |
|
"eval_samples_per_second": 87.717, |
|
"eval_steps_per_second": 10.973, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 3.804347826086957e-06, |
|
"loss": 0.0139, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 3.2608695652173914e-06, |
|
"loss": 0.0139, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 2.7173913043478263e-06, |
|
"loss": 0.0139, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 2.173913043478261e-06, |
|
"loss": 0.0138, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.048504043370485306, |
|
"eval_runtime": 13.6564, |
|
"eval_samples_per_second": 91.312, |
|
"eval_steps_per_second": 11.423, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 19.67, |
|
"learning_rate": 1.6304347826086957e-06, |
|
"loss": 0.0144, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 1.0869565217391306e-06, |
|
"loss": 0.0155, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 19.89, |
|
"learning_rate": 5.434782608695653e-07, |
|
"loss": 0.0141, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0138, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9911788291900562, |
|
"eval_loss": 0.0495075099170208, |
|
"eval_runtime": 14.095, |
|
"eval_samples_per_second": 88.471, |
|
"eval_steps_per_second": 11.068, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 1840, |
|
"total_flos": 3.419773941089157e+18, |
|
"train_loss": 0.33101742866894474, |
|
"train_runtime": 1771.2395, |
|
"train_samples_per_second": 24.909, |
|
"train_steps_per_second": 1.039 |
|
} |
|
], |
|
"max_steps": 1840, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.419773941089157e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|