{ "best_metric": 0.9837157660991858, "best_model_checkpoint": "videomae-base-finetuned-ucf101\\checkpoint-8320", "epoch": 9.098918269230769, "eval_steps": 500, "global_step": 8320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.009615384615385e-07, "loss": 4.0127, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.201923076923077e-06, "loss": 4.0116, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.8028846153846153e-06, "loss": 3.9416, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.403846153846154e-06, "loss": 3.954, "step": 40 }, { "epoch": 0.01, "learning_rate": 3.0048076923076927e-06, "loss": 4.0102, "step": 50 }, { "epoch": 0.01, "learning_rate": 3.6057692307692307e-06, "loss": 3.9791, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.20673076923077e-06, "loss": 3.9077, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.807692307692308e-06, "loss": 3.9278, "step": 80 }, { "epoch": 0.01, "learning_rate": 5.408653846153847e-06, "loss": 3.9414, "step": 90 }, { "epoch": 0.01, "learning_rate": 6.0096153846153855e-06, "loss": 3.992, "step": 100 }, { "epoch": 0.01, "learning_rate": 6.610576923076923e-06, "loss": 3.9291, "step": 110 }, { "epoch": 0.01, "learning_rate": 7.211538461538461e-06, "loss": 3.9836, "step": 120 }, { "epoch": 0.02, "learning_rate": 7.8125e-06, "loss": 3.914, "step": 130 }, { "epoch": 0.02, "learning_rate": 8.41346153846154e-06, "loss": 3.8372, "step": 140 }, { "epoch": 0.02, "learning_rate": 9.014423076923078e-06, "loss": 3.8477, "step": 150 }, { "epoch": 0.02, "learning_rate": 9.615384615384616e-06, "loss": 3.7954, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.0216346153846154e-05, "loss": 3.8203, "step": 170 }, { "epoch": 0.02, "learning_rate": 1.0817307692307693e-05, "loss": 3.8033, "step": 180 }, { "epoch": 0.02, "learning_rate": 1.1418269230769231e-05, "loss": 3.7756, "step": 190 }, { "epoch": 0.02, "learning_rate": 1.2019230769230771e-05, "loss": 3.8827, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.2620192307692307e-05, "loss": 3.7937, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.3221153846153847e-05, "loss": 3.7683, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.3822115384615386e-05, "loss": 3.6827, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.4423076923076923e-05, "loss": 3.6884, "step": 240 }, { "epoch": 0.03, "learning_rate": 1.5024038461538462e-05, "loss": 3.6489, "step": 250 }, { "epoch": 0.03, "learning_rate": 1.5625e-05, "loss": 3.6757, "step": 260 }, { "epoch": 0.03, "learning_rate": 1.6225961538461538e-05, "loss": 3.5549, "step": 270 }, { "epoch": 0.03, "learning_rate": 1.682692307692308e-05, "loss": 3.509, "step": 280 }, { "epoch": 0.03, "learning_rate": 1.7427884615384614e-05, "loss": 3.4363, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.8028846153846156e-05, "loss": 3.4794, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.8629807692307693e-05, "loss": 3.4726, "step": 310 }, { "epoch": 0.04, "learning_rate": 1.923076923076923e-05, "loss": 3.3436, "step": 320 }, { "epoch": 0.04, "learning_rate": 1.983173076923077e-05, "loss": 3.3382, "step": 330 }, { "epoch": 0.04, "learning_rate": 2.0432692307692307e-05, "loss": 3.1034, "step": 340 }, { "epoch": 0.04, "learning_rate": 2.103365384615385e-05, "loss": 3.151, "step": 350 }, { "epoch": 0.04, "learning_rate": 2.1634615384615387e-05, "loss": 3.1399, "step": 360 }, { "epoch": 0.04, "learning_rate": 2.223557692307692e-05, "loss": 3.1325, "step": 370 }, { "epoch": 0.05, "learning_rate": 2.2836538461538463e-05, "loss": 3.214, "step": 380 }, { "epoch": 0.05, "learning_rate": 2.34375e-05, "loss": 2.9566, "step": 390 }, { "epoch": 0.05, "learning_rate": 2.4038461538461542e-05, "loss": 2.8413, "step": 400 }, { "epoch": 0.05, "learning_rate": 2.463942307692308e-05, "loss": 2.769, "step": 410 }, { "epoch": 0.05, "learning_rate": 2.5240384615384614e-05, "loss": 2.8527, "step": 420 }, { "epoch": 0.05, "learning_rate": 2.584134615384616e-05, "loss": 2.7911, "step": 430 }, { "epoch": 0.05, "learning_rate": 2.6442307692307694e-05, "loss": 2.4166, "step": 440 }, { "epoch": 0.05, "learning_rate": 2.704326923076923e-05, "loss": 2.7125, "step": 450 }, { "epoch": 0.06, "learning_rate": 2.7644230769230773e-05, "loss": 2.4932, "step": 460 }, { "epoch": 0.06, "learning_rate": 2.8245192307692307e-05, "loss": 2.454, "step": 470 }, { "epoch": 0.06, "learning_rate": 2.8846153846153845e-05, "loss": 2.2737, "step": 480 }, { "epoch": 0.06, "learning_rate": 2.9447115384615387e-05, "loss": 2.316, "step": 490 }, { "epoch": 0.06, "learning_rate": 3.0048076923076925e-05, "loss": 2.538, "step": 500 }, { "epoch": 0.06, "learning_rate": 3.064903846153846e-05, "loss": 2.1644, "step": 510 }, { "epoch": 0.06, "learning_rate": 3.125e-05, "loss": 2.1932, "step": 520 }, { "epoch": 0.06, "learning_rate": 3.185096153846154e-05, "loss": 2.1082, "step": 530 }, { "epoch": 0.06, "learning_rate": 3.2451923076923077e-05, "loss": 2.2554, "step": 540 }, { "epoch": 0.07, "learning_rate": 3.3052884615384615e-05, "loss": 2.1601, "step": 550 }, { "epoch": 0.07, "learning_rate": 3.365384615384616e-05, "loss": 2.0342, "step": 560 }, { "epoch": 0.07, "learning_rate": 3.42548076923077e-05, "loss": 1.9706, "step": 570 }, { "epoch": 0.07, "learning_rate": 3.485576923076923e-05, "loss": 2.0727, "step": 580 }, { "epoch": 0.07, "learning_rate": 3.545673076923077e-05, "loss": 1.8431, "step": 590 }, { "epoch": 0.07, "learning_rate": 3.605769230769231e-05, "loss": 1.9603, "step": 600 }, { "epoch": 0.07, "learning_rate": 3.665865384615384e-05, "loss": 1.9677, "step": 610 }, { "epoch": 0.07, "learning_rate": 3.725961538461539e-05, "loss": 1.7238, "step": 620 }, { "epoch": 0.08, "learning_rate": 3.7860576923076925e-05, "loss": 1.8137, "step": 630 }, { "epoch": 0.08, "learning_rate": 3.846153846153846e-05, "loss": 1.5439, "step": 640 }, { "epoch": 0.08, "learning_rate": 3.90625e-05, "loss": 1.7736, "step": 650 }, { "epoch": 0.08, "learning_rate": 3.966346153846154e-05, "loss": 1.7694, "step": 660 }, { "epoch": 0.08, "learning_rate": 4.0264423076923083e-05, "loss": 1.5685, "step": 670 }, { "epoch": 0.08, "learning_rate": 4.0865384615384615e-05, "loss": 1.3912, "step": 680 }, { "epoch": 0.08, "learning_rate": 4.146634615384616e-05, "loss": 1.3916, "step": 690 }, { "epoch": 0.08, "learning_rate": 4.20673076923077e-05, "loss": 1.5716, "step": 700 }, { "epoch": 0.09, "learning_rate": 4.266826923076923e-05, "loss": 1.7127, "step": 710 }, { "epoch": 0.09, "learning_rate": 4.326923076923077e-05, "loss": 1.6191, "step": 720 }, { "epoch": 0.09, "learning_rate": 4.387019230769231e-05, "loss": 1.53, "step": 730 }, { "epoch": 0.09, "learning_rate": 4.447115384615384e-05, "loss": 1.3601, "step": 740 }, { "epoch": 0.09, "learning_rate": 4.507211538461539e-05, "loss": 1.5335, "step": 750 }, { "epoch": 0.09, "learning_rate": 4.5673076923076925e-05, "loss": 1.5374, "step": 760 }, { "epoch": 0.09, "learning_rate": 4.627403846153846e-05, "loss": 1.4242, "step": 770 }, { "epoch": 0.09, "learning_rate": 4.6875e-05, "loss": 1.3306, "step": 780 }, { "epoch": 0.09, "learning_rate": 4.747596153846154e-05, "loss": 1.1707, "step": 790 }, { "epoch": 0.1, "learning_rate": 4.8076923076923084e-05, "loss": 1.3212, "step": 800 }, { "epoch": 0.1, "learning_rate": 4.8677884615384615e-05, "loss": 1.0776, "step": 810 }, { "epoch": 0.1, "learning_rate": 4.927884615384616e-05, "loss": 1.1776, "step": 820 }, { "epoch": 0.1, "learning_rate": 4.98798076923077e-05, "loss": 1.1009, "step": 830 }, { "epoch": 0.1, "eval_accuracy": 0.6558105107327905, "eval_loss": 1.2521276473999023, "eval_runtime": 89.0403, "eval_samples_per_second": 15.173, "eval_steps_per_second": 2.538, "step": 833 }, { "epoch": 1.0, "learning_rate": 4.99465811965812e-05, "loss": 1.2513, "step": 840 }, { "epoch": 1.0, "learning_rate": 4.98798076923077e-05, "loss": 1.2784, "step": 850 }, { "epoch": 1.0, "learning_rate": 4.981303418803419e-05, "loss": 1.0356, "step": 860 }, { "epoch": 1.0, "learning_rate": 4.9746260683760685e-05, "loss": 1.2674, "step": 870 }, { "epoch": 1.01, "learning_rate": 4.9679487179487185e-05, "loss": 1.2378, "step": 880 }, { "epoch": 1.01, "learning_rate": 4.961271367521368e-05, "loss": 1.0986, "step": 890 }, { "epoch": 1.01, "learning_rate": 4.954594017094017e-05, "loss": 0.9062, "step": 900 }, { "epoch": 1.01, "learning_rate": 4.947916666666667e-05, "loss": 1.0728, "step": 910 }, { "epoch": 1.01, "learning_rate": 4.9412393162393166e-05, "loss": 1.12, "step": 920 }, { "epoch": 1.01, "learning_rate": 4.9345619658119666e-05, "loss": 1.2067, "step": 930 }, { "epoch": 1.01, "learning_rate": 4.927884615384616e-05, "loss": 1.1527, "step": 940 }, { "epoch": 1.01, "learning_rate": 4.9212072649572646e-05, "loss": 1.1009, "step": 950 }, { "epoch": 1.02, "learning_rate": 4.9145299145299147e-05, "loss": 0.9438, "step": 960 }, { "epoch": 1.02, "learning_rate": 4.907852564102564e-05, "loss": 0.8438, "step": 970 }, { "epoch": 1.02, "learning_rate": 4.901175213675214e-05, "loss": 0.7278, "step": 980 }, { "epoch": 1.02, "learning_rate": 4.8944978632478634e-05, "loss": 0.8685, "step": 990 }, { "epoch": 1.02, "learning_rate": 4.887820512820513e-05, "loss": 1.0214, "step": 1000 }, { "epoch": 1.02, "learning_rate": 4.881143162393163e-05, "loss": 0.9312, "step": 1010 }, { "epoch": 1.02, "learning_rate": 4.874465811965812e-05, "loss": 0.8984, "step": 1020 }, { "epoch": 1.02, "learning_rate": 4.8677884615384615e-05, "loss": 0.8442, "step": 1030 }, { "epoch": 1.02, "learning_rate": 4.8611111111111115e-05, "loss": 0.6382, "step": 1040 }, { "epoch": 1.03, "learning_rate": 4.854433760683761e-05, "loss": 0.6792, "step": 1050 }, { "epoch": 1.03, "learning_rate": 4.84775641025641e-05, "loss": 0.8806, "step": 1060 }, { "epoch": 1.03, "learning_rate": 4.84107905982906e-05, "loss": 0.9612, "step": 1070 }, { "epoch": 1.03, "learning_rate": 4.8344017094017096e-05, "loss": 0.8101, "step": 1080 }, { "epoch": 1.03, "learning_rate": 4.827724358974359e-05, "loss": 0.7791, "step": 1090 }, { "epoch": 1.03, "learning_rate": 4.821047008547009e-05, "loss": 0.8231, "step": 1100 }, { "epoch": 1.03, "learning_rate": 4.814369658119658e-05, "loss": 0.6457, "step": 1110 }, { "epoch": 1.03, "learning_rate": 4.8076923076923084e-05, "loss": 0.5397, "step": 1120 }, { "epoch": 1.04, "learning_rate": 4.801014957264958e-05, "loss": 0.7663, "step": 1130 }, { "epoch": 1.04, "learning_rate": 4.794337606837607e-05, "loss": 1.0645, "step": 1140 }, { "epoch": 1.04, "learning_rate": 4.787660256410257e-05, "loss": 1.0223, "step": 1150 }, { "epoch": 1.04, "learning_rate": 4.7809829059829065e-05, "loss": 1.0806, "step": 1160 }, { "epoch": 1.04, "learning_rate": 4.774305555555556e-05, "loss": 0.9156, "step": 1170 }, { "epoch": 1.04, "learning_rate": 4.767628205128206e-05, "loss": 0.7737, "step": 1180 }, { "epoch": 1.04, "learning_rate": 4.7609508547008545e-05, "loss": 0.6834, "step": 1190 }, { "epoch": 1.04, "learning_rate": 4.7542735042735045e-05, "loss": 0.808, "step": 1200 }, { "epoch": 1.05, "learning_rate": 4.747596153846154e-05, "loss": 0.9556, "step": 1210 }, { "epoch": 1.05, "learning_rate": 4.740918803418803e-05, "loss": 0.5992, "step": 1220 }, { "epoch": 1.05, "learning_rate": 4.734241452991453e-05, "loss": 0.6876, "step": 1230 }, { "epoch": 1.05, "learning_rate": 4.7275641025641026e-05, "loss": 0.7753, "step": 1240 }, { "epoch": 1.05, "learning_rate": 4.720886752136752e-05, "loss": 0.6666, "step": 1250 }, { "epoch": 1.05, "learning_rate": 4.714209401709402e-05, "loss": 0.4952, "step": 1260 }, { "epoch": 1.05, "learning_rate": 4.7075320512820514e-05, "loss": 0.9254, "step": 1270 }, { "epoch": 1.05, "learning_rate": 4.700854700854701e-05, "loss": 0.8664, "step": 1280 }, { "epoch": 1.05, "learning_rate": 4.694177350427351e-05, "loss": 0.6691, "step": 1290 }, { "epoch": 1.06, "learning_rate": 4.6875e-05, "loss": 0.7406, "step": 1300 }, { "epoch": 1.06, "learning_rate": 4.68082264957265e-05, "loss": 0.6539, "step": 1310 }, { "epoch": 1.06, "learning_rate": 4.6741452991452995e-05, "loss": 0.9185, "step": 1320 }, { "epoch": 1.06, "learning_rate": 4.667467948717949e-05, "loss": 0.7776, "step": 1330 }, { "epoch": 1.06, "learning_rate": 4.660790598290599e-05, "loss": 0.8129, "step": 1340 }, { "epoch": 1.06, "learning_rate": 4.654113247863248e-05, "loss": 0.4696, "step": 1350 }, { "epoch": 1.06, "learning_rate": 4.6474358974358976e-05, "loss": 0.4895, "step": 1360 }, { "epoch": 1.06, "learning_rate": 4.6407585470085476e-05, "loss": 0.6573, "step": 1370 }, { "epoch": 1.07, "learning_rate": 4.634081196581197e-05, "loss": 0.6352, "step": 1380 }, { "epoch": 1.07, "learning_rate": 4.627403846153846e-05, "loss": 0.4662, "step": 1390 }, { "epoch": 1.07, "learning_rate": 4.620726495726496e-05, "loss": 0.6592, "step": 1400 }, { "epoch": 1.07, "learning_rate": 4.614049145299146e-05, "loss": 0.6501, "step": 1410 }, { "epoch": 1.07, "learning_rate": 4.607371794871795e-05, "loss": 0.5648, "step": 1420 }, { "epoch": 1.07, "learning_rate": 4.6006944444444444e-05, "loss": 0.7981, "step": 1430 }, { "epoch": 1.07, "learning_rate": 4.594017094017094e-05, "loss": 0.573, "step": 1440 }, { "epoch": 1.07, "learning_rate": 4.587339743589744e-05, "loss": 0.6546, "step": 1450 }, { "epoch": 1.08, "learning_rate": 4.580662393162393e-05, "loss": 0.5825, "step": 1460 }, { "epoch": 1.08, "learning_rate": 4.5739850427350425e-05, "loss": 0.5756, "step": 1470 }, { "epoch": 1.08, "learning_rate": 4.5673076923076925e-05, "loss": 0.5927, "step": 1480 }, { "epoch": 1.08, "learning_rate": 4.560630341880342e-05, "loss": 0.577, "step": 1490 }, { "epoch": 1.08, "learning_rate": 4.553952991452992e-05, "loss": 0.726, "step": 1500 }, { "epoch": 1.08, "learning_rate": 4.547275641025641e-05, "loss": 0.5516, "step": 1510 }, { "epoch": 1.08, "learning_rate": 4.5405982905982906e-05, "loss": 0.5479, "step": 1520 }, { "epoch": 1.08, "learning_rate": 4.5339209401709406e-05, "loss": 0.4325, "step": 1530 }, { "epoch": 1.08, "learning_rate": 4.52724358974359e-05, "loss": 0.4567, "step": 1540 }, { "epoch": 1.09, "learning_rate": 4.520566239316239e-05, "loss": 0.5283, "step": 1550 }, { "epoch": 1.09, "learning_rate": 4.5138888888888894e-05, "loss": 0.3399, "step": 1560 }, { "epoch": 1.09, "learning_rate": 4.507211538461539e-05, "loss": 0.5445, "step": 1570 }, { "epoch": 1.09, "learning_rate": 4.500534188034188e-05, "loss": 0.3424, "step": 1580 }, { "epoch": 1.09, "learning_rate": 4.493856837606838e-05, "loss": 0.624, "step": 1590 }, { "epoch": 1.09, "learning_rate": 4.4871794871794874e-05, "loss": 0.529, "step": 1600 }, { "epoch": 1.09, "learning_rate": 4.4805021367521375e-05, "loss": 0.5173, "step": 1610 }, { "epoch": 1.09, "learning_rate": 4.473824786324787e-05, "loss": 0.5501, "step": 1620 }, { "epoch": 1.1, "learning_rate": 4.467147435897436e-05, "loss": 0.3545, "step": 1630 }, { "epoch": 1.1, "learning_rate": 4.460470085470086e-05, "loss": 0.5297, "step": 1640 }, { "epoch": 1.1, "learning_rate": 4.4537927350427356e-05, "loss": 0.3494, "step": 1650 }, { "epoch": 1.1, "learning_rate": 4.447115384615384e-05, "loss": 0.2821, "step": 1660 }, { "epoch": 1.1, "eval_accuracy": 0.8623242042931162, "eval_loss": 0.43101879954338074, "eval_runtime": 85.5894, "eval_samples_per_second": 15.785, "eval_steps_per_second": 2.641, "step": 1666 }, { "epoch": 2.0, "learning_rate": 4.440438034188034e-05, "loss": 0.4212, "step": 1670 }, { "epoch": 2.0, "learning_rate": 4.4337606837606836e-05, "loss": 0.4583, "step": 1680 }, { "epoch": 2.0, "learning_rate": 4.4270833333333337e-05, "loss": 0.2378, "step": 1690 }, { "epoch": 2.0, "learning_rate": 4.420405982905983e-05, "loss": 0.4887, "step": 1700 }, { "epoch": 2.01, "learning_rate": 4.4137286324786324e-05, "loss": 0.312, "step": 1710 }, { "epoch": 2.01, "learning_rate": 4.4070512820512824e-05, "loss": 0.3402, "step": 1720 }, { "epoch": 2.01, "learning_rate": 4.400373931623932e-05, "loss": 0.2723, "step": 1730 }, { "epoch": 2.01, "learning_rate": 4.393696581196581e-05, "loss": 0.2785, "step": 1740 }, { "epoch": 2.01, "learning_rate": 4.387019230769231e-05, "loss": 0.187, "step": 1750 }, { "epoch": 2.01, "learning_rate": 4.3803418803418805e-05, "loss": 0.2831, "step": 1760 }, { "epoch": 2.01, "learning_rate": 4.37366452991453e-05, "loss": 0.6487, "step": 1770 }, { "epoch": 2.01, "learning_rate": 4.36698717948718e-05, "loss": 0.3937, "step": 1780 }, { "epoch": 2.01, "learning_rate": 4.360309829059829e-05, "loss": 0.2464, "step": 1790 }, { "epoch": 2.02, "learning_rate": 4.353632478632479e-05, "loss": 0.1763, "step": 1800 }, { "epoch": 2.02, "learning_rate": 4.3469551282051286e-05, "loss": 0.5681, "step": 1810 }, { "epoch": 2.02, "learning_rate": 4.340277777777778e-05, "loss": 0.3058, "step": 1820 }, { "epoch": 2.02, "learning_rate": 4.333600427350428e-05, "loss": 0.4737, "step": 1830 }, { "epoch": 2.02, "learning_rate": 4.326923076923077e-05, "loss": 0.2714, "step": 1840 }, { "epoch": 2.02, "learning_rate": 4.320245726495727e-05, "loss": 0.2996, "step": 1850 }, { "epoch": 2.02, "learning_rate": 4.313568376068377e-05, "loss": 0.1767, "step": 1860 }, { "epoch": 2.02, "learning_rate": 4.306891025641026e-05, "loss": 0.3166, "step": 1870 }, { "epoch": 2.03, "learning_rate": 4.3002136752136754e-05, "loss": 0.6681, "step": 1880 }, { "epoch": 2.03, "learning_rate": 4.293536324786325e-05, "loss": 0.271, "step": 1890 }, { "epoch": 2.03, "learning_rate": 4.286858974358974e-05, "loss": 0.2432, "step": 1900 }, { "epoch": 2.03, "learning_rate": 4.280181623931624e-05, "loss": 0.3976, "step": 1910 }, { "epoch": 2.03, "learning_rate": 4.2735042735042735e-05, "loss": 0.4972, "step": 1920 }, { "epoch": 2.03, "learning_rate": 4.266826923076923e-05, "loss": 0.4663, "step": 1930 }, { "epoch": 2.03, "learning_rate": 4.260149572649573e-05, "loss": 0.456, "step": 1940 }, { "epoch": 2.03, "learning_rate": 4.253472222222222e-05, "loss": 0.5133, "step": 1950 }, { "epoch": 2.04, "learning_rate": 4.2467948717948716e-05, "loss": 0.2464, "step": 1960 }, { "epoch": 2.04, "learning_rate": 4.2401175213675216e-05, "loss": 0.2097, "step": 1970 }, { "epoch": 2.04, "learning_rate": 4.233440170940171e-05, "loss": 0.479, "step": 1980 }, { "epoch": 2.04, "learning_rate": 4.226762820512821e-05, "loss": 0.2995, "step": 1990 }, { "epoch": 2.04, "learning_rate": 4.2200854700854704e-05, "loss": 0.5557, "step": 2000 }, { "epoch": 2.04, "learning_rate": 4.21340811965812e-05, "loss": 0.572, "step": 2010 }, { "epoch": 2.04, "learning_rate": 4.20673076923077e-05, "loss": 0.5636, "step": 2020 }, { "epoch": 2.04, "learning_rate": 4.200053418803419e-05, "loss": 0.2179, "step": 2030 }, { "epoch": 2.04, "learning_rate": 4.1933760683760684e-05, "loss": 0.3565, "step": 2040 }, { "epoch": 2.05, "learning_rate": 4.1866987179487185e-05, "loss": 0.2154, "step": 2050 }, { "epoch": 2.05, "learning_rate": 4.180021367521368e-05, "loss": 0.2826, "step": 2060 }, { "epoch": 2.05, "learning_rate": 4.173344017094017e-05, "loss": 0.3187, "step": 2070 }, { "epoch": 2.05, "learning_rate": 4.166666666666667e-05, "loss": 0.356, "step": 2080 }, { "epoch": 2.05, "learning_rate": 4.1599893162393166e-05, "loss": 0.1521, "step": 2090 }, { "epoch": 2.05, "learning_rate": 4.153311965811966e-05, "loss": 0.2919, "step": 2100 }, { "epoch": 2.05, "learning_rate": 4.146634615384616e-05, "loss": 0.2014, "step": 2110 }, { "epoch": 2.05, "learning_rate": 4.1399572649572646e-05, "loss": 0.535, "step": 2120 }, { "epoch": 2.06, "learning_rate": 4.1332799145299146e-05, "loss": 0.2376, "step": 2130 }, { "epoch": 2.06, "learning_rate": 4.126602564102564e-05, "loss": 0.3963, "step": 2140 }, { "epoch": 2.06, "learning_rate": 4.1199252136752133e-05, "loss": 0.3987, "step": 2150 }, { "epoch": 2.06, "learning_rate": 4.1132478632478634e-05, "loss": 0.3923, "step": 2160 }, { "epoch": 2.06, "learning_rate": 4.106570512820513e-05, "loss": 0.5621, "step": 2170 }, { "epoch": 2.06, "learning_rate": 4.099893162393163e-05, "loss": 0.3855, "step": 2180 }, { "epoch": 2.06, "learning_rate": 4.093215811965812e-05, "loss": 0.369, "step": 2190 }, { "epoch": 2.06, "learning_rate": 4.0865384615384615e-05, "loss": 0.1231, "step": 2200 }, { "epoch": 2.07, "learning_rate": 4.0798611111111115e-05, "loss": 0.1671, "step": 2210 }, { "epoch": 2.07, "learning_rate": 4.073183760683761e-05, "loss": 0.1788, "step": 2220 }, { "epoch": 2.07, "learning_rate": 4.06650641025641e-05, "loss": 0.5884, "step": 2230 }, { "epoch": 2.07, "learning_rate": 4.05982905982906e-05, "loss": 0.2911, "step": 2240 }, { "epoch": 2.07, "learning_rate": 4.0531517094017096e-05, "loss": 0.1407, "step": 2250 }, { "epoch": 2.07, "learning_rate": 4.046474358974359e-05, "loss": 0.4743, "step": 2260 }, { "epoch": 2.07, "learning_rate": 4.039797008547009e-05, "loss": 0.2106, "step": 2270 }, { "epoch": 2.07, "learning_rate": 4.033119658119658e-05, "loss": 0.4062, "step": 2280 }, { "epoch": 2.08, "learning_rate": 4.0264423076923083e-05, "loss": 0.195, "step": 2290 }, { "epoch": 2.08, "learning_rate": 4.019764957264958e-05, "loss": 0.1114, "step": 2300 }, { "epoch": 2.08, "learning_rate": 4.013087606837607e-05, "loss": 0.368, "step": 2310 }, { "epoch": 2.08, "learning_rate": 4.006410256410257e-05, "loss": 0.2268, "step": 2320 }, { "epoch": 2.08, "learning_rate": 3.9997329059829064e-05, "loss": 0.4106, "step": 2330 }, { "epoch": 2.08, "learning_rate": 3.993055555555556e-05, "loss": 0.2388, "step": 2340 }, { "epoch": 2.08, "learning_rate": 3.986378205128206e-05, "loss": 0.085, "step": 2350 }, { "epoch": 2.08, "learning_rate": 3.9797008547008545e-05, "loss": 0.1083, "step": 2360 }, { "epoch": 2.08, "learning_rate": 3.9730235042735045e-05, "loss": 0.1683, "step": 2370 }, { "epoch": 2.09, "learning_rate": 3.966346153846154e-05, "loss": 0.0891, "step": 2380 }, { "epoch": 2.09, "learning_rate": 3.959668803418803e-05, "loss": 0.0728, "step": 2390 }, { "epoch": 2.09, "learning_rate": 3.952991452991453e-05, "loss": 0.1584, "step": 2400 }, { "epoch": 2.09, "learning_rate": 3.9463141025641026e-05, "loss": 0.2689, "step": 2410 }, { "epoch": 2.09, "learning_rate": 3.939636752136752e-05, "loss": 0.1891, "step": 2420 }, { "epoch": 2.09, "learning_rate": 3.932959401709402e-05, "loss": 0.3679, "step": 2430 }, { "epoch": 2.09, "learning_rate": 3.9262820512820513e-05, "loss": 0.1882, "step": 2440 }, { "epoch": 2.09, "learning_rate": 3.919604700854701e-05, "loss": 0.2205, "step": 2450 }, { "epoch": 2.1, "learning_rate": 3.912927350427351e-05, "loss": 0.1591, "step": 2460 }, { "epoch": 2.1, "learning_rate": 3.90625e-05, "loss": 0.1836, "step": 2470 }, { "epoch": 2.1, "learning_rate": 3.89957264957265e-05, "loss": 0.0979, "step": 2480 }, { "epoch": 2.1, "learning_rate": 3.8928952991452995e-05, "loss": 0.4519, "step": 2490 }, { "epoch": 2.1, "eval_accuracy": 0.9222797927461139, "eval_loss": 0.26454582810401917, "eval_runtime": 85.593, "eval_samples_per_second": 15.784, "eval_steps_per_second": 2.64, "step": 2499 }, { "epoch": 3.0, "learning_rate": 3.886217948717949e-05, "loss": 0.2009, "step": 2500 }, { "epoch": 3.0, "learning_rate": 3.879540598290599e-05, "loss": 0.1853, "step": 2510 }, { "epoch": 3.0, "learning_rate": 3.872863247863248e-05, "loss": 0.0564, "step": 2520 }, { "epoch": 3.0, "learning_rate": 3.8661858974358976e-05, "loss": 0.2384, "step": 2530 }, { "epoch": 3.0, "learning_rate": 3.8595085470085476e-05, "loss": 0.2116, "step": 2540 }, { "epoch": 3.01, "learning_rate": 3.852831196581197e-05, "loss": 0.2025, "step": 2550 }, { "epoch": 3.01, "learning_rate": 3.846153846153846e-05, "loss": 0.453, "step": 2560 }, { "epoch": 3.01, "learning_rate": 3.839476495726496e-05, "loss": 0.1854, "step": 2570 }, { "epoch": 3.01, "learning_rate": 3.832799145299146e-05, "loss": 0.1525, "step": 2580 }, { "epoch": 3.01, "learning_rate": 3.826121794871795e-05, "loss": 0.1236, "step": 2590 }, { "epoch": 3.01, "learning_rate": 3.8194444444444444e-05, "loss": 0.1181, "step": 2600 }, { "epoch": 3.01, "learning_rate": 3.812767094017094e-05, "loss": 0.1242, "step": 2610 }, { "epoch": 3.01, "learning_rate": 3.806089743589744e-05, "loss": 0.1651, "step": 2620 }, { "epoch": 3.02, "learning_rate": 3.799412393162393e-05, "loss": 0.1342, "step": 2630 }, { "epoch": 3.02, "learning_rate": 3.7927350427350425e-05, "loss": 0.1438, "step": 2640 }, { "epoch": 3.02, "learning_rate": 3.7860576923076925e-05, "loss": 0.0986, "step": 2650 }, { "epoch": 3.02, "learning_rate": 3.779380341880342e-05, "loss": 0.1217, "step": 2660 }, { "epoch": 3.02, "learning_rate": 3.772702991452992e-05, "loss": 0.1621, "step": 2670 }, { "epoch": 3.02, "learning_rate": 3.766025641025641e-05, "loss": 0.105, "step": 2680 }, { "epoch": 3.02, "learning_rate": 3.7593482905982906e-05, "loss": 0.234, "step": 2690 }, { "epoch": 3.02, "learning_rate": 3.7526709401709406e-05, "loss": 0.2106, "step": 2700 }, { "epoch": 3.03, "learning_rate": 3.74599358974359e-05, "loss": 0.1273, "step": 2710 }, { "epoch": 3.03, "learning_rate": 3.739316239316239e-05, "loss": 0.0124, "step": 2720 }, { "epoch": 3.03, "learning_rate": 3.7326388888888893e-05, "loss": 0.045, "step": 2730 }, { "epoch": 3.03, "learning_rate": 3.725961538461539e-05, "loss": 0.1901, "step": 2740 }, { "epoch": 3.03, "learning_rate": 3.719284188034188e-05, "loss": 0.1012, "step": 2750 }, { "epoch": 3.03, "learning_rate": 3.712606837606838e-05, "loss": 0.2435, "step": 2760 }, { "epoch": 3.03, "learning_rate": 3.7059294871794874e-05, "loss": 0.0302, "step": 2770 }, { "epoch": 3.03, "learning_rate": 3.699252136752137e-05, "loss": 0.1433, "step": 2780 }, { "epoch": 3.03, "learning_rate": 3.692574786324787e-05, "loss": 0.1693, "step": 2790 }, { "epoch": 3.04, "learning_rate": 3.685897435897436e-05, "loss": 0.2665, "step": 2800 }, { "epoch": 3.04, "learning_rate": 3.679220085470086e-05, "loss": 0.2381, "step": 2810 }, { "epoch": 3.04, "learning_rate": 3.6725427350427355e-05, "loss": 0.1251, "step": 2820 }, { "epoch": 3.04, "learning_rate": 3.665865384615384e-05, "loss": 0.1623, "step": 2830 }, { "epoch": 3.04, "learning_rate": 3.659188034188034e-05, "loss": 0.139, "step": 2840 }, { "epoch": 3.04, "learning_rate": 3.6525106837606836e-05, "loss": 0.0755, "step": 2850 }, { "epoch": 3.04, "learning_rate": 3.6458333333333336e-05, "loss": 0.265, "step": 2860 }, { "epoch": 3.04, "learning_rate": 3.639155982905983e-05, "loss": 0.3092, "step": 2870 }, { "epoch": 3.05, "learning_rate": 3.6324786324786323e-05, "loss": 0.134, "step": 2880 }, { "epoch": 3.05, "learning_rate": 3.6258012820512824e-05, "loss": 0.0111, "step": 2890 }, { "epoch": 3.05, "learning_rate": 3.619123931623932e-05, "loss": 0.154, "step": 2900 }, { "epoch": 3.05, "learning_rate": 3.612446581196581e-05, "loss": 0.0385, "step": 2910 }, { "epoch": 3.05, "learning_rate": 3.605769230769231e-05, "loss": 0.1979, "step": 2920 }, { "epoch": 3.05, "learning_rate": 3.5990918803418805e-05, "loss": 0.1618, "step": 2930 }, { "epoch": 3.05, "learning_rate": 3.59241452991453e-05, "loss": 0.1044, "step": 2940 }, { "epoch": 3.05, "learning_rate": 3.58573717948718e-05, "loss": 0.1899, "step": 2950 }, { "epoch": 3.06, "learning_rate": 3.579059829059829e-05, "loss": 0.1188, "step": 2960 }, { "epoch": 3.06, "learning_rate": 3.5723824786324785e-05, "loss": 0.3025, "step": 2970 }, { "epoch": 3.06, "learning_rate": 3.5657051282051286e-05, "loss": 0.1551, "step": 2980 }, { "epoch": 3.06, "learning_rate": 3.559027777777778e-05, "loss": 0.272, "step": 2990 }, { "epoch": 3.06, "learning_rate": 3.552350427350428e-05, "loss": 0.2863, "step": 3000 }, { "epoch": 3.06, "learning_rate": 3.545673076923077e-05, "loss": 0.1508, "step": 3010 }, { "epoch": 3.06, "learning_rate": 3.538995726495727e-05, "loss": 0.3007, "step": 3020 }, { "epoch": 3.06, "learning_rate": 3.532318376068377e-05, "loss": 0.256, "step": 3030 }, { "epoch": 3.07, "learning_rate": 3.525641025641026e-05, "loss": 0.0253, "step": 3040 }, { "epoch": 3.07, "learning_rate": 3.5189636752136754e-05, "loss": 0.0563, "step": 3050 }, { "epoch": 3.07, "learning_rate": 3.512286324786325e-05, "loss": 0.2547, "step": 3060 }, { "epoch": 3.07, "learning_rate": 3.505608974358974e-05, "loss": 0.1, "step": 3070 }, { "epoch": 3.07, "learning_rate": 3.498931623931624e-05, "loss": 0.1087, "step": 3080 }, { "epoch": 3.07, "learning_rate": 3.4922542735042735e-05, "loss": 0.0982, "step": 3090 }, { "epoch": 3.07, "learning_rate": 3.485576923076923e-05, "loss": 0.0756, "step": 3100 }, { "epoch": 3.07, "learning_rate": 3.478899572649573e-05, "loss": 0.1787, "step": 3110 }, { "epoch": 3.07, "learning_rate": 3.472222222222222e-05, "loss": 0.1296, "step": 3120 }, { "epoch": 3.08, "learning_rate": 3.4655448717948716e-05, "loss": 0.1278, "step": 3130 }, { "epoch": 3.08, "learning_rate": 3.4588675213675216e-05, "loss": 0.2712, "step": 3140 }, { "epoch": 3.08, "learning_rate": 3.452190170940171e-05, "loss": 0.1053, "step": 3150 }, { "epoch": 3.08, "learning_rate": 3.445512820512821e-05, "loss": 0.0271, "step": 3160 }, { "epoch": 3.08, "learning_rate": 3.43883547008547e-05, "loss": 0.1143, "step": 3170 }, { "epoch": 3.08, "learning_rate": 3.43215811965812e-05, "loss": 0.2631, "step": 3180 }, { "epoch": 3.08, "learning_rate": 3.42548076923077e-05, "loss": 0.3132, "step": 3190 }, { "epoch": 3.08, "learning_rate": 3.418803418803419e-05, "loss": 0.3753, "step": 3200 }, { "epoch": 3.09, "learning_rate": 3.4121260683760684e-05, "loss": 0.1488, "step": 3210 }, { "epoch": 3.09, "learning_rate": 3.4054487179487185e-05, "loss": 0.1837, "step": 3220 }, { "epoch": 3.09, "learning_rate": 3.398771367521368e-05, "loss": 0.0834, "step": 3230 }, { "epoch": 3.09, "learning_rate": 3.392094017094017e-05, "loss": 0.1418, "step": 3240 }, { "epoch": 3.09, "learning_rate": 3.385416666666667e-05, "loss": 0.2449, "step": 3250 }, { "epoch": 3.09, "learning_rate": 3.3787393162393165e-05, "loss": 0.3223, "step": 3260 }, { "epoch": 3.09, "learning_rate": 3.372061965811966e-05, "loss": 0.0967, "step": 3270 }, { "epoch": 3.09, "learning_rate": 3.365384615384616e-05, "loss": 0.0662, "step": 3280 }, { "epoch": 3.1, "learning_rate": 3.3587072649572646e-05, "loss": 0.1879, "step": 3290 }, { "epoch": 3.1, "learning_rate": 3.3520299145299146e-05, "loss": 0.1226, "step": 3300 }, { "epoch": 3.1, "learning_rate": 3.345352564102564e-05, "loss": 0.1606, "step": 3310 }, { "epoch": 3.1, "learning_rate": 3.338675213675213e-05, "loss": 0.0107, "step": 3320 }, { "epoch": 3.1, "learning_rate": 3.3319978632478634e-05, "loss": 0.137, "step": 3330 }, { "epoch": 3.1, "eval_accuracy": 0.9370836417468542, "eval_loss": 0.22310341894626617, "eval_runtime": 85.6004, "eval_samples_per_second": 15.783, "eval_steps_per_second": 2.64, "step": 3332 }, { "epoch": 4.0, "learning_rate": 3.325320512820513e-05, "loss": 0.247, "step": 3340 }, { "epoch": 4.0, "learning_rate": 3.318643162393163e-05, "loss": 0.0202, "step": 3350 }, { "epoch": 4.0, "learning_rate": 3.311965811965812e-05, "loss": 0.1689, "step": 3360 }, { "epoch": 4.0, "learning_rate": 3.3052884615384615e-05, "loss": 0.334, "step": 3370 }, { "epoch": 4.01, "learning_rate": 3.2986111111111115e-05, "loss": 0.1227, "step": 3380 }, { "epoch": 4.01, "learning_rate": 3.291933760683761e-05, "loss": 0.05, "step": 3390 }, { "epoch": 4.01, "learning_rate": 3.28525641025641e-05, "loss": 0.0551, "step": 3400 }, { "epoch": 4.01, "learning_rate": 3.27857905982906e-05, "loss": 0.0999, "step": 3410 }, { "epoch": 4.01, "learning_rate": 3.2719017094017096e-05, "loss": 0.1259, "step": 3420 }, { "epoch": 4.01, "learning_rate": 3.265224358974359e-05, "loss": 0.0748, "step": 3430 }, { "epoch": 4.01, "learning_rate": 3.258547008547009e-05, "loss": 0.0967, "step": 3440 }, { "epoch": 4.01, "learning_rate": 3.251869658119658e-05, "loss": 0.0372, "step": 3450 }, { "epoch": 4.02, "learning_rate": 3.2451923076923077e-05, "loss": 0.1477, "step": 3460 }, { "epoch": 4.02, "learning_rate": 3.238514957264958e-05, "loss": 0.0598, "step": 3470 }, { "epoch": 4.02, "learning_rate": 3.231837606837607e-05, "loss": 0.0092, "step": 3480 }, { "epoch": 4.02, "learning_rate": 3.225160256410257e-05, "loss": 0.0115, "step": 3490 }, { "epoch": 4.02, "learning_rate": 3.2184829059829064e-05, "loss": 0.0084, "step": 3500 }, { "epoch": 4.02, "learning_rate": 3.211805555555556e-05, "loss": 0.1052, "step": 3510 }, { "epoch": 4.02, "learning_rate": 3.205128205128206e-05, "loss": 0.0335, "step": 3520 }, { "epoch": 4.02, "learning_rate": 3.1984508547008545e-05, "loss": 0.1715, "step": 3530 }, { "epoch": 4.03, "learning_rate": 3.1917735042735045e-05, "loss": 0.0244, "step": 3540 }, { "epoch": 4.03, "learning_rate": 3.185096153846154e-05, "loss": 0.0322, "step": 3550 }, { "epoch": 4.03, "learning_rate": 3.178418803418803e-05, "loss": 0.0308, "step": 3560 }, { "epoch": 4.03, "learning_rate": 3.171741452991453e-05, "loss": 0.0982, "step": 3570 }, { "epoch": 4.03, "learning_rate": 3.1650641025641026e-05, "loss": 0.0419, "step": 3580 }, { "epoch": 4.03, "learning_rate": 3.158386752136752e-05, "loss": 0.0448, "step": 3590 }, { "epoch": 4.03, "learning_rate": 3.151709401709402e-05, "loss": 0.082, "step": 3600 }, { "epoch": 4.03, "learning_rate": 3.145032051282051e-05, "loss": 0.1413, "step": 3610 }, { "epoch": 4.03, "learning_rate": 3.138354700854701e-05, "loss": 0.0035, "step": 3620 }, { "epoch": 4.04, "learning_rate": 3.131677350427351e-05, "loss": 0.0325, "step": 3630 }, { "epoch": 4.04, "learning_rate": 3.125e-05, "loss": 0.1028, "step": 3640 }, { "epoch": 4.04, "learning_rate": 3.1183226495726494e-05, "loss": 0.0111, "step": 3650 }, { "epoch": 4.04, "learning_rate": 3.1116452991452994e-05, "loss": 0.0263, "step": 3660 }, { "epoch": 4.04, "learning_rate": 3.104967948717949e-05, "loss": 0.0666, "step": 3670 }, { "epoch": 4.04, "learning_rate": 3.098290598290599e-05, "loss": 0.0059, "step": 3680 }, { "epoch": 4.04, "learning_rate": 3.091613247863248e-05, "loss": 0.0704, "step": 3690 }, { "epoch": 4.04, "learning_rate": 3.0849358974358975e-05, "loss": 0.1522, "step": 3700 }, { "epoch": 4.05, "learning_rate": 3.0782585470085476e-05, "loss": 0.0377, "step": 3710 }, { "epoch": 4.05, "learning_rate": 3.071581196581197e-05, "loss": 0.1819, "step": 3720 }, { "epoch": 4.05, "learning_rate": 3.064903846153846e-05, "loss": 0.0209, "step": 3730 }, { "epoch": 4.05, "learning_rate": 3.058226495726496e-05, "loss": 0.0196, "step": 3740 }, { "epoch": 4.05, "learning_rate": 3.0515491452991457e-05, "loss": 0.1089, "step": 3750 }, { "epoch": 4.05, "learning_rate": 3.0448717948717947e-05, "loss": 0.3502, "step": 3760 }, { "epoch": 4.05, "learning_rate": 3.0381944444444444e-05, "loss": 0.0047, "step": 3770 }, { "epoch": 4.05, "learning_rate": 3.031517094017094e-05, "loss": 0.0927, "step": 3780 }, { "epoch": 4.06, "learning_rate": 3.0248397435897434e-05, "loss": 0.0115, "step": 3790 }, { "epoch": 4.06, "learning_rate": 3.018162393162393e-05, "loss": 0.0098, "step": 3800 }, { "epoch": 4.06, "learning_rate": 3.0114850427350428e-05, "loss": 0.2128, "step": 3810 }, { "epoch": 4.06, "learning_rate": 3.0048076923076925e-05, "loss": 0.1717, "step": 3820 }, { "epoch": 4.06, "learning_rate": 2.9981303418803418e-05, "loss": 0.0731, "step": 3830 }, { "epoch": 4.06, "learning_rate": 2.9914529914529915e-05, "loss": 0.0716, "step": 3840 }, { "epoch": 4.06, "learning_rate": 2.9847756410256412e-05, "loss": 0.0211, "step": 3850 }, { "epoch": 4.06, "learning_rate": 2.9780982905982906e-05, "loss": 0.0054, "step": 3860 }, { "epoch": 4.06, "learning_rate": 2.9714209401709403e-05, "loss": 0.2639, "step": 3870 }, { "epoch": 4.07, "learning_rate": 2.96474358974359e-05, "loss": 0.0827, "step": 3880 }, { "epoch": 4.07, "learning_rate": 2.9580662393162396e-05, "loss": 0.2148, "step": 3890 }, { "epoch": 4.07, "learning_rate": 2.951388888888889e-05, "loss": 0.007, "step": 3900 }, { "epoch": 4.07, "learning_rate": 2.9447115384615387e-05, "loss": 0.143, "step": 3910 }, { "epoch": 4.07, "learning_rate": 2.9380341880341884e-05, "loss": 0.0107, "step": 3920 }, { "epoch": 4.07, "learning_rate": 2.9313568376068377e-05, "loss": 0.0062, "step": 3930 }, { "epoch": 4.07, "learning_rate": 2.9246794871794874e-05, "loss": 0.1301, "step": 3940 }, { "epoch": 4.07, "learning_rate": 2.918002136752137e-05, "loss": 0.0108, "step": 3950 }, { "epoch": 4.08, "learning_rate": 2.9113247863247868e-05, "loss": 0.0731, "step": 3960 }, { "epoch": 4.08, "learning_rate": 2.904647435897436e-05, "loss": 0.0286, "step": 3970 }, { "epoch": 4.08, "learning_rate": 2.897970085470086e-05, "loss": 0.008, "step": 3980 }, { "epoch": 4.08, "learning_rate": 2.8912927350427355e-05, "loss": 0.0894, "step": 3990 }, { "epoch": 4.08, "learning_rate": 2.8846153846153845e-05, "loss": 0.0484, "step": 4000 }, { "epoch": 4.08, "learning_rate": 2.8779380341880342e-05, "loss": 0.1517, "step": 4010 }, { "epoch": 4.08, "learning_rate": 2.8712606837606836e-05, "loss": 0.0369, "step": 4020 }, { "epoch": 4.08, "learning_rate": 2.8645833333333333e-05, "loss": 0.1208, "step": 4030 }, { "epoch": 4.09, "learning_rate": 2.857905982905983e-05, "loss": 0.1447, "step": 4040 }, { "epoch": 4.09, "learning_rate": 2.8512286324786323e-05, "loss": 0.0959, "step": 4050 }, { "epoch": 4.09, "learning_rate": 2.844551282051282e-05, "loss": 0.021, "step": 4060 }, { "epoch": 4.09, "learning_rate": 2.8378739316239317e-05, "loss": 0.2013, "step": 4070 }, { "epoch": 4.09, "learning_rate": 2.8311965811965814e-05, "loss": 0.0673, "step": 4080 }, { "epoch": 4.09, "learning_rate": 2.8245192307692307e-05, "loss": 0.0502, "step": 4090 }, { "epoch": 4.09, "learning_rate": 2.8178418803418804e-05, "loss": 0.0796, "step": 4100 }, { "epoch": 4.09, "learning_rate": 2.81116452991453e-05, "loss": 0.3334, "step": 4110 }, { "epoch": 4.09, "learning_rate": 2.8044871794871795e-05, "loss": 0.1535, "step": 4120 }, { "epoch": 4.1, "learning_rate": 2.7978098290598292e-05, "loss": 0.1823, "step": 4130 }, { "epoch": 4.1, "learning_rate": 2.791132478632479e-05, "loss": 0.0254, "step": 4140 }, { "epoch": 4.1, "learning_rate": 2.7844551282051286e-05, "loss": 0.125, "step": 4150 }, { "epoch": 4.1, "learning_rate": 2.777777777777778e-05, "loss": 0.1014, "step": 4160 }, { "epoch": 4.1, "eval_accuracy": 0.9592894152479645, "eval_loss": 0.15163935720920563, "eval_runtime": 85.5792, "eval_samples_per_second": 15.787, "eval_steps_per_second": 2.641, "step": 4165 }, { "epoch": 5.0, "learning_rate": 2.7711004273504276e-05, "loss": 0.2326, "step": 4170 }, { "epoch": 5.0, "learning_rate": 2.7644230769230773e-05, "loss": 0.07, "step": 4180 }, { "epoch": 5.0, "learning_rate": 2.757745726495727e-05, "loss": 0.0832, "step": 4190 }, { "epoch": 5.0, "learning_rate": 2.7510683760683763e-05, "loss": 0.1114, "step": 4200 }, { "epoch": 5.01, "learning_rate": 2.744391025641026e-05, "loss": 0.0198, "step": 4210 }, { "epoch": 5.01, "learning_rate": 2.7377136752136757e-05, "loss": 0.0039, "step": 4220 }, { "epoch": 5.01, "learning_rate": 2.7310363247863247e-05, "loss": 0.2082, "step": 4230 }, { "epoch": 5.01, "learning_rate": 2.724358974358974e-05, "loss": 0.1341, "step": 4240 }, { "epoch": 5.01, "learning_rate": 2.7176816239316238e-05, "loss": 0.1808, "step": 4250 }, { "epoch": 5.01, "learning_rate": 2.7110042735042735e-05, "loss": 0.1818, "step": 4260 }, { "epoch": 5.01, "learning_rate": 2.704326923076923e-05, "loss": 0.019, "step": 4270 }, { "epoch": 5.01, "learning_rate": 2.6976495726495725e-05, "loss": 0.005, "step": 4280 }, { "epoch": 5.02, "learning_rate": 2.6909722222222222e-05, "loss": 0.0053, "step": 4290 }, { "epoch": 5.02, "learning_rate": 2.684294871794872e-05, "loss": 0.0751, "step": 4300 }, { "epoch": 5.02, "learning_rate": 2.6776175213675216e-05, "loss": 0.2481, "step": 4310 }, { "epoch": 5.02, "learning_rate": 2.670940170940171e-05, "loss": 0.0346, "step": 4320 }, { "epoch": 5.02, "learning_rate": 2.6642628205128206e-05, "loss": 0.1085, "step": 4330 }, { "epoch": 5.02, "learning_rate": 2.6575854700854703e-05, "loss": 0.2827, "step": 4340 }, { "epoch": 5.02, "learning_rate": 2.6509081196581197e-05, "loss": 0.2162, "step": 4350 }, { "epoch": 5.02, "learning_rate": 2.6442307692307694e-05, "loss": 0.1607, "step": 4360 }, { "epoch": 5.02, "learning_rate": 2.637553418803419e-05, "loss": 0.0169, "step": 4370 }, { "epoch": 5.03, "learning_rate": 2.6308760683760687e-05, "loss": 0.0122, "step": 4380 }, { "epoch": 5.03, "learning_rate": 2.624198717948718e-05, "loss": 0.1715, "step": 4390 }, { "epoch": 5.03, "learning_rate": 2.6175213675213678e-05, "loss": 0.0114, "step": 4400 }, { "epoch": 5.03, "learning_rate": 2.6108440170940175e-05, "loss": 0.0156, "step": 4410 }, { "epoch": 5.03, "learning_rate": 2.604166666666667e-05, "loss": 0.0948, "step": 4420 }, { "epoch": 5.03, "learning_rate": 2.5974893162393165e-05, "loss": 0.1142, "step": 4430 }, { "epoch": 5.03, "learning_rate": 2.5908119658119662e-05, "loss": 0.0091, "step": 4440 }, { "epoch": 5.03, "learning_rate": 2.584134615384616e-05, "loss": 0.0021, "step": 4450 }, { "epoch": 5.04, "learning_rate": 2.577457264957265e-05, "loss": 0.0513, "step": 4460 }, { "epoch": 5.04, "learning_rate": 2.5707799145299143e-05, "loss": 0.002, "step": 4470 }, { "epoch": 5.04, "learning_rate": 2.564102564102564e-05, "loss": 0.0016, "step": 4480 }, { "epoch": 5.04, "learning_rate": 2.5574252136752137e-05, "loss": 0.062, "step": 4490 }, { "epoch": 5.04, "learning_rate": 2.5507478632478633e-05, "loss": 0.0168, "step": 4500 }, { "epoch": 5.04, "learning_rate": 2.5440705128205127e-05, "loss": 0.1189, "step": 4510 }, { "epoch": 5.04, "learning_rate": 2.5373931623931624e-05, "loss": 0.0289, "step": 4520 }, { "epoch": 5.04, "learning_rate": 2.530715811965812e-05, "loss": 0.0263, "step": 4530 }, { "epoch": 5.05, "learning_rate": 2.5240384615384614e-05, "loss": 0.022, "step": 4540 }, { "epoch": 5.05, "learning_rate": 2.517361111111111e-05, "loss": 0.1551, "step": 4550 }, { "epoch": 5.05, "learning_rate": 2.5106837606837608e-05, "loss": 0.1103, "step": 4560 }, { "epoch": 5.05, "learning_rate": 2.5040064102564105e-05, "loss": 0.1434, "step": 4570 }, { "epoch": 5.05, "learning_rate": 2.49732905982906e-05, "loss": 0.2635, "step": 4580 }, { "epoch": 5.05, "learning_rate": 2.4906517094017096e-05, "loss": 0.0956, "step": 4590 }, { "epoch": 5.05, "learning_rate": 2.4839743589743592e-05, "loss": 0.0518, "step": 4600 }, { "epoch": 5.05, "learning_rate": 2.4772970085470086e-05, "loss": 0.1728, "step": 4610 }, { "epoch": 5.05, "learning_rate": 2.4706196581196583e-05, "loss": 0.0058, "step": 4620 }, { "epoch": 5.06, "learning_rate": 2.463942307692308e-05, "loss": 0.1515, "step": 4630 }, { "epoch": 5.06, "learning_rate": 2.4572649572649573e-05, "loss": 0.0366, "step": 4640 }, { "epoch": 5.06, "learning_rate": 2.450587606837607e-05, "loss": 0.003, "step": 4650 }, { "epoch": 5.06, "learning_rate": 2.4439102564102564e-05, "loss": 0.0561, "step": 4660 }, { "epoch": 5.06, "learning_rate": 2.437232905982906e-05, "loss": 0.0039, "step": 4670 }, { "epoch": 5.06, "learning_rate": 2.4305555555555558e-05, "loss": 0.0052, "step": 4680 }, { "epoch": 5.06, "learning_rate": 2.423878205128205e-05, "loss": 0.0821, "step": 4690 }, { "epoch": 5.06, "learning_rate": 2.4172008547008548e-05, "loss": 0.0126, "step": 4700 }, { "epoch": 5.07, "learning_rate": 2.4105235042735045e-05, "loss": 0.0093, "step": 4710 }, { "epoch": 5.07, "learning_rate": 2.4038461538461542e-05, "loss": 0.2059, "step": 4720 }, { "epoch": 5.07, "learning_rate": 2.3971688034188035e-05, "loss": 0.0528, "step": 4730 }, { "epoch": 5.07, "learning_rate": 2.3904914529914532e-05, "loss": 0.0022, "step": 4740 }, { "epoch": 5.07, "learning_rate": 2.383814102564103e-05, "loss": 0.0969, "step": 4750 }, { "epoch": 5.07, "learning_rate": 2.3771367521367523e-05, "loss": 0.0024, "step": 4760 }, { "epoch": 5.07, "learning_rate": 2.3704594017094016e-05, "loss": 0.0674, "step": 4770 }, { "epoch": 5.07, "learning_rate": 2.3637820512820513e-05, "loss": 0.007, "step": 4780 }, { "epoch": 5.08, "learning_rate": 2.357104700854701e-05, "loss": 0.0121, "step": 4790 }, { "epoch": 5.08, "learning_rate": 2.3504273504273504e-05, "loss": 0.1585, "step": 4800 }, { "epoch": 5.08, "learning_rate": 2.34375e-05, "loss": 0.1726, "step": 4810 }, { "epoch": 5.08, "learning_rate": 2.3370726495726497e-05, "loss": 0.0053, "step": 4820 }, { "epoch": 5.08, "learning_rate": 2.3303952991452994e-05, "loss": 0.0597, "step": 4830 }, { "epoch": 5.08, "learning_rate": 2.3237179487179488e-05, "loss": 0.0105, "step": 4840 }, { "epoch": 5.08, "learning_rate": 2.3170405982905985e-05, "loss": 0.002, "step": 4850 }, { "epoch": 5.08, "learning_rate": 2.310363247863248e-05, "loss": 0.0019, "step": 4860 }, { "epoch": 5.08, "learning_rate": 2.3036858974358975e-05, "loss": 0.2447, "step": 4870 }, { "epoch": 5.09, "learning_rate": 2.297008547008547e-05, "loss": 0.0584, "step": 4880 }, { "epoch": 5.09, "learning_rate": 2.2903311965811966e-05, "loss": 0.0955, "step": 4890 }, { "epoch": 5.09, "learning_rate": 2.2836538461538463e-05, "loss": 0.0293, "step": 4900 }, { "epoch": 5.09, "learning_rate": 2.276976495726496e-05, "loss": 0.0053, "step": 4910 }, { "epoch": 5.09, "learning_rate": 2.2702991452991453e-05, "loss": 0.0666, "step": 4920 }, { "epoch": 5.09, "learning_rate": 2.263621794871795e-05, "loss": 0.0213, "step": 4930 }, { "epoch": 5.09, "learning_rate": 2.2569444444444447e-05, "loss": 0.0668, "step": 4940 }, { "epoch": 5.09, "learning_rate": 2.250267094017094e-05, "loss": 0.0146, "step": 4950 }, { "epoch": 5.1, "learning_rate": 2.2435897435897437e-05, "loss": 0.0046, "step": 4960 }, { "epoch": 5.1, "learning_rate": 2.2369123931623934e-05, "loss": 0.0884, "step": 4970 }, { "epoch": 5.1, "learning_rate": 2.230235042735043e-05, "loss": 0.0072, "step": 4980 }, { "epoch": 5.1, "learning_rate": 2.223557692307692e-05, "loss": 0.0084, "step": 4990 }, { "epoch": 5.1, "eval_accuracy": 0.9681717246484086, "eval_loss": 0.147811621427536, "eval_runtime": 85.8137, "eval_samples_per_second": 15.743, "eval_steps_per_second": 2.634, "step": 4998 }, { "epoch": 6.0, "learning_rate": 2.2168803418803418e-05, "loss": 0.1845, "step": 5000 }, { "epoch": 6.0, "learning_rate": 2.2102029914529915e-05, "loss": 0.0636, "step": 5010 }, { "epoch": 6.0, "learning_rate": 2.2035256410256412e-05, "loss": 0.0051, "step": 5020 }, { "epoch": 6.0, "learning_rate": 2.1968482905982905e-05, "loss": 0.0027, "step": 5030 }, { "epoch": 6.01, "learning_rate": 2.1901709401709402e-05, "loss": 0.0974, "step": 5040 }, { "epoch": 6.01, "learning_rate": 2.18349358974359e-05, "loss": 0.0281, "step": 5050 }, { "epoch": 6.01, "learning_rate": 2.1768162393162396e-05, "loss": 0.1491, "step": 5060 }, { "epoch": 6.01, "learning_rate": 2.170138888888889e-05, "loss": 0.0247, "step": 5070 }, { "epoch": 6.01, "learning_rate": 2.1634615384615387e-05, "loss": 0.0898, "step": 5080 }, { "epoch": 6.01, "learning_rate": 2.1567841880341884e-05, "loss": 0.0053, "step": 5090 }, { "epoch": 6.01, "learning_rate": 2.1501068376068377e-05, "loss": 0.0295, "step": 5100 }, { "epoch": 6.01, "learning_rate": 2.143429487179487e-05, "loss": 0.0022, "step": 5110 }, { "epoch": 6.01, "learning_rate": 2.1367521367521368e-05, "loss": 0.0016, "step": 5120 }, { "epoch": 6.02, "learning_rate": 2.1300747863247864e-05, "loss": 0.0012, "step": 5130 }, { "epoch": 6.02, "learning_rate": 2.1233974358974358e-05, "loss": 0.003, "step": 5140 }, { "epoch": 6.02, "learning_rate": 2.1167200854700855e-05, "loss": 0.0022, "step": 5150 }, { "epoch": 6.02, "learning_rate": 2.1100427350427352e-05, "loss": 0.0743, "step": 5160 }, { "epoch": 6.02, "learning_rate": 2.103365384615385e-05, "loss": 0.0254, "step": 5170 }, { "epoch": 6.02, "learning_rate": 2.0966880341880342e-05, "loss": 0.1902, "step": 5180 }, { "epoch": 6.02, "learning_rate": 2.090010683760684e-05, "loss": 0.0027, "step": 5190 }, { "epoch": 6.02, "learning_rate": 2.0833333333333336e-05, "loss": 0.0041, "step": 5200 }, { "epoch": 6.03, "learning_rate": 2.076655982905983e-05, "loss": 0.1141, "step": 5210 }, { "epoch": 6.03, "learning_rate": 2.0699786324786323e-05, "loss": 0.0124, "step": 5220 }, { "epoch": 6.03, "learning_rate": 2.063301282051282e-05, "loss": 0.0542, "step": 5230 }, { "epoch": 6.03, "learning_rate": 2.0566239316239317e-05, "loss": 0.0032, "step": 5240 }, { "epoch": 6.03, "learning_rate": 2.0499465811965814e-05, "loss": 0.1405, "step": 5250 }, { "epoch": 6.03, "learning_rate": 2.0432692307692307e-05, "loss": 0.1003, "step": 5260 }, { "epoch": 6.03, "learning_rate": 2.0365918803418804e-05, "loss": 0.0025, "step": 5270 }, { "epoch": 6.03, "learning_rate": 2.02991452991453e-05, "loss": 0.2127, "step": 5280 }, { "epoch": 6.04, "learning_rate": 2.0232371794871795e-05, "loss": 0.0047, "step": 5290 }, { "epoch": 6.04, "learning_rate": 2.016559829059829e-05, "loss": 0.0746, "step": 5300 }, { "epoch": 6.04, "learning_rate": 2.009882478632479e-05, "loss": 0.0381, "step": 5310 }, { "epoch": 6.04, "learning_rate": 2.0032051282051285e-05, "loss": 0.0017, "step": 5320 }, { "epoch": 6.04, "learning_rate": 1.996527777777778e-05, "loss": 0.0261, "step": 5330 }, { "epoch": 6.04, "learning_rate": 1.9898504273504272e-05, "loss": 0.0052, "step": 5340 }, { "epoch": 6.04, "learning_rate": 1.983173076923077e-05, "loss": 0.0011, "step": 5350 }, { "epoch": 6.04, "learning_rate": 1.9764957264957266e-05, "loss": 0.1598, "step": 5360 }, { "epoch": 6.04, "learning_rate": 1.969818376068376e-05, "loss": 0.0089, "step": 5370 }, { "epoch": 6.05, "learning_rate": 1.9631410256410257e-05, "loss": 0.0011, "step": 5380 }, { "epoch": 6.05, "learning_rate": 1.9564636752136754e-05, "loss": 0.0036, "step": 5390 }, { "epoch": 6.05, "learning_rate": 1.949786324786325e-05, "loss": 0.0014, "step": 5400 }, { "epoch": 6.05, "learning_rate": 1.9431089743589744e-05, "loss": 0.0031, "step": 5410 }, { "epoch": 6.05, "learning_rate": 1.936431623931624e-05, "loss": 0.1976, "step": 5420 }, { "epoch": 6.05, "learning_rate": 1.9297542735042738e-05, "loss": 0.0014, "step": 5430 }, { "epoch": 6.05, "learning_rate": 1.923076923076923e-05, "loss": 0.0037, "step": 5440 }, { "epoch": 6.05, "learning_rate": 1.916399572649573e-05, "loss": 0.0055, "step": 5450 }, { "epoch": 6.06, "learning_rate": 1.9097222222222222e-05, "loss": 0.1296, "step": 5460 }, { "epoch": 6.06, "learning_rate": 1.903044871794872e-05, "loss": 0.003, "step": 5470 }, { "epoch": 6.06, "learning_rate": 1.8963675213675212e-05, "loss": 0.0843, "step": 5480 }, { "epoch": 6.06, "learning_rate": 1.889690170940171e-05, "loss": 0.2126, "step": 5490 }, { "epoch": 6.06, "learning_rate": 1.8830128205128206e-05, "loss": 0.0305, "step": 5500 }, { "epoch": 6.06, "learning_rate": 1.8763354700854703e-05, "loss": 0.0941, "step": 5510 }, { "epoch": 6.06, "learning_rate": 1.8696581196581197e-05, "loss": 0.0868, "step": 5520 }, { "epoch": 6.06, "learning_rate": 1.8629807692307693e-05, "loss": 0.0875, "step": 5530 }, { "epoch": 6.07, "learning_rate": 1.856303418803419e-05, "loss": 0.0053, "step": 5540 }, { "epoch": 6.07, "learning_rate": 1.8496260683760684e-05, "loss": 0.0012, "step": 5550 }, { "epoch": 6.07, "learning_rate": 1.842948717948718e-05, "loss": 0.0017, "step": 5560 }, { "epoch": 6.07, "learning_rate": 1.8362713675213678e-05, "loss": 0.0043, "step": 5570 }, { "epoch": 6.07, "learning_rate": 1.829594017094017e-05, "loss": 0.1032, "step": 5580 }, { "epoch": 6.07, "learning_rate": 1.8229166666666668e-05, "loss": 0.0109, "step": 5590 }, { "epoch": 6.07, "learning_rate": 1.8162393162393162e-05, "loss": 0.0018, "step": 5600 }, { "epoch": 6.07, "learning_rate": 1.809561965811966e-05, "loss": 0.0017, "step": 5610 }, { "epoch": 6.07, "learning_rate": 1.8028846153846156e-05, "loss": 0.0011, "step": 5620 }, { "epoch": 6.08, "learning_rate": 1.796207264957265e-05, "loss": 0.1335, "step": 5630 }, { "epoch": 6.08, "learning_rate": 1.7895299145299146e-05, "loss": 0.0011, "step": 5640 }, { "epoch": 6.08, "learning_rate": 1.7828525641025643e-05, "loss": 0.0019, "step": 5650 }, { "epoch": 6.08, "learning_rate": 1.776175213675214e-05, "loss": 0.0012, "step": 5660 }, { "epoch": 6.08, "learning_rate": 1.7694978632478633e-05, "loss": 0.0063, "step": 5670 }, { "epoch": 6.08, "learning_rate": 1.762820512820513e-05, "loss": 0.0036, "step": 5680 }, { "epoch": 6.08, "learning_rate": 1.7561431623931624e-05, "loss": 0.0293, "step": 5690 }, { "epoch": 6.08, "learning_rate": 1.749465811965812e-05, "loss": 0.1403, "step": 5700 }, { "epoch": 6.09, "learning_rate": 1.7427884615384614e-05, "loss": 0.0012, "step": 5710 }, { "epoch": 6.09, "learning_rate": 1.736111111111111e-05, "loss": 0.1275, "step": 5720 }, { "epoch": 6.09, "learning_rate": 1.7294337606837608e-05, "loss": 0.0219, "step": 5730 }, { "epoch": 6.09, "learning_rate": 1.7227564102564105e-05, "loss": 0.0883, "step": 5740 }, { "epoch": 6.09, "learning_rate": 1.71607905982906e-05, "loss": 0.003, "step": 5750 }, { "epoch": 6.09, "learning_rate": 1.7094017094017095e-05, "loss": 0.1378, "step": 5760 }, { "epoch": 6.09, "learning_rate": 1.7027243589743592e-05, "loss": 0.344, "step": 5770 }, { "epoch": 6.09, "learning_rate": 1.6960470085470086e-05, "loss": 0.0294, "step": 5780 }, { "epoch": 6.1, "learning_rate": 1.6893696581196583e-05, "loss": 0.0099, "step": 5790 }, { "epoch": 6.1, "learning_rate": 1.682692307692308e-05, "loss": 0.3121, "step": 5800 }, { "epoch": 6.1, "learning_rate": 1.6760149572649573e-05, "loss": 0.0649, "step": 5810 }, { "epoch": 6.1, "learning_rate": 1.6693376068376067e-05, "loss": 0.1054, "step": 5820 }, { "epoch": 6.1, "learning_rate": 1.6626602564102564e-05, "loss": 0.0041, "step": 5830 }, { "epoch": 6.1, "eval_accuracy": 0.9652109548482606, "eval_loss": 0.12842880189418793, "eval_runtime": 86.1673, "eval_samples_per_second": 15.679, "eval_steps_per_second": 2.623, "step": 5831 }, { "epoch": 7.0, "learning_rate": 1.655982905982906e-05, "loss": 0.0289, "step": 5840 }, { "epoch": 7.0, "learning_rate": 1.6493055555555557e-05, "loss": 0.0019, "step": 5850 }, { "epoch": 7.0, "learning_rate": 1.642628205128205e-05, "loss": 0.013, "step": 5860 }, { "epoch": 7.0, "learning_rate": 1.6359508547008548e-05, "loss": 0.0072, "step": 5870 }, { "epoch": 7.01, "learning_rate": 1.6292735042735045e-05, "loss": 0.0014, "step": 5880 }, { "epoch": 7.01, "learning_rate": 1.6225961538461538e-05, "loss": 0.0454, "step": 5890 }, { "epoch": 7.01, "learning_rate": 1.6159188034188035e-05, "loss": 0.007, "step": 5900 }, { "epoch": 7.01, "learning_rate": 1.6092414529914532e-05, "loss": 0.087, "step": 5910 }, { "epoch": 7.01, "learning_rate": 1.602564102564103e-05, "loss": 0.0044, "step": 5920 }, { "epoch": 7.01, "learning_rate": 1.5958867521367523e-05, "loss": 0.0058, "step": 5930 }, { "epoch": 7.01, "learning_rate": 1.5892094017094016e-05, "loss": 0.0405, "step": 5940 }, { "epoch": 7.01, "learning_rate": 1.5825320512820513e-05, "loss": 0.0724, "step": 5950 }, { "epoch": 7.02, "learning_rate": 1.575854700854701e-05, "loss": 0.0014, "step": 5960 }, { "epoch": 7.02, "learning_rate": 1.5691773504273503e-05, "loss": 0.0546, "step": 5970 }, { "epoch": 7.02, "learning_rate": 1.5625e-05, "loss": 0.0016, "step": 5980 }, { "epoch": 7.02, "learning_rate": 1.5558226495726497e-05, "loss": 0.0296, "step": 5990 }, { "epoch": 7.02, "learning_rate": 1.5491452991452994e-05, "loss": 0.0905, "step": 6000 }, { "epoch": 7.02, "learning_rate": 1.5424679487179488e-05, "loss": 0.0015, "step": 6010 }, { "epoch": 7.02, "learning_rate": 1.5357905982905985e-05, "loss": 0.057, "step": 6020 }, { "epoch": 7.02, "learning_rate": 1.529113247863248e-05, "loss": 0.1189, "step": 6030 }, { "epoch": 7.03, "learning_rate": 1.5224358974358973e-05, "loss": 0.0133, "step": 6040 }, { "epoch": 7.03, "learning_rate": 1.515758547008547e-05, "loss": 0.001, "step": 6050 }, { "epoch": 7.03, "learning_rate": 1.5090811965811965e-05, "loss": 0.0947, "step": 6060 }, { "epoch": 7.03, "learning_rate": 1.5024038461538462e-05, "loss": 0.0056, "step": 6070 }, { "epoch": 7.03, "learning_rate": 1.4957264957264958e-05, "loss": 0.0015, "step": 6080 }, { "epoch": 7.03, "learning_rate": 1.4890491452991453e-05, "loss": 0.11, "step": 6090 }, { "epoch": 7.03, "learning_rate": 1.482371794871795e-05, "loss": 0.1273, "step": 6100 }, { "epoch": 7.03, "learning_rate": 1.4756944444444445e-05, "loss": 0.1319, "step": 6110 }, { "epoch": 7.03, "learning_rate": 1.4690170940170942e-05, "loss": 0.0055, "step": 6120 }, { "epoch": 7.04, "learning_rate": 1.4623397435897437e-05, "loss": 0.001, "step": 6130 }, { "epoch": 7.04, "learning_rate": 1.4556623931623934e-05, "loss": 0.002, "step": 6140 }, { "epoch": 7.04, "learning_rate": 1.448985042735043e-05, "loss": 0.0008, "step": 6150 }, { "epoch": 7.04, "learning_rate": 1.4423076923076923e-05, "loss": 0.0841, "step": 6160 }, { "epoch": 7.04, "learning_rate": 1.4356303418803418e-05, "loss": 0.001, "step": 6170 }, { "epoch": 7.04, "learning_rate": 1.4289529914529915e-05, "loss": 0.0016, "step": 6180 }, { "epoch": 7.04, "learning_rate": 1.422275641025641e-05, "loss": 0.0305, "step": 6190 }, { "epoch": 7.04, "learning_rate": 1.4155982905982907e-05, "loss": 0.0007, "step": 6200 }, { "epoch": 7.05, "learning_rate": 1.4089209401709402e-05, "loss": 0.0857, "step": 6210 }, { "epoch": 7.05, "learning_rate": 1.4022435897435897e-05, "loss": 0.004, "step": 6220 }, { "epoch": 7.05, "learning_rate": 1.3955662393162394e-05, "loss": 0.001, "step": 6230 }, { "epoch": 7.05, "learning_rate": 1.388888888888889e-05, "loss": 0.0008, "step": 6240 }, { "epoch": 7.05, "learning_rate": 1.3822115384615386e-05, "loss": 0.0025, "step": 6250 }, { "epoch": 7.05, "learning_rate": 1.3755341880341882e-05, "loss": 0.003, "step": 6260 }, { "epoch": 7.05, "learning_rate": 1.3688568376068379e-05, "loss": 0.0075, "step": 6270 }, { "epoch": 7.05, "learning_rate": 1.362179487179487e-05, "loss": 0.0183, "step": 6280 }, { "epoch": 7.06, "learning_rate": 1.3555021367521367e-05, "loss": 0.066, "step": 6290 }, { "epoch": 7.06, "learning_rate": 1.3488247863247863e-05, "loss": 0.0021, "step": 6300 }, { "epoch": 7.06, "learning_rate": 1.342147435897436e-05, "loss": 0.0009, "step": 6310 }, { "epoch": 7.06, "learning_rate": 1.3354700854700855e-05, "loss": 0.0016, "step": 6320 }, { "epoch": 7.06, "learning_rate": 1.3287927350427352e-05, "loss": 0.0017, "step": 6330 }, { "epoch": 7.06, "learning_rate": 1.3221153846153847e-05, "loss": 0.0045, "step": 6340 }, { "epoch": 7.06, "learning_rate": 1.3154380341880344e-05, "loss": 0.0012, "step": 6350 }, { "epoch": 7.06, "learning_rate": 1.3087606837606839e-05, "loss": 0.0057, "step": 6360 }, { "epoch": 7.06, "learning_rate": 1.3020833333333334e-05, "loss": 0.0008, "step": 6370 }, { "epoch": 7.07, "learning_rate": 1.2954059829059831e-05, "loss": 0.0183, "step": 6380 }, { "epoch": 7.07, "learning_rate": 1.2887286324786325e-05, "loss": 0.0011, "step": 6390 }, { "epoch": 7.07, "learning_rate": 1.282051282051282e-05, "loss": 0.0027, "step": 6400 }, { "epoch": 7.07, "learning_rate": 1.2753739316239317e-05, "loss": 0.0384, "step": 6410 }, { "epoch": 7.07, "learning_rate": 1.2686965811965812e-05, "loss": 0.0027, "step": 6420 }, { "epoch": 7.07, "learning_rate": 1.2620192307692307e-05, "loss": 0.001, "step": 6430 }, { "epoch": 7.07, "learning_rate": 1.2553418803418804e-05, "loss": 0.0296, "step": 6440 }, { "epoch": 7.07, "learning_rate": 1.24866452991453e-05, "loss": 0.0008, "step": 6450 }, { "epoch": 7.08, "learning_rate": 1.2419871794871796e-05, "loss": 0.1594, "step": 6460 }, { "epoch": 7.08, "learning_rate": 1.2353098290598291e-05, "loss": 0.0579, "step": 6470 }, { "epoch": 7.08, "learning_rate": 1.2286324786324787e-05, "loss": 0.001, "step": 6480 }, { "epoch": 7.08, "learning_rate": 1.2219551282051282e-05, "loss": 0.0251, "step": 6490 }, { "epoch": 7.08, "learning_rate": 1.2152777777777779e-05, "loss": 0.1325, "step": 6500 }, { "epoch": 7.08, "learning_rate": 1.2086004273504274e-05, "loss": 0.0598, "step": 6510 }, { "epoch": 7.08, "learning_rate": 1.2019230769230771e-05, "loss": 0.0009, "step": 6520 }, { "epoch": 7.08, "learning_rate": 1.1952457264957266e-05, "loss": 0.0392, "step": 6530 }, { "epoch": 7.09, "learning_rate": 1.1885683760683761e-05, "loss": 0.0009, "step": 6540 }, { "epoch": 7.09, "learning_rate": 1.1818910256410257e-05, "loss": 0.1047, "step": 6550 }, { "epoch": 7.09, "learning_rate": 1.1752136752136752e-05, "loss": 0.0196, "step": 6560 }, { "epoch": 7.09, "learning_rate": 1.1685363247863249e-05, "loss": 0.0022, "step": 6570 }, { "epoch": 7.09, "learning_rate": 1.1618589743589744e-05, "loss": 0.0411, "step": 6580 }, { "epoch": 7.09, "learning_rate": 1.155181623931624e-05, "loss": 0.0023, "step": 6590 }, { "epoch": 7.09, "learning_rate": 1.1485042735042734e-05, "loss": 0.0009, "step": 6600 }, { "epoch": 7.09, "learning_rate": 1.1418269230769231e-05, "loss": 0.0007, "step": 6610 }, { "epoch": 7.09, "learning_rate": 1.1351495726495726e-05, "loss": 0.0008, "step": 6620 }, { "epoch": 7.1, "learning_rate": 1.1284722222222223e-05, "loss": 0.0008, "step": 6630 }, { "epoch": 7.1, "learning_rate": 1.1217948717948719e-05, "loss": 0.0012, "step": 6640 }, { "epoch": 7.1, "learning_rate": 1.1151175213675216e-05, "loss": 0.1342, "step": 6650 }, { "epoch": 7.1, "learning_rate": 1.1084401709401709e-05, "loss": 0.006, "step": 6660 }, { "epoch": 7.1, "eval_accuracy": 0.9726128793486306, "eval_loss": 0.12144625186920166, "eval_runtime": 86.0938, "eval_samples_per_second": 15.692, "eval_steps_per_second": 2.625, "step": 6664 }, { "epoch": 8.0, "learning_rate": 1.1017628205128206e-05, "loss": 0.002, "step": 6670 }, { "epoch": 8.0, "learning_rate": 1.0950854700854701e-05, "loss": 0.0011, "step": 6680 }, { "epoch": 8.0, "learning_rate": 1.0884081196581198e-05, "loss": 0.0076, "step": 6690 }, { "epoch": 8.0, "learning_rate": 1.0817307692307693e-05, "loss": 0.0339, "step": 6700 }, { "epoch": 8.01, "learning_rate": 1.0750534188034189e-05, "loss": 0.0542, "step": 6710 }, { "epoch": 8.01, "learning_rate": 1.0683760683760684e-05, "loss": 0.0536, "step": 6720 }, { "epoch": 8.01, "learning_rate": 1.0616987179487179e-05, "loss": 0.0007, "step": 6730 }, { "epoch": 8.01, "learning_rate": 1.0550213675213676e-05, "loss": 0.001, "step": 6740 }, { "epoch": 8.01, "learning_rate": 1.0483440170940171e-05, "loss": 0.0227, "step": 6750 }, { "epoch": 8.01, "learning_rate": 1.0416666666666668e-05, "loss": 0.0667, "step": 6760 }, { "epoch": 8.01, "learning_rate": 1.0349893162393162e-05, "loss": 0.0143, "step": 6770 }, { "epoch": 8.01, "learning_rate": 1.0283119658119658e-05, "loss": 0.0007, "step": 6780 }, { "epoch": 8.02, "learning_rate": 1.0216346153846154e-05, "loss": 0.0007, "step": 6790 }, { "epoch": 8.02, "learning_rate": 1.014957264957265e-05, "loss": 0.0008, "step": 6800 }, { "epoch": 8.02, "learning_rate": 1.0082799145299146e-05, "loss": 0.0013, "step": 6810 }, { "epoch": 8.02, "learning_rate": 1.0016025641025643e-05, "loss": 0.1765, "step": 6820 }, { "epoch": 8.02, "learning_rate": 9.949252136752136e-06, "loss": 0.0007, "step": 6830 }, { "epoch": 8.02, "learning_rate": 9.882478632478633e-06, "loss": 0.0031, "step": 6840 }, { "epoch": 8.02, "learning_rate": 9.815705128205128e-06, "loss": 0.0006, "step": 6850 }, { "epoch": 8.02, "learning_rate": 9.748931623931625e-06, "loss": 0.0021, "step": 6860 }, { "epoch": 8.02, "learning_rate": 9.68215811965812e-06, "loss": 0.0503, "step": 6870 }, { "epoch": 8.03, "learning_rate": 9.615384615384616e-06, "loss": 0.0007, "step": 6880 }, { "epoch": 8.03, "learning_rate": 9.548611111111111e-06, "loss": 0.0009, "step": 6890 }, { "epoch": 8.03, "learning_rate": 9.481837606837606e-06, "loss": 0.1196, "step": 6900 }, { "epoch": 8.03, "learning_rate": 9.415064102564103e-06, "loss": 0.0008, "step": 6910 }, { "epoch": 8.03, "learning_rate": 9.348290598290598e-06, "loss": 0.0041, "step": 6920 }, { "epoch": 8.03, "learning_rate": 9.281517094017095e-06, "loss": 0.0012, "step": 6930 }, { "epoch": 8.03, "learning_rate": 9.21474358974359e-06, "loss": 0.0011, "step": 6940 }, { "epoch": 8.03, "learning_rate": 9.147970085470086e-06, "loss": 0.1124, "step": 6950 }, { "epoch": 8.04, "learning_rate": 9.081196581196581e-06, "loss": 0.1116, "step": 6960 }, { "epoch": 8.04, "learning_rate": 9.014423076923078e-06, "loss": 0.0171, "step": 6970 }, { "epoch": 8.04, "learning_rate": 8.947649572649573e-06, "loss": 0.002, "step": 6980 }, { "epoch": 8.04, "learning_rate": 8.88087606837607e-06, "loss": 0.0688, "step": 6990 }, { "epoch": 8.04, "learning_rate": 8.814102564102565e-06, "loss": 0.0007, "step": 7000 }, { "epoch": 8.04, "learning_rate": 8.74732905982906e-06, "loss": 0.0006, "step": 7010 }, { "epoch": 8.04, "learning_rate": 8.680555555555556e-06, "loss": 0.1653, "step": 7020 }, { "epoch": 8.04, "learning_rate": 8.613782051282052e-06, "loss": 0.0007, "step": 7030 }, { "epoch": 8.05, "learning_rate": 8.547008547008548e-06, "loss": 0.0101, "step": 7040 }, { "epoch": 8.05, "learning_rate": 8.480235042735043e-06, "loss": 0.0005, "step": 7050 }, { "epoch": 8.05, "learning_rate": 8.41346153846154e-06, "loss": 0.001, "step": 7060 }, { "epoch": 8.05, "learning_rate": 8.346688034188033e-06, "loss": 0.0007, "step": 7070 }, { "epoch": 8.05, "learning_rate": 8.27991452991453e-06, "loss": 0.0314, "step": 7080 }, { "epoch": 8.05, "learning_rate": 8.213141025641025e-06, "loss": 0.0194, "step": 7090 }, { "epoch": 8.05, "learning_rate": 8.146367521367522e-06, "loss": 0.0008, "step": 7100 }, { "epoch": 8.05, "learning_rate": 8.079594017094018e-06, "loss": 0.0185, "step": 7110 }, { "epoch": 8.05, "learning_rate": 8.012820512820515e-06, "loss": 0.0006, "step": 7120 }, { "epoch": 8.06, "learning_rate": 7.946047008547008e-06, "loss": 0.0777, "step": 7130 }, { "epoch": 8.06, "learning_rate": 7.879273504273505e-06, "loss": 0.0006, "step": 7140 }, { "epoch": 8.06, "learning_rate": 7.8125e-06, "loss": 0.0008, "step": 7150 }, { "epoch": 8.06, "learning_rate": 7.745726495726497e-06, "loss": 0.0284, "step": 7160 }, { "epoch": 8.06, "learning_rate": 7.678952991452992e-06, "loss": 0.0006, "step": 7170 }, { "epoch": 8.06, "learning_rate": 7.612179487179487e-06, "loss": 0.0011, "step": 7180 }, { "epoch": 8.06, "learning_rate": 7.545405982905983e-06, "loss": 0.0007, "step": 7190 }, { "epoch": 8.06, "learning_rate": 7.478632478632479e-06, "loss": 0.0008, "step": 7200 }, { "epoch": 8.07, "learning_rate": 7.411858974358975e-06, "loss": 0.0011, "step": 7210 }, { "epoch": 8.07, "learning_rate": 7.345085470085471e-06, "loss": 0.0011, "step": 7220 }, { "epoch": 8.07, "learning_rate": 7.278311965811967e-06, "loss": 0.0012, "step": 7230 }, { "epoch": 8.07, "learning_rate": 7.211538461538461e-06, "loss": 0.001, "step": 7240 }, { "epoch": 8.07, "learning_rate": 7.1447649572649574e-06, "loss": 0.0006, "step": 7250 }, { "epoch": 8.07, "learning_rate": 7.0779914529914535e-06, "loss": 0.0026, "step": 7260 }, { "epoch": 8.07, "learning_rate": 7.011217948717949e-06, "loss": 0.001, "step": 7270 }, { "epoch": 8.07, "learning_rate": 6.944444444444445e-06, "loss": 0.0005, "step": 7280 }, { "epoch": 8.08, "learning_rate": 6.877670940170941e-06, "loss": 0.0011, "step": 7290 }, { "epoch": 8.08, "learning_rate": 6.810897435897435e-06, "loss": 0.0005, "step": 7300 }, { "epoch": 8.08, "learning_rate": 6.744123931623931e-06, "loss": 0.0026, "step": 7310 }, { "epoch": 8.08, "learning_rate": 6.677350427350427e-06, "loss": 0.0016, "step": 7320 }, { "epoch": 8.08, "learning_rate": 6.610576923076923e-06, "loss": 0.0029, "step": 7330 }, { "epoch": 8.08, "learning_rate": 6.5438034188034195e-06, "loss": 0.0005, "step": 7340 }, { "epoch": 8.08, "learning_rate": 6.4770299145299155e-06, "loss": 0.0099, "step": 7350 }, { "epoch": 8.08, "learning_rate": 6.41025641025641e-06, "loss": 0.0007, "step": 7360 }, { "epoch": 8.08, "learning_rate": 6.343482905982906e-06, "loss": 0.0018, "step": 7370 }, { "epoch": 8.09, "learning_rate": 6.276709401709402e-06, "loss": 0.001, "step": 7380 }, { "epoch": 8.09, "learning_rate": 6.209935897435898e-06, "loss": 0.0008, "step": 7390 }, { "epoch": 8.09, "learning_rate": 6.143162393162393e-06, "loss": 0.0011, "step": 7400 }, { "epoch": 8.09, "learning_rate": 6.076388888888889e-06, "loss": 0.0098, "step": 7410 }, { "epoch": 8.09, "learning_rate": 6.0096153846153855e-06, "loss": 0.0005, "step": 7420 }, { "epoch": 8.09, "learning_rate": 5.942841880341881e-06, "loss": 0.0006, "step": 7430 }, { "epoch": 8.09, "learning_rate": 5.876068376068376e-06, "loss": 0.0009, "step": 7440 }, { "epoch": 8.09, "learning_rate": 5.809294871794872e-06, "loss": 0.0008, "step": 7450 }, { "epoch": 8.1, "learning_rate": 5.742521367521367e-06, "loss": 0.0007, "step": 7460 }, { "epoch": 8.1, "learning_rate": 5.675747863247863e-06, "loss": 0.0006, "step": 7470 }, { "epoch": 8.1, "learning_rate": 5.608974358974359e-06, "loss": 0.0007, "step": 7480 }, { "epoch": 8.1, "learning_rate": 5.5422008547008545e-06, "loss": 0.001, "step": 7490 }, { "epoch": 8.1, "eval_accuracy": 0.9822353811991118, "eval_loss": 0.07449387013912201, "eval_runtime": 85.6508, "eval_samples_per_second": 15.773, "eval_steps_per_second": 2.639, "step": 7497 }, { "epoch": 9.0, "learning_rate": 5.475427350427351e-06, "loss": 0.0022, "step": 7500 }, { "epoch": 9.0, "learning_rate": 5.408653846153847e-06, "loss": 0.0181, "step": 7510 }, { "epoch": 9.0, "learning_rate": 5.341880341880342e-06, "loss": 0.0153, "step": 7520 }, { "epoch": 9.0, "learning_rate": 5.275106837606838e-06, "loss": 0.0246, "step": 7530 }, { "epoch": 9.01, "learning_rate": 5.208333333333334e-06, "loss": 0.0006, "step": 7540 }, { "epoch": 9.01, "learning_rate": 5.141559829059829e-06, "loss": 0.0717, "step": 7550 }, { "epoch": 9.01, "learning_rate": 5.074786324786325e-06, "loss": 0.0007, "step": 7560 }, { "epoch": 9.01, "learning_rate": 5.008012820512821e-06, "loss": 0.0005, "step": 7570 }, { "epoch": 9.01, "learning_rate": 4.9412393162393166e-06, "loss": 0.0009, "step": 7580 }, { "epoch": 9.01, "learning_rate": 4.874465811965813e-06, "loss": 0.0005, "step": 7590 }, { "epoch": 9.01, "learning_rate": 4.807692307692308e-06, "loss": 0.0005, "step": 7600 }, { "epoch": 9.01, "learning_rate": 4.740918803418803e-06, "loss": 0.0005, "step": 7610 }, { "epoch": 9.01, "learning_rate": 4.674145299145299e-06, "loss": 0.1065, "step": 7620 }, { "epoch": 9.02, "learning_rate": 4.607371794871795e-06, "loss": 0.0561, "step": 7630 }, { "epoch": 9.02, "learning_rate": 4.5405982905982904e-06, "loss": 0.0006, "step": 7640 }, { "epoch": 9.02, "learning_rate": 4.4738247863247865e-06, "loss": 0.0006, "step": 7650 }, { "epoch": 9.02, "learning_rate": 4.4070512820512826e-06, "loss": 0.0009, "step": 7660 }, { "epoch": 9.02, "learning_rate": 4.340277777777778e-06, "loss": 0.0006, "step": 7670 }, { "epoch": 9.02, "learning_rate": 4.273504273504274e-06, "loss": 0.0006, "step": 7680 }, { "epoch": 9.02, "learning_rate": 4.20673076923077e-06, "loss": 0.0005, "step": 7690 }, { "epoch": 9.02, "learning_rate": 4.139957264957265e-06, "loss": 0.0006, "step": 7700 }, { "epoch": 9.03, "learning_rate": 4.073183760683761e-06, "loss": 0.0009, "step": 7710 }, { "epoch": 9.03, "learning_rate": 4.006410256410257e-06, "loss": 0.1264, "step": 7720 }, { "epoch": 9.03, "learning_rate": 3.9396367521367525e-06, "loss": 0.0008, "step": 7730 }, { "epoch": 9.03, "learning_rate": 3.8728632478632485e-06, "loss": 0.0005, "step": 7740 }, { "epoch": 9.03, "learning_rate": 3.8060897435897433e-06, "loss": 0.0009, "step": 7750 }, { "epoch": 9.03, "learning_rate": 3.7393162393162394e-06, "loss": 0.0009, "step": 7760 }, { "epoch": 9.03, "learning_rate": 3.6725427350427355e-06, "loss": 0.0005, "step": 7770 }, { "epoch": 9.03, "learning_rate": 3.6057692307692307e-06, "loss": 0.0427, "step": 7780 }, { "epoch": 9.04, "learning_rate": 3.5389957264957267e-06, "loss": 0.0005, "step": 7790 }, { "epoch": 9.04, "learning_rate": 3.4722222222222224e-06, "loss": 0.0696, "step": 7800 }, { "epoch": 9.04, "learning_rate": 3.4054487179487176e-06, "loss": 0.0006, "step": 7810 }, { "epoch": 9.04, "learning_rate": 3.3386752136752137e-06, "loss": 0.0006, "step": 7820 }, { "epoch": 9.04, "learning_rate": 3.2719017094017097e-06, "loss": 0.005, "step": 7830 }, { "epoch": 9.04, "learning_rate": 3.205128205128205e-06, "loss": 0.0022, "step": 7840 }, { "epoch": 9.04, "learning_rate": 3.138354700854701e-06, "loss": 0.0006, "step": 7850 }, { "epoch": 9.04, "learning_rate": 3.0715811965811967e-06, "loss": 0.0093, "step": 7860 }, { "epoch": 9.04, "learning_rate": 3.0048076923076927e-06, "loss": 0.0007, "step": 7870 }, { "epoch": 9.05, "learning_rate": 2.938034188034188e-06, "loss": 0.0006, "step": 7880 }, { "epoch": 9.05, "learning_rate": 2.8712606837606836e-06, "loss": 0.0152, "step": 7890 }, { "epoch": 9.05, "learning_rate": 2.8044871794871797e-06, "loss": 0.0005, "step": 7900 }, { "epoch": 9.05, "learning_rate": 2.7377136752136753e-06, "loss": 0.0071, "step": 7910 }, { "epoch": 9.05, "learning_rate": 2.670940170940171e-06, "loss": 0.1172, "step": 7920 }, { "epoch": 9.05, "learning_rate": 2.604166666666667e-06, "loss": 0.0031, "step": 7930 }, { "epoch": 9.05, "learning_rate": 2.5373931623931626e-06, "loss": 0.0005, "step": 7940 }, { "epoch": 9.05, "learning_rate": 2.4706196581196583e-06, "loss": 0.0006, "step": 7950 }, { "epoch": 9.06, "learning_rate": 2.403846153846154e-06, "loss": 0.0108, "step": 7960 }, { "epoch": 9.06, "learning_rate": 2.3370726495726496e-06, "loss": 0.0005, "step": 7970 }, { "epoch": 9.06, "learning_rate": 2.2702991452991452e-06, "loss": 0.0005, "step": 7980 }, { "epoch": 9.06, "learning_rate": 2.2035256410256413e-06, "loss": 0.0005, "step": 7990 }, { "epoch": 9.06, "learning_rate": 2.136752136752137e-06, "loss": 0.0005, "step": 8000 }, { "epoch": 9.06, "learning_rate": 2.0699786324786326e-06, "loss": 0.0009, "step": 8010 }, { "epoch": 9.06, "learning_rate": 2.0032051282051286e-06, "loss": 0.0006, "step": 8020 }, { "epoch": 9.06, "learning_rate": 1.9364316239316243e-06, "loss": 0.0005, "step": 8030 }, { "epoch": 9.07, "learning_rate": 1.8696581196581197e-06, "loss": 0.001, "step": 8040 }, { "epoch": 9.07, "learning_rate": 1.8028846153846153e-06, "loss": 0.0005, "step": 8050 }, { "epoch": 9.07, "learning_rate": 1.7361111111111112e-06, "loss": 0.0005, "step": 8060 }, { "epoch": 9.07, "learning_rate": 1.6693376068376068e-06, "loss": 0.0005, "step": 8070 }, { "epoch": 9.07, "learning_rate": 1.6025641025641025e-06, "loss": 0.0008, "step": 8080 }, { "epoch": 9.07, "learning_rate": 1.5357905982905983e-06, "loss": 0.0005, "step": 8090 }, { "epoch": 9.07, "learning_rate": 1.469017094017094e-06, "loss": 0.0005, "step": 8100 }, { "epoch": 9.07, "learning_rate": 1.4022435897435898e-06, "loss": 0.0089, "step": 8110 }, { "epoch": 9.07, "learning_rate": 1.3354700854700855e-06, "loss": 0.0009, "step": 8120 }, { "epoch": 9.08, "learning_rate": 1.2686965811965813e-06, "loss": 0.001, "step": 8130 }, { "epoch": 9.08, "learning_rate": 1.201923076923077e-06, "loss": 0.0005, "step": 8140 }, { "epoch": 9.08, "learning_rate": 1.1351495726495726e-06, "loss": 0.0006, "step": 8150 }, { "epoch": 9.08, "learning_rate": 1.0683760683760685e-06, "loss": 0.0005, "step": 8160 }, { "epoch": 9.08, "learning_rate": 1.0016025641025643e-06, "loss": 0.0005, "step": 8170 }, { "epoch": 9.08, "learning_rate": 9.348290598290598e-07, "loss": 0.0005, "step": 8180 }, { "epoch": 9.08, "learning_rate": 8.680555555555556e-07, "loss": 0.0005, "step": 8190 }, { "epoch": 9.08, "learning_rate": 8.012820512820512e-07, "loss": 0.0006, "step": 8200 }, { "epoch": 9.09, "learning_rate": 7.34508547008547e-07, "loss": 0.0005, "step": 8210 }, { "epoch": 9.09, "learning_rate": 6.677350427350427e-07, "loss": 0.0005, "step": 8220 }, { "epoch": 9.09, "learning_rate": 6.009615384615385e-07, "loss": 0.0004, "step": 8230 }, { "epoch": 9.09, "learning_rate": 5.341880341880342e-07, "loss": 0.0049, "step": 8240 }, { "epoch": 9.09, "learning_rate": 4.674145299145299e-07, "loss": 0.0011, "step": 8250 }, { "epoch": 9.09, "learning_rate": 4.006410256410256e-07, "loss": 0.0007, "step": 8260 }, { "epoch": 9.09, "learning_rate": 3.3386752136752137e-07, "loss": 0.0005, "step": 8270 }, { "epoch": 9.09, "learning_rate": 2.670940170940171e-07, "loss": 0.0006, "step": 8280 }, { "epoch": 9.1, "learning_rate": 2.003205128205128e-07, "loss": 0.0008, "step": 8290 }, { "epoch": 9.1, "learning_rate": 1.3354700854700856e-07, "loss": 0.0676, "step": 8300 }, { "epoch": 9.1, "learning_rate": 6.677350427350428e-08, "loss": 0.0005, "step": 8310 }, { "epoch": 9.1, "learning_rate": 0.0, "loss": 0.0005, "step": 8320 }, { "epoch": 9.1, "eval_accuracy": 0.9837157660991858, "eval_loss": 0.0756925716996193, "eval_runtime": 86.0742, "eval_samples_per_second": 15.696, "eval_steps_per_second": 2.626, "step": 8320 }, { "epoch": 9.1, "step": 8320, "total_flos": 6.218584863603543e+19, "train_loss": 0.42279570934445543, "train_runtime": 6828.4751, "train_samples_per_second": 7.311, "train_steps_per_second": 1.218 }, { "epoch": 9.1, "eval_accuracy": 0.9837157660991858, "eval_loss": 0.0756925716996193, "eval_runtime": 86.0371, "eval_samples_per_second": 15.703, "eval_steps_per_second": 2.627, "step": 8320 }, { "epoch": 9.1, "eval_accuracy": 0.0, "eval_loss": 9.850842475891113, "eval_runtime": 334.1168, "eval_samples_per_second": 14.612, "eval_steps_per_second": 2.436, "step": 8320 }, { "epoch": 9.1, "eval_accuracy": 0.9837157660991858, "eval_loss": 0.0756925716996193, "eval_runtime": 85.3939, "eval_samples_per_second": 15.821, "eval_steps_per_second": 2.647, "step": 8320 } ], "logging_steps": 10, "max_steps": 8320, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 6.218584863603543e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }