{ "best_metric": 0.9925, "best_model_checkpoint": "./model/checkpoint-4382", "epoch": 30.0, "eval_steps": 500, "global_step": 9390, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9169329073482426e-06, "loss": 1.4575, "step": 10 }, { "epoch": 0.06, "learning_rate": 4.046858359957402e-06, "loss": 1.3457, "step": 20 }, { "epoch": 0.1, "learning_rate": 6.17678381256656e-06, "loss": 1.2723, "step": 30 }, { "epoch": 0.13, "learning_rate": 8.306709265175718e-06, "loss": 1.0944, "step": 40 }, { "epoch": 0.16, "learning_rate": 1.0223642172523962e-05, "loss": 0.8709, "step": 50 }, { "epoch": 0.19, "learning_rate": 1.235356762513312e-05, "loss": 0.7749, "step": 60 }, { "epoch": 0.22, "learning_rate": 1.4270500532481365e-05, "loss": 0.7163, "step": 70 }, { "epoch": 0.26, "learning_rate": 1.6400425985090524e-05, "loss": 0.6857, "step": 80 }, { "epoch": 0.29, "learning_rate": 1.8530351437699682e-05, "loss": 0.6966, "step": 90 }, { "epoch": 0.32, "learning_rate": 2.066027689030884e-05, "loss": 0.6462, "step": 100 }, { "epoch": 0.35, "learning_rate": 2.2790202342918e-05, "loss": 0.5817, "step": 110 }, { "epoch": 0.38, "learning_rate": 2.4920127795527157e-05, "loss": 0.5795, "step": 120 }, { "epoch": 0.42, "learning_rate": 2.7050053248136315e-05, "loss": 0.6217, "step": 130 }, { "epoch": 0.45, "learning_rate": 2.9179978700745477e-05, "loss": 0.6524, "step": 140 }, { "epoch": 0.48, "learning_rate": 3.130990415335464e-05, "loss": 0.5952, "step": 150 }, { "epoch": 0.51, "learning_rate": 3.343982960596379e-05, "loss": 0.6165, "step": 160 }, { "epoch": 0.54, "learning_rate": 3.556975505857295e-05, "loss": 0.5443, "step": 170 }, { "epoch": 0.58, "learning_rate": 3.769968051118211e-05, "loss": 0.5546, "step": 180 }, { "epoch": 0.61, "learning_rate": 3.9829605963791265e-05, "loss": 0.6735, "step": 190 }, { "epoch": 0.64, "learning_rate": 4.195953141640043e-05, "loss": 0.6384, "step": 200 }, { "epoch": 0.67, "learning_rate": 4.408945686900959e-05, "loss": 0.5343, "step": 210 }, { "epoch": 0.7, "learning_rate": 4.6219382321618746e-05, "loss": 0.5015, "step": 220 }, { "epoch": 0.73, "learning_rate": 4.8349307774227905e-05, "loss": 0.5105, "step": 230 }, { "epoch": 0.77, "learning_rate": 5.047923322683706e-05, "loss": 0.4695, "step": 240 }, { "epoch": 0.8, "learning_rate": 5.260915867944622e-05, "loss": 0.4371, "step": 250 }, { "epoch": 0.83, "learning_rate": 5.4739084132055386e-05, "loss": 0.4953, "step": 260 }, { "epoch": 0.86, "learning_rate": 5.686900958466454e-05, "loss": 0.4942, "step": 270 }, { "epoch": 0.89, "learning_rate": 5.8998935037273696e-05, "loss": 0.5203, "step": 280 }, { "epoch": 0.93, "learning_rate": 6.112886048988285e-05, "loss": 0.4275, "step": 290 }, { "epoch": 0.96, "learning_rate": 6.30457933972311e-05, "loss": 0.5087, "step": 300 }, { "epoch": 0.99, "learning_rate": 6.517571884984026e-05, "loss": 0.4345, "step": 310 }, { "epoch": 1.0, "eval_accuracy": 0.8075, "eval_loss": 0.41250982880592346, "eval_runtime": 5.1206, "eval_samples_per_second": 78.116, "eval_steps_per_second": 9.764, "step": 313 }, { "epoch": 1.02, "learning_rate": 6.730564430244943e-05, "loss": 0.3821, "step": 320 }, { "epoch": 1.05, "learning_rate": 6.943556975505858e-05, "loss": 0.3927, "step": 330 }, { "epoch": 1.09, "learning_rate": 7.156549520766773e-05, "loss": 0.3204, "step": 340 }, { "epoch": 1.12, "learning_rate": 7.36954206602769e-05, "loss": 0.3437, "step": 350 }, { "epoch": 1.15, "learning_rate": 7.582534611288606e-05, "loss": 0.4701, "step": 360 }, { "epoch": 1.18, "learning_rate": 7.795527156549521e-05, "loss": 0.3827, "step": 370 }, { "epoch": 1.21, "learning_rate": 8.008519701810438e-05, "loss": 0.3503, "step": 380 }, { "epoch": 1.25, "learning_rate": 8.221512247071353e-05, "loss": 0.3412, "step": 390 }, { "epoch": 1.28, "learning_rate": 8.434504792332268e-05, "loss": 0.3545, "step": 400 }, { "epoch": 1.31, "learning_rate": 8.647497337593184e-05, "loss": 0.4233, "step": 410 }, { "epoch": 1.34, "learning_rate": 8.860489882854101e-05, "loss": 0.3038, "step": 420 }, { "epoch": 1.37, "learning_rate": 9.073482428115016e-05, "loss": 0.2849, "step": 430 }, { "epoch": 1.41, "learning_rate": 9.286474973375933e-05, "loss": 0.3164, "step": 440 }, { "epoch": 1.44, "learning_rate": 9.499467518636849e-05, "loss": 0.3366, "step": 450 }, { "epoch": 1.47, "learning_rate": 9.712460063897764e-05, "loss": 0.3453, "step": 460 }, { "epoch": 1.5, "learning_rate": 9.92545260915868e-05, "loss": 0.3852, "step": 470 }, { "epoch": 1.53, "learning_rate": 0.00010138445154419596, "loss": 0.3932, "step": 480 }, { "epoch": 1.57, "learning_rate": 0.00010351437699680511, "loss": 0.3085, "step": 490 }, { "epoch": 1.6, "learning_rate": 0.00010564430244941428, "loss": 0.4473, "step": 500 }, { "epoch": 1.63, "learning_rate": 0.00010777422790202343, "loss": 0.381, "step": 510 }, { "epoch": 1.66, "learning_rate": 0.0001099041533546326, "loss": 0.3917, "step": 520 }, { "epoch": 1.69, "learning_rate": 0.00011203407880724174, "loss": 0.3807, "step": 530 }, { "epoch": 1.73, "learning_rate": 0.00011416400425985092, "loss": 0.3381, "step": 540 }, { "epoch": 1.76, "learning_rate": 0.00011629392971246007, "loss": 0.3207, "step": 550 }, { "epoch": 1.79, "learning_rate": 0.00011842385516506922, "loss": 0.2718, "step": 560 }, { "epoch": 1.82, "learning_rate": 0.00012055378061767839, "loss": 0.3206, "step": 570 }, { "epoch": 1.85, "learning_rate": 0.00012268370607028753, "loss": 0.3239, "step": 580 }, { "epoch": 1.88, "learning_rate": 0.0001248136315228967, "loss": 0.2478, "step": 590 }, { "epoch": 1.92, "learning_rate": 0.00012694355697550586, "loss": 0.1582, "step": 600 }, { "epoch": 1.95, "learning_rate": 0.000129073482428115, "loss": 0.2166, "step": 610 }, { "epoch": 1.98, "learning_rate": 0.0001312034078807242, "loss": 0.5444, "step": 620 }, { "epoch": 2.0, "eval_accuracy": 0.9075, "eval_loss": 0.28795498609542847, "eval_runtime": 5.2021, "eval_samples_per_second": 76.892, "eval_steps_per_second": 9.612, "step": 626 }, { "epoch": 2.01, "learning_rate": 0.00013333333333333334, "loss": 0.2552, "step": 630 }, { "epoch": 2.04, "learning_rate": 0.0001354632587859425, "loss": 0.2285, "step": 640 }, { "epoch": 2.08, "learning_rate": 0.00013759318423855164, "loss": 0.296, "step": 650 }, { "epoch": 2.11, "learning_rate": 0.00013972310969116082, "loss": 0.2718, "step": 660 }, { "epoch": 2.14, "learning_rate": 0.00014185303514376997, "loss": 0.1728, "step": 670 }, { "epoch": 2.17, "learning_rate": 0.00014398296059637912, "loss": 0.1852, "step": 680 }, { "epoch": 2.2, "learning_rate": 0.0001461128860489883, "loss": 0.2437, "step": 690 }, { "epoch": 2.24, "learning_rate": 0.00014824281150159745, "loss": 0.2741, "step": 700 }, { "epoch": 2.27, "learning_rate": 0.0001503727369542066, "loss": 0.1316, "step": 710 }, { "epoch": 2.3, "learning_rate": 0.00015250266240681576, "loss": 0.2716, "step": 720 }, { "epoch": 2.33, "learning_rate": 0.00015463258785942494, "loss": 0.2318, "step": 730 }, { "epoch": 2.36, "learning_rate": 0.0001567625133120341, "loss": 0.4942, "step": 740 }, { "epoch": 2.4, "learning_rate": 0.00015889243876464324, "loss": 0.218, "step": 750 }, { "epoch": 2.43, "learning_rate": 0.00016102236421725242, "loss": 0.2507, "step": 760 }, { "epoch": 2.46, "learning_rate": 0.00016315228966986157, "loss": 0.2782, "step": 770 }, { "epoch": 2.49, "learning_rate": 0.00016528221512247072, "loss": 0.1941, "step": 780 }, { "epoch": 2.52, "learning_rate": 0.00016741214057507987, "loss": 0.4018, "step": 790 }, { "epoch": 2.56, "learning_rate": 0.00016954206602768902, "loss": 0.3268, "step": 800 }, { "epoch": 2.59, "learning_rate": 0.0001716719914802982, "loss": 0.2585, "step": 810 }, { "epoch": 2.62, "learning_rate": 0.00017380191693290735, "loss": 0.3031, "step": 820 }, { "epoch": 2.65, "learning_rate": 0.00017593184238551653, "loss": 0.313, "step": 830 }, { "epoch": 2.68, "learning_rate": 0.00017806176783812566, "loss": 0.3424, "step": 840 }, { "epoch": 2.72, "learning_rate": 0.00018019169329073484, "loss": 0.2635, "step": 850 }, { "epoch": 2.75, "learning_rate": 0.000182321618743344, "loss": 0.2479, "step": 860 }, { "epoch": 2.78, "learning_rate": 0.00018445154419595314, "loss": 0.2651, "step": 870 }, { "epoch": 2.81, "learning_rate": 0.00018658146964856232, "loss": 0.228, "step": 880 }, { "epoch": 2.84, "learning_rate": 0.00018871139510117147, "loss": 0.288, "step": 890 }, { "epoch": 2.88, "learning_rate": 0.00019084132055378062, "loss": 0.2814, "step": 900 }, { "epoch": 2.91, "learning_rate": 0.00019297124600638977, "loss": 0.2692, "step": 910 }, { "epoch": 2.94, "learning_rate": 0.00019510117145899895, "loss": 0.235, "step": 920 }, { "epoch": 2.97, "learning_rate": 0.0001972310969116081, "loss": 0.2855, "step": 930 }, { "epoch": 3.0, "eval_accuracy": 0.88, "eval_loss": 0.3687535524368286, "eval_runtime": 6.15, "eval_samples_per_second": 65.041, "eval_steps_per_second": 8.13, "step": 939 }, { "epoch": 3.0, "learning_rate": 0.00019936102236421725, "loss": 0.2939, "step": 940 }, { "epoch": 3.04, "learning_rate": 0.00019983433913146374, "loss": 0.3828, "step": 950 }, { "epoch": 3.07, "learning_rate": 0.0001995976807478405, "loss": 0.1991, "step": 960 }, { "epoch": 3.1, "learning_rate": 0.00019936102236421725, "loss": 0.2212, "step": 970 }, { "epoch": 3.13, "learning_rate": 0.00019912436398059402, "loss": 0.2374, "step": 980 }, { "epoch": 3.16, "learning_rate": 0.0001988877055969708, "loss": 0.1184, "step": 990 }, { "epoch": 3.19, "learning_rate": 0.00019865104721334756, "loss": 0.1316, "step": 1000 }, { "epoch": 3.23, "learning_rate": 0.0001984143888297243, "loss": 0.2247, "step": 1010 }, { "epoch": 3.26, "learning_rate": 0.00019817773044610105, "loss": 0.2707, "step": 1020 }, { "epoch": 3.29, "learning_rate": 0.00019794107206247782, "loss": 0.1726, "step": 1030 }, { "epoch": 3.32, "learning_rate": 0.00019770441367885456, "loss": 0.2527, "step": 1040 }, { "epoch": 3.35, "learning_rate": 0.00019746775529523133, "loss": 0.2112, "step": 1050 }, { "epoch": 3.39, "learning_rate": 0.0001972310969116081, "loss": 0.1687, "step": 1060 }, { "epoch": 3.42, "learning_rate": 0.00019699443852798487, "loss": 0.3774, "step": 1070 }, { "epoch": 3.45, "learning_rate": 0.00019675778014436161, "loss": 0.2342, "step": 1080 }, { "epoch": 3.48, "learning_rate": 0.00019652112176073838, "loss": 0.195, "step": 1090 }, { "epoch": 3.51, "learning_rate": 0.00019628446337711515, "loss": 0.1589, "step": 1100 }, { "epoch": 3.55, "learning_rate": 0.00019604780499349192, "loss": 0.2088, "step": 1110 }, { "epoch": 3.58, "learning_rate": 0.00019581114660986867, "loss": 0.2503, "step": 1120 }, { "epoch": 3.61, "learning_rate": 0.0001955744882262454, "loss": 0.1818, "step": 1130 }, { "epoch": 3.64, "learning_rate": 0.00019533782984262218, "loss": 0.2153, "step": 1140 }, { "epoch": 3.67, "learning_rate": 0.00019510117145899895, "loss": 0.3874, "step": 1150 }, { "epoch": 3.71, "learning_rate": 0.0001948645130753757, "loss": 0.179, "step": 1160 }, { "epoch": 3.74, "learning_rate": 0.00019462785469175246, "loss": 0.1135, "step": 1170 }, { "epoch": 3.77, "learning_rate": 0.00019439119630812923, "loss": 0.3062, "step": 1180 }, { "epoch": 3.8, "learning_rate": 0.00019415453792450598, "loss": 0.163, "step": 1190 }, { "epoch": 3.83, "learning_rate": 0.00019391787954088275, "loss": 0.2267, "step": 1200 }, { "epoch": 3.87, "learning_rate": 0.00019368122115725952, "loss": 0.1419, "step": 1210 }, { "epoch": 3.9, "learning_rate": 0.00019344456277363629, "loss": 0.1842, "step": 1220 }, { "epoch": 3.93, "learning_rate": 0.00019320790439001303, "loss": 0.1742, "step": 1230 }, { "epoch": 3.96, "learning_rate": 0.00019297124600638977, "loss": 0.1806, "step": 1240 }, { "epoch": 3.99, "learning_rate": 0.00019273458762276654, "loss": 0.1533, "step": 1250 }, { "epoch": 4.0, "eval_accuracy": 0.94, "eval_loss": 0.2066069096326828, "eval_runtime": 5.147, "eval_samples_per_second": 77.715, "eval_steps_per_second": 9.714, "step": 1252 }, { "epoch": 4.03, "learning_rate": 0.0001924979292391433, "loss": 0.153, "step": 1260 }, { "epoch": 4.06, "learning_rate": 0.00019226127085552005, "loss": 0.1504, "step": 1270 }, { "epoch": 4.09, "learning_rate": 0.00019202461247189682, "loss": 0.1796, "step": 1280 }, { "epoch": 4.12, "learning_rate": 0.0001917879540882736, "loss": 0.1309, "step": 1290 }, { "epoch": 4.15, "learning_rate": 0.00019155129570465034, "loss": 0.1167, "step": 1300 }, { "epoch": 4.19, "learning_rate": 0.0001913146373210271, "loss": 0.0874, "step": 1310 }, { "epoch": 4.22, "learning_rate": 0.00019107797893740388, "loss": 0.137, "step": 1320 }, { "epoch": 4.25, "learning_rate": 0.00019084132055378062, "loss": 0.0889, "step": 1330 }, { "epoch": 4.28, "learning_rate": 0.0001906046621701574, "loss": 0.1578, "step": 1340 }, { "epoch": 4.31, "learning_rate": 0.00019036800378653413, "loss": 0.1551, "step": 1350 }, { "epoch": 4.35, "learning_rate": 0.0001901313454029109, "loss": 0.087, "step": 1360 }, { "epoch": 4.38, "learning_rate": 0.00018989468701928767, "loss": 0.0657, "step": 1370 }, { "epoch": 4.41, "learning_rate": 0.00018965802863566442, "loss": 0.1458, "step": 1380 }, { "epoch": 4.44, "learning_rate": 0.00018942137025204119, "loss": 0.2047, "step": 1390 }, { "epoch": 4.47, "learning_rate": 0.00018918471186841796, "loss": 0.1534, "step": 1400 }, { "epoch": 4.5, "learning_rate": 0.00018894805348479473, "loss": 0.11, "step": 1410 }, { "epoch": 4.54, "learning_rate": 0.00018871139510117147, "loss": 0.0986, "step": 1420 }, { "epoch": 4.57, "learning_rate": 0.00018847473671754824, "loss": 0.1409, "step": 1430 }, { "epoch": 4.6, "learning_rate": 0.00018823807833392498, "loss": 0.0848, "step": 1440 }, { "epoch": 4.63, "learning_rate": 0.00018800141995030175, "loss": 0.1584, "step": 1450 }, { "epoch": 4.66, "learning_rate": 0.0001877647615666785, "loss": 0.1823, "step": 1460 }, { "epoch": 4.7, "learning_rate": 0.00018752810318305526, "loss": 0.1486, "step": 1470 }, { "epoch": 4.73, "learning_rate": 0.00018729144479943203, "loss": 0.0709, "step": 1480 }, { "epoch": 4.76, "learning_rate": 0.00018705478641580878, "loss": 0.1484, "step": 1490 }, { "epoch": 4.79, "learning_rate": 0.00018681812803218555, "loss": 0.1866, "step": 1500 }, { "epoch": 4.82, "learning_rate": 0.00018658146964856232, "loss": 0.1004, "step": 1510 }, { "epoch": 4.86, "learning_rate": 0.0001863448112649391, "loss": 0.095, "step": 1520 }, { "epoch": 4.89, "learning_rate": 0.00018610815288131583, "loss": 0.1464, "step": 1530 }, { "epoch": 4.92, "learning_rate": 0.0001858714944976926, "loss": 0.14, "step": 1540 }, { "epoch": 4.95, "learning_rate": 0.00018563483611406934, "loss": 0.1327, "step": 1550 }, { "epoch": 4.98, "learning_rate": 0.0001853981777304461, "loss": 0.0551, "step": 1560 }, { "epoch": 5.0, "eval_accuracy": 0.9725, "eval_loss": 0.12336118519306183, "eval_runtime": 5.8571, "eval_samples_per_second": 68.293, "eval_steps_per_second": 8.537, "step": 1565 }, { "epoch": 5.02, "learning_rate": 0.00018516151934682286, "loss": 0.1012, "step": 1570 }, { "epoch": 5.05, "learning_rate": 0.00018492486096319963, "loss": 0.0478, "step": 1580 }, { "epoch": 5.08, "learning_rate": 0.0001846882025795764, "loss": 0.1106, "step": 1590 }, { "epoch": 5.11, "learning_rate": 0.00018445154419595314, "loss": 0.1352, "step": 1600 }, { "epoch": 5.14, "learning_rate": 0.0001842148858123299, "loss": 0.2177, "step": 1610 }, { "epoch": 5.18, "learning_rate": 0.00018397822742870668, "loss": 0.1155, "step": 1620 }, { "epoch": 5.21, "learning_rate": 0.00018374156904508345, "loss": 0.067, "step": 1630 }, { "epoch": 5.24, "learning_rate": 0.0001835049106614602, "loss": 0.0788, "step": 1640 }, { "epoch": 5.27, "learning_rate": 0.00018326825227783696, "loss": 0.1365, "step": 1650 }, { "epoch": 5.3, "learning_rate": 0.0001830315938942137, "loss": 0.096, "step": 1660 }, { "epoch": 5.34, "learning_rate": 0.00018279493551059047, "loss": 0.1184, "step": 1670 }, { "epoch": 5.37, "learning_rate": 0.00018255827712696722, "loss": 0.0786, "step": 1680 }, { "epoch": 5.4, "learning_rate": 0.000182321618743344, "loss": 0.1599, "step": 1690 }, { "epoch": 5.43, "learning_rate": 0.00018208496035972076, "loss": 0.0736, "step": 1700 }, { "epoch": 5.46, "learning_rate": 0.0001818483019760975, "loss": 0.1095, "step": 1710 }, { "epoch": 5.5, "learning_rate": 0.00018161164359247427, "loss": 0.1107, "step": 1720 }, { "epoch": 5.53, "learning_rate": 0.00018137498520885104, "loss": 0.0465, "step": 1730 }, { "epoch": 5.56, "learning_rate": 0.0001811383268252278, "loss": 0.1244, "step": 1740 }, { "epoch": 5.59, "learning_rate": 0.00018090166844160455, "loss": 0.1155, "step": 1750 }, { "epoch": 5.62, "learning_rate": 0.00018066501005798132, "loss": 0.1181, "step": 1760 }, { "epoch": 5.65, "learning_rate": 0.00018042835167435807, "loss": 0.0315, "step": 1770 }, { "epoch": 5.69, "learning_rate": 0.00018019169329073484, "loss": 0.0446, "step": 1780 }, { "epoch": 5.72, "learning_rate": 0.00017995503490711158, "loss": 0.1342, "step": 1790 }, { "epoch": 5.75, "learning_rate": 0.00017971837652348835, "loss": 0.1308, "step": 1800 }, { "epoch": 5.78, "learning_rate": 0.00017948171813986512, "loss": 0.0927, "step": 1810 }, { "epoch": 5.81, "learning_rate": 0.0001792450597562419, "loss": 0.1147, "step": 1820 }, { "epoch": 5.85, "learning_rate": 0.00017900840137261863, "loss": 0.0814, "step": 1830 }, { "epoch": 5.88, "learning_rate": 0.0001787717429889954, "loss": 0.1629, "step": 1840 }, { "epoch": 5.91, "learning_rate": 0.00017853508460537217, "loss": 0.1288, "step": 1850 }, { "epoch": 5.94, "learning_rate": 0.00017829842622174891, "loss": 0.2005, "step": 1860 }, { "epoch": 5.97, "learning_rate": 0.00017806176783812566, "loss": 0.1027, "step": 1870 }, { "epoch": 6.0, "eval_accuracy": 0.97, "eval_loss": 0.11499868333339691, "eval_runtime": 8.0385, "eval_samples_per_second": 49.761, "eval_steps_per_second": 6.22, "step": 1878 }, { "epoch": 6.01, "learning_rate": 0.00017782510945450243, "loss": 0.051, "step": 1880 }, { "epoch": 6.04, "learning_rate": 0.0001775884510708792, "loss": 0.0318, "step": 1890 }, { "epoch": 6.07, "learning_rate": 0.00017735179268725594, "loss": 0.0468, "step": 1900 }, { "epoch": 6.1, "learning_rate": 0.0001771151343036327, "loss": 0.074, "step": 1910 }, { "epoch": 6.13, "learning_rate": 0.00017687847592000948, "loss": 0.0653, "step": 1920 }, { "epoch": 6.17, "learning_rate": 0.00017664181753638625, "loss": 0.1136, "step": 1930 }, { "epoch": 6.2, "learning_rate": 0.000176405159152763, "loss": 0.0369, "step": 1940 }, { "epoch": 6.23, "learning_rate": 0.00017616850076913976, "loss": 0.0872, "step": 1950 }, { "epoch": 6.26, "learning_rate": 0.00017593184238551653, "loss": 0.1174, "step": 1960 }, { "epoch": 6.29, "learning_rate": 0.00017569518400189328, "loss": 0.0716, "step": 1970 }, { "epoch": 6.33, "learning_rate": 0.00017545852561827002, "loss": 0.1122, "step": 1980 }, { "epoch": 6.36, "learning_rate": 0.0001752218672346468, "loss": 0.1801, "step": 1990 }, { "epoch": 6.39, "learning_rate": 0.00017498520885102356, "loss": 0.0333, "step": 2000 }, { "epoch": 6.42, "learning_rate": 0.0001747485504674003, "loss": 0.0227, "step": 2010 }, { "epoch": 6.45, "learning_rate": 0.00017451189208377707, "loss": 0.0389, "step": 2020 }, { "epoch": 6.49, "learning_rate": 0.00017427523370015384, "loss": 0.0434, "step": 2030 }, { "epoch": 6.52, "learning_rate": 0.0001740385753165306, "loss": 0.0726, "step": 2040 }, { "epoch": 6.55, "learning_rate": 0.00017380191693290735, "loss": 0.0456, "step": 2050 }, { "epoch": 6.58, "learning_rate": 0.00017356525854928412, "loss": 0.0396, "step": 2060 }, { "epoch": 6.61, "learning_rate": 0.0001733286001656609, "loss": 0.0983, "step": 2070 }, { "epoch": 6.65, "learning_rate": 0.00017309194178203764, "loss": 0.0743, "step": 2080 }, { "epoch": 6.68, "learning_rate": 0.00017285528339841438, "loss": 0.055, "step": 2090 }, { "epoch": 6.71, "learning_rate": 0.00017261862501479115, "loss": 0.1489, "step": 2100 }, { "epoch": 6.74, "learning_rate": 0.00017238196663116792, "loss": 0.0918, "step": 2110 }, { "epoch": 6.77, "learning_rate": 0.00017214530824754466, "loss": 0.0382, "step": 2120 }, { "epoch": 6.81, "learning_rate": 0.00017190864986392143, "loss": 0.03, "step": 2130 }, { "epoch": 6.84, "learning_rate": 0.0001716719914802982, "loss": 0.035, "step": 2140 }, { "epoch": 6.87, "learning_rate": 0.00017143533309667497, "loss": 0.1244, "step": 2150 }, { "epoch": 6.9, "learning_rate": 0.00017119867471305172, "loss": 0.1426, "step": 2160 }, { "epoch": 6.93, "learning_rate": 0.00017096201632942849, "loss": 0.0794, "step": 2170 }, { "epoch": 6.96, "learning_rate": 0.00017072535794580526, "loss": 0.1954, "step": 2180 }, { "epoch": 7.0, "learning_rate": 0.000170488699562182, "loss": 0.0466, "step": 2190 }, { "epoch": 7.0, "eval_accuracy": 0.955, "eval_loss": 0.15742188692092896, "eval_runtime": 6.8368, "eval_samples_per_second": 58.507, "eval_steps_per_second": 7.313, "step": 2191 }, { "epoch": 7.03, "learning_rate": 0.00017025204117855874, "loss": 0.0528, "step": 2200 }, { "epoch": 7.06, "learning_rate": 0.0001700153827949355, "loss": 0.0362, "step": 2210 }, { "epoch": 7.09, "learning_rate": 0.00016977872441131228, "loss": 0.1156, "step": 2220 }, { "epoch": 7.12, "learning_rate": 0.00016954206602768902, "loss": 0.0595, "step": 2230 }, { "epoch": 7.16, "learning_rate": 0.0001693054076440658, "loss": 0.0263, "step": 2240 }, { "epoch": 7.19, "learning_rate": 0.00016906874926044256, "loss": 0.071, "step": 2250 }, { "epoch": 7.22, "learning_rate": 0.00016883209087681933, "loss": 0.0282, "step": 2260 }, { "epoch": 7.25, "learning_rate": 0.00016859543249319608, "loss": 0.0367, "step": 2270 }, { "epoch": 7.28, "learning_rate": 0.00016835877410957285, "loss": 0.0475, "step": 2280 }, { "epoch": 7.32, "learning_rate": 0.0001681221157259496, "loss": 0.0636, "step": 2290 }, { "epoch": 7.35, "learning_rate": 0.00016788545734232636, "loss": 0.0471, "step": 2300 }, { "epoch": 7.38, "learning_rate": 0.0001676487989587031, "loss": 0.0322, "step": 2310 }, { "epoch": 7.41, "learning_rate": 0.00016741214057507987, "loss": 0.0763, "step": 2320 }, { "epoch": 7.44, "learning_rate": 0.00016717548219145664, "loss": 0.1087, "step": 2330 }, { "epoch": 7.48, "learning_rate": 0.0001669388238078334, "loss": 0.0612, "step": 2340 }, { "epoch": 7.51, "learning_rate": 0.00016670216542421016, "loss": 0.0327, "step": 2350 }, { "epoch": 7.54, "learning_rate": 0.00016646550704058693, "loss": 0.0609, "step": 2360 }, { "epoch": 7.57, "learning_rate": 0.0001662288486569637, "loss": 0.0506, "step": 2370 }, { "epoch": 7.6, "learning_rate": 0.00016599219027334044, "loss": 0.031, "step": 2380 }, { "epoch": 7.64, "learning_rate": 0.0001657555318897172, "loss": 0.0729, "step": 2390 }, { "epoch": 7.67, "learning_rate": 0.00016551887350609395, "loss": 0.0224, "step": 2400 }, { "epoch": 7.7, "learning_rate": 0.00016528221512247072, "loss": 0.0306, "step": 2410 }, { "epoch": 7.73, "learning_rate": 0.00016504555673884746, "loss": 0.0286, "step": 2420 }, { "epoch": 7.76, "learning_rate": 0.00016480889835522423, "loss": 0.075, "step": 2430 }, { "epoch": 7.8, "learning_rate": 0.000164572239971601, "loss": 0.074, "step": 2440 }, { "epoch": 7.83, "learning_rate": 0.00016433558158797777, "loss": 0.1059, "step": 2450 }, { "epoch": 7.86, "learning_rate": 0.00016409892320435452, "loss": 0.078, "step": 2460 }, { "epoch": 7.89, "learning_rate": 0.00016386226482073129, "loss": 0.0753, "step": 2470 }, { "epoch": 7.92, "learning_rate": 0.00016362560643710806, "loss": 0.0698, "step": 2480 }, { "epoch": 7.96, "learning_rate": 0.0001633889480534848, "loss": 0.1025, "step": 2490 }, { "epoch": 7.99, "learning_rate": 0.00016315228966986157, "loss": 0.1343, "step": 2500 }, { "epoch": 8.0, "eval_accuracy": 0.955, "eval_loss": 0.2272537797689438, "eval_runtime": 5.1085, "eval_samples_per_second": 78.302, "eval_steps_per_second": 9.788, "step": 2504 }, { "epoch": 8.02, "learning_rate": 0.0001629156312862383, "loss": 0.038, "step": 2510 }, { "epoch": 8.05, "learning_rate": 0.00016267897290261508, "loss": 0.0652, "step": 2520 }, { "epoch": 8.08, "learning_rate": 0.00016244231451899182, "loss": 0.037, "step": 2530 }, { "epoch": 8.12, "learning_rate": 0.0001622056561353686, "loss": 0.0205, "step": 2540 }, { "epoch": 8.15, "learning_rate": 0.00016196899775174536, "loss": 0.0323, "step": 2550 }, { "epoch": 8.18, "learning_rate": 0.00016173233936812213, "loss": 0.0932, "step": 2560 }, { "epoch": 8.21, "learning_rate": 0.00016149568098449888, "loss": 0.0683, "step": 2570 }, { "epoch": 8.24, "learning_rate": 0.00016125902260087565, "loss": 0.0637, "step": 2580 }, { "epoch": 8.27, "learning_rate": 0.00016102236421725242, "loss": 0.0249, "step": 2590 }, { "epoch": 8.31, "learning_rate": 0.0001607857058336292, "loss": 0.132, "step": 2600 }, { "epoch": 8.34, "learning_rate": 0.00016054904745000593, "loss": 0.0318, "step": 2610 }, { "epoch": 8.37, "learning_rate": 0.00016031238906638267, "loss": 0.0479, "step": 2620 }, { "epoch": 8.4, "learning_rate": 0.00016007573068275944, "loss": 0.0313, "step": 2630 }, { "epoch": 8.43, "learning_rate": 0.00015983907229913619, "loss": 0.0409, "step": 2640 }, { "epoch": 8.47, "learning_rate": 0.00015960241391551296, "loss": 0.038, "step": 2650 }, { "epoch": 8.5, "learning_rate": 0.00015936575553188973, "loss": 0.0527, "step": 2660 }, { "epoch": 8.53, "learning_rate": 0.0001591290971482665, "loss": 0.0792, "step": 2670 }, { "epoch": 8.56, "learning_rate": 0.00015889243876464324, "loss": 0.0708, "step": 2680 }, { "epoch": 8.59, "learning_rate": 0.00015865578038102, "loss": 0.0673, "step": 2690 }, { "epoch": 8.63, "learning_rate": 0.00015841912199739678, "loss": 0.0326, "step": 2700 }, { "epoch": 8.66, "learning_rate": 0.00015818246361377352, "loss": 0.071, "step": 2710 }, { "epoch": 8.69, "learning_rate": 0.0001579458052301503, "loss": 0.1203, "step": 2720 }, { "epoch": 8.72, "learning_rate": 0.00015770914684652703, "loss": 0.0396, "step": 2730 }, { "epoch": 8.75, "learning_rate": 0.0001574724884629038, "loss": 0.0379, "step": 2740 }, { "epoch": 8.79, "learning_rate": 0.00015723583007928055, "loss": 0.0451, "step": 2750 }, { "epoch": 8.82, "learning_rate": 0.00015699917169565732, "loss": 0.0135, "step": 2760 }, { "epoch": 8.85, "learning_rate": 0.0001567625133120341, "loss": 0.0331, "step": 2770 }, { "epoch": 8.88, "learning_rate": 0.00015652585492841086, "loss": 0.0789, "step": 2780 }, { "epoch": 8.91, "learning_rate": 0.0001562891965447876, "loss": 0.0183, "step": 2790 }, { "epoch": 8.95, "learning_rate": 0.00015605253816116437, "loss": 0.0287, "step": 2800 }, { "epoch": 8.98, "learning_rate": 0.00015581587977754114, "loss": 0.0721, "step": 2810 }, { "epoch": 9.0, "eval_accuracy": 0.985, "eval_loss": 0.07111229002475739, "eval_runtime": 6.7619, "eval_samples_per_second": 59.155, "eval_steps_per_second": 7.394, "step": 2817 }, { "epoch": 9.01, "learning_rate": 0.00015557922139391788, "loss": 0.0057, "step": 2820 }, { "epoch": 9.04, "learning_rate": 0.00015534256301029463, "loss": 0.0627, "step": 2830 }, { "epoch": 9.07, "learning_rate": 0.0001551059046266714, "loss": 0.0825, "step": 2840 }, { "epoch": 9.11, "learning_rate": 0.00015486924624304817, "loss": 0.0128, "step": 2850 }, { "epoch": 9.14, "learning_rate": 0.00015463258785942494, "loss": 0.011, "step": 2860 }, { "epoch": 9.17, "learning_rate": 0.00015439592947580168, "loss": 0.0174, "step": 2870 }, { "epoch": 9.2, "learning_rate": 0.00015415927109217845, "loss": 0.0184, "step": 2880 }, { "epoch": 9.23, "learning_rate": 0.00015392261270855522, "loss": 0.0184, "step": 2890 }, { "epoch": 9.27, "learning_rate": 0.00015368595432493196, "loss": 0.0292, "step": 2900 }, { "epoch": 9.3, "learning_rate": 0.00015344929594130873, "loss": 0.0538, "step": 2910 }, { "epoch": 9.33, "learning_rate": 0.0001532126375576855, "loss": 0.0304, "step": 2920 }, { "epoch": 9.36, "learning_rate": 0.00015297597917406224, "loss": 0.0354, "step": 2930 }, { "epoch": 9.39, "learning_rate": 0.000152739320790439, "loss": 0.038, "step": 2940 }, { "epoch": 9.42, "learning_rate": 0.00015250266240681576, "loss": 0.0028, "step": 2950 }, { "epoch": 9.46, "learning_rate": 0.00015226600402319253, "loss": 0.0448, "step": 2960 }, { "epoch": 9.49, "learning_rate": 0.0001520293456395693, "loss": 0.041, "step": 2970 }, { "epoch": 9.52, "learning_rate": 0.00015179268725594604, "loss": 0.0441, "step": 2980 }, { "epoch": 9.55, "learning_rate": 0.0001515560288723228, "loss": 0.0047, "step": 2990 }, { "epoch": 9.58, "learning_rate": 0.00015131937048869958, "loss": 0.0098, "step": 3000 }, { "epoch": 9.62, "learning_rate": 0.00015108271210507635, "loss": 0.0495, "step": 3010 }, { "epoch": 9.65, "learning_rate": 0.0001508460537214531, "loss": 0.0019, "step": 3020 }, { "epoch": 9.68, "learning_rate": 0.00015060939533782986, "loss": 0.0681, "step": 3030 }, { "epoch": 9.71, "learning_rate": 0.0001503727369542066, "loss": 0.0154, "step": 3040 }, { "epoch": 9.74, "learning_rate": 0.00015013607857058335, "loss": 0.033, "step": 3050 }, { "epoch": 9.78, "learning_rate": 0.00014989942018696012, "loss": 0.0211, "step": 3060 }, { "epoch": 9.81, "learning_rate": 0.0001496627618033369, "loss": 0.0246, "step": 3070 }, { "epoch": 9.84, "learning_rate": 0.00014942610341971366, "loss": 0.0153, "step": 3080 }, { "epoch": 9.87, "learning_rate": 0.0001491894450360904, "loss": 0.0035, "step": 3090 }, { "epoch": 9.9, "learning_rate": 0.00014895278665246717, "loss": 0.1033, "step": 3100 }, { "epoch": 9.94, "learning_rate": 0.00014871612826884394, "loss": 0.0397, "step": 3110 }, { "epoch": 9.97, "learning_rate": 0.0001484794698852207, "loss": 0.0303, "step": 3120 }, { "epoch": 10.0, "learning_rate": 0.00014824281150159745, "loss": 0.0023, "step": 3130 }, { "epoch": 10.0, "eval_accuracy": 0.9775, "eval_loss": 0.16127361357212067, "eval_runtime": 5.6352, "eval_samples_per_second": 70.983, "eval_steps_per_second": 8.873, "step": 3130 }, { "epoch": 10.03, "learning_rate": 0.00014800615311797422, "loss": 0.0019, "step": 3140 }, { "epoch": 10.06, "learning_rate": 0.00014776949473435097, "loss": 0.0403, "step": 3150 }, { "epoch": 10.1, "learning_rate": 0.0001475328363507277, "loss": 0.0242, "step": 3160 }, { "epoch": 10.13, "learning_rate": 0.00014729617796710448, "loss": 0.0859, "step": 3170 }, { "epoch": 10.16, "learning_rate": 0.00014705951958348125, "loss": 0.0928, "step": 3180 }, { "epoch": 10.19, "learning_rate": 0.00014682286119985802, "loss": 0.0374, "step": 3190 }, { "epoch": 10.22, "learning_rate": 0.00014658620281623476, "loss": 0.0535, "step": 3200 }, { "epoch": 10.26, "learning_rate": 0.00014634954443261153, "loss": 0.0237, "step": 3210 }, { "epoch": 10.29, "learning_rate": 0.0001461128860489883, "loss": 0.0512, "step": 3220 }, { "epoch": 10.32, "learning_rate": 0.00014587622766536507, "loss": 0.0284, "step": 3230 }, { "epoch": 10.35, "learning_rate": 0.00014563956928174182, "loss": 0.014, "step": 3240 }, { "epoch": 10.38, "learning_rate": 0.00014540291089811856, "loss": 0.0281, "step": 3250 }, { "epoch": 10.42, "learning_rate": 0.00014516625251449533, "loss": 0.0441, "step": 3260 }, { "epoch": 10.45, "learning_rate": 0.0001449295941308721, "loss": 0.0165, "step": 3270 }, { "epoch": 10.48, "learning_rate": 0.00014469293574724884, "loss": 0.0239, "step": 3280 }, { "epoch": 10.51, "learning_rate": 0.0001444562773636256, "loss": 0.0202, "step": 3290 }, { "epoch": 10.54, "learning_rate": 0.00014421961898000238, "loss": 0.0323, "step": 3300 }, { "epoch": 10.58, "learning_rate": 0.00014398296059637912, "loss": 0.0383, "step": 3310 }, { "epoch": 10.61, "learning_rate": 0.0001437463022127559, "loss": 0.0405, "step": 3320 }, { "epoch": 10.64, "learning_rate": 0.00014350964382913266, "loss": 0.0575, "step": 3330 }, { "epoch": 10.67, "learning_rate": 0.00014327298544550943, "loss": 0.0177, "step": 3340 }, { "epoch": 10.7, "learning_rate": 0.00014303632706188618, "loss": 0.015, "step": 3350 }, { "epoch": 10.73, "learning_rate": 0.00014279966867826292, "loss": 0.0085, "step": 3360 }, { "epoch": 10.77, "learning_rate": 0.0001425630102946397, "loss": 0.0035, "step": 3370 }, { "epoch": 10.8, "learning_rate": 0.00014232635191101646, "loss": 0.035, "step": 3380 }, { "epoch": 10.83, "learning_rate": 0.0001420896935273932, "loss": 0.0257, "step": 3390 }, { "epoch": 10.86, "learning_rate": 0.00014185303514376997, "loss": 0.0435, "step": 3400 }, { "epoch": 10.89, "learning_rate": 0.00014161637676014674, "loss": 0.0202, "step": 3410 }, { "epoch": 10.93, "learning_rate": 0.00014137971837652349, "loss": 0.0196, "step": 3420 }, { "epoch": 10.96, "learning_rate": 0.00014114305999290026, "loss": 0.0119, "step": 3430 }, { "epoch": 10.99, "learning_rate": 0.00014090640160927703, "loss": 0.0037, "step": 3440 }, { "epoch": 11.0, "eval_accuracy": 0.9875, "eval_loss": 0.06855019927024841, "eval_runtime": 6.4278, "eval_samples_per_second": 62.229, "eval_steps_per_second": 7.779, "step": 3443 }, { "epoch": 11.02, "learning_rate": 0.0001406697432256538, "loss": 0.0182, "step": 3450 }, { "epoch": 11.05, "learning_rate": 0.00014043308484203054, "loss": 0.0152, "step": 3460 }, { "epoch": 11.09, "learning_rate": 0.00014019642645840728, "loss": 0.0647, "step": 3470 }, { "epoch": 11.12, "learning_rate": 0.00013995976807478405, "loss": 0.0197, "step": 3480 }, { "epoch": 11.15, "learning_rate": 0.00013972310969116082, "loss": 0.0118, "step": 3490 }, { "epoch": 11.18, "learning_rate": 0.00013948645130753756, "loss": 0.0111, "step": 3500 }, { "epoch": 11.21, "learning_rate": 0.00013924979292391433, "loss": 0.0521, "step": 3510 }, { "epoch": 11.25, "learning_rate": 0.0001390131345402911, "loss": 0.024, "step": 3520 }, { "epoch": 11.28, "learning_rate": 0.00013877647615666787, "loss": 0.0609, "step": 3530 }, { "epoch": 11.31, "learning_rate": 0.00013853981777304462, "loss": 0.0066, "step": 3540 }, { "epoch": 11.34, "learning_rate": 0.0001383031593894214, "loss": 0.0029, "step": 3550 }, { "epoch": 11.37, "learning_rate": 0.00013806650100579813, "loss": 0.0616, "step": 3560 }, { "epoch": 11.41, "learning_rate": 0.0001378298426221749, "loss": 0.026, "step": 3570 }, { "epoch": 11.44, "learning_rate": 0.00013759318423855164, "loss": 0.0362, "step": 3580 }, { "epoch": 11.47, "learning_rate": 0.0001373565258549284, "loss": 0.0199, "step": 3590 }, { "epoch": 11.5, "learning_rate": 0.00013711986747130518, "loss": 0.0463, "step": 3600 }, { "epoch": 11.53, "learning_rate": 0.00013688320908768193, "loss": 0.0164, "step": 3610 }, { "epoch": 11.57, "learning_rate": 0.0001366465507040587, "loss": 0.0013, "step": 3620 }, { "epoch": 11.6, "learning_rate": 0.00013640989232043547, "loss": 0.0082, "step": 3630 }, { "epoch": 11.63, "learning_rate": 0.00013617323393681224, "loss": 0.0024, "step": 3640 }, { "epoch": 11.66, "learning_rate": 0.00013593657555318898, "loss": 0.0384, "step": 3650 }, { "epoch": 11.69, "learning_rate": 0.00013569991716956575, "loss": 0.0283, "step": 3660 }, { "epoch": 11.73, "learning_rate": 0.0001354632587859425, "loss": 0.0261, "step": 3670 }, { "epoch": 11.76, "learning_rate": 0.00013522660040231926, "loss": 0.0245, "step": 3680 }, { "epoch": 11.79, "learning_rate": 0.000134989942018696, "loss": 0.0789, "step": 3690 }, { "epoch": 11.82, "learning_rate": 0.00013475328363507277, "loss": 0.0312, "step": 3700 }, { "epoch": 11.85, "learning_rate": 0.00013451662525144954, "loss": 0.053, "step": 3710 }, { "epoch": 11.88, "learning_rate": 0.0001342799668678263, "loss": 0.0202, "step": 3720 }, { "epoch": 11.92, "learning_rate": 0.00013404330848420306, "loss": 0.0081, "step": 3730 }, { "epoch": 11.95, "learning_rate": 0.00013380665010057983, "loss": 0.0511, "step": 3740 }, { "epoch": 11.98, "learning_rate": 0.0001335699917169566, "loss": 0.0369, "step": 3750 }, { "epoch": 12.0, "eval_accuracy": 0.9625, "eval_loss": 0.1564856767654419, "eval_runtime": 5.157, "eval_samples_per_second": 77.564, "eval_steps_per_second": 9.695, "step": 3756 }, { "epoch": 12.01, "learning_rate": 0.00013333333333333334, "loss": 0.0043, "step": 3760 }, { "epoch": 12.04, "learning_rate": 0.0001330966749497101, "loss": 0.0015, "step": 3770 }, { "epoch": 12.08, "learning_rate": 0.00013286001656608685, "loss": 0.0085, "step": 3780 }, { "epoch": 12.11, "learning_rate": 0.00013262335818246362, "loss": 0.0498, "step": 3790 }, { "epoch": 12.14, "learning_rate": 0.00013238669979884037, "loss": 0.0111, "step": 3800 }, { "epoch": 12.17, "learning_rate": 0.00013215004141521714, "loss": 0.0255, "step": 3810 }, { "epoch": 12.2, "learning_rate": 0.0001319133830315939, "loss": 0.0061, "step": 3820 }, { "epoch": 12.24, "learning_rate": 0.00013167672464797065, "loss": 0.0162, "step": 3830 }, { "epoch": 12.27, "learning_rate": 0.00013144006626434742, "loss": 0.0032, "step": 3840 }, { "epoch": 12.3, "learning_rate": 0.0001312034078807242, "loss": 0.0193, "step": 3850 }, { "epoch": 12.33, "learning_rate": 0.00013096674949710096, "loss": 0.0082, "step": 3860 }, { "epoch": 12.36, "learning_rate": 0.0001307300911134777, "loss": 0.0054, "step": 3870 }, { "epoch": 12.4, "learning_rate": 0.00013049343272985447, "loss": 0.0226, "step": 3880 }, { "epoch": 12.43, "learning_rate": 0.00013025677434623121, "loss": 0.0042, "step": 3890 }, { "epoch": 12.46, "learning_rate": 0.00013002011596260798, "loss": 0.0065, "step": 3900 }, { "epoch": 12.49, "learning_rate": 0.00012978345757898473, "loss": 0.008, "step": 3910 }, { "epoch": 12.52, "learning_rate": 0.0001295467991953615, "loss": 0.0209, "step": 3920 }, { "epoch": 12.56, "learning_rate": 0.00012931014081173827, "loss": 0.0192, "step": 3930 }, { "epoch": 12.59, "learning_rate": 0.000129073482428115, "loss": 0.0098, "step": 3940 }, { "epoch": 12.62, "learning_rate": 0.00012883682404449178, "loss": 0.0154, "step": 3950 }, { "epoch": 12.65, "learning_rate": 0.00012860016566086855, "loss": 0.0675, "step": 3960 }, { "epoch": 12.68, "learning_rate": 0.00012836350727724532, "loss": 0.0252, "step": 3970 }, { "epoch": 12.72, "learning_rate": 0.00012812684889362206, "loss": 0.037, "step": 3980 }, { "epoch": 12.75, "learning_rate": 0.00012789019050999883, "loss": 0.0148, "step": 3990 }, { "epoch": 12.78, "learning_rate": 0.00012765353212637558, "loss": 0.0041, "step": 4000 }, { "epoch": 12.81, "learning_rate": 0.00012741687374275235, "loss": 0.0283, "step": 4010 }, { "epoch": 12.84, "learning_rate": 0.0001271802153591291, "loss": 0.0428, "step": 4020 }, { "epoch": 12.88, "learning_rate": 0.00012694355697550586, "loss": 0.0165, "step": 4030 }, { "epoch": 12.91, "learning_rate": 0.00012670689859188263, "loss": 0.0263, "step": 4040 }, { "epoch": 12.94, "learning_rate": 0.0001264702402082594, "loss": 0.0365, "step": 4050 }, { "epoch": 12.97, "learning_rate": 0.00012623358182463614, "loss": 0.0016, "step": 4060 }, { "epoch": 13.0, "eval_accuracy": 0.9725, "eval_loss": 0.14009271562099457, "eval_runtime": 6.8166, "eval_samples_per_second": 58.68, "eval_steps_per_second": 7.335, "step": 4069 }, { "epoch": 13.0, "learning_rate": 0.0001259969234410129, "loss": 0.0087, "step": 4070 }, { "epoch": 13.04, "learning_rate": 0.00012576026505738968, "loss": 0.0028, "step": 4080 }, { "epoch": 13.07, "learning_rate": 0.00012552360667376642, "loss": 0.0354, "step": 4090 }, { "epoch": 13.1, "learning_rate": 0.00012528694829014317, "loss": 0.0096, "step": 4100 }, { "epoch": 13.13, "learning_rate": 0.00012505028990651994, "loss": 0.0302, "step": 4110 }, { "epoch": 13.16, "learning_rate": 0.0001248136315228967, "loss": 0.0679, "step": 4120 }, { "epoch": 13.19, "learning_rate": 0.00012457697313927345, "loss": 0.0313, "step": 4130 }, { "epoch": 13.23, "learning_rate": 0.00012434031475565022, "loss": 0.048, "step": 4140 }, { "epoch": 13.26, "learning_rate": 0.000124103656372027, "loss": 0.014, "step": 4150 }, { "epoch": 13.29, "learning_rate": 0.00012386699798840376, "loss": 0.0143, "step": 4160 }, { "epoch": 13.32, "learning_rate": 0.0001236303396047805, "loss": 0.0013, "step": 4170 }, { "epoch": 13.35, "learning_rate": 0.00012339368122115727, "loss": 0.0005, "step": 4180 }, { "epoch": 13.39, "learning_rate": 0.00012315702283753404, "loss": 0.0017, "step": 4190 }, { "epoch": 13.42, "learning_rate": 0.00012292036445391079, "loss": 0.0012, "step": 4200 }, { "epoch": 13.45, "learning_rate": 0.00012268370607028753, "loss": 0.0055, "step": 4210 }, { "epoch": 13.48, "learning_rate": 0.0001224470476866643, "loss": 0.0103, "step": 4220 }, { "epoch": 13.51, "learning_rate": 0.00012221038930304107, "loss": 0.0095, "step": 4230 }, { "epoch": 13.55, "learning_rate": 0.00012197373091941781, "loss": 0.0175, "step": 4240 }, { "epoch": 13.58, "learning_rate": 0.00012173707253579458, "loss": 0.0155, "step": 4250 }, { "epoch": 13.61, "learning_rate": 0.00012150041415217135, "loss": 0.0169, "step": 4260 }, { "epoch": 13.64, "learning_rate": 0.00012126375576854812, "loss": 0.0419, "step": 4270 }, { "epoch": 13.67, "learning_rate": 0.00012102709738492486, "loss": 0.0281, "step": 4280 }, { "epoch": 13.71, "learning_rate": 0.00012079043900130162, "loss": 0.0124, "step": 4290 }, { "epoch": 13.74, "learning_rate": 0.00012055378061767839, "loss": 0.0054, "step": 4300 }, { "epoch": 13.77, "learning_rate": 0.00012031712223405516, "loss": 0.0682, "step": 4310 }, { "epoch": 13.8, "learning_rate": 0.0001200804638504319, "loss": 0.0139, "step": 4320 }, { "epoch": 13.83, "learning_rate": 0.00011984380546680867, "loss": 0.0018, "step": 4330 }, { "epoch": 13.87, "learning_rate": 0.00011960714708318543, "loss": 0.0258, "step": 4340 }, { "epoch": 13.9, "learning_rate": 0.00011937048869956217, "loss": 0.0194, "step": 4350 }, { "epoch": 13.93, "learning_rate": 0.00011913383031593894, "loss": 0.0132, "step": 4360 }, { "epoch": 13.96, "learning_rate": 0.00011889717193231571, "loss": 0.0024, "step": 4370 }, { "epoch": 13.99, "learning_rate": 0.00011866051354869248, "loss": 0.0006, "step": 4380 }, { "epoch": 14.0, "eval_accuracy": 0.9925, "eval_loss": 0.1054958701133728, "eval_runtime": 5.0787, "eval_samples_per_second": 78.761, "eval_steps_per_second": 9.845, "step": 4382 }, { "epoch": 14.03, "learning_rate": 0.00011842385516506922, "loss": 0.0259, "step": 4390 }, { "epoch": 14.06, "learning_rate": 0.00011818719678144598, "loss": 0.0004, "step": 4400 }, { "epoch": 14.09, "learning_rate": 0.00011795053839782275, "loss": 0.0318, "step": 4410 }, { "epoch": 14.12, "learning_rate": 0.00011771388001419952, "loss": 0.0292, "step": 4420 }, { "epoch": 14.15, "learning_rate": 0.00011747722163057626, "loss": 0.0068, "step": 4430 }, { "epoch": 14.19, "learning_rate": 0.00011724056324695303, "loss": 0.0118, "step": 4440 }, { "epoch": 14.22, "learning_rate": 0.00011700390486332979, "loss": 0.0136, "step": 4450 }, { "epoch": 14.25, "learning_rate": 0.00011676724647970656, "loss": 0.0937, "step": 4460 }, { "epoch": 14.28, "learning_rate": 0.0001165305880960833, "loss": 0.0369, "step": 4470 }, { "epoch": 14.31, "learning_rate": 0.00011629392971246007, "loss": 0.0098, "step": 4480 }, { "epoch": 14.35, "learning_rate": 0.00011605727132883683, "loss": 0.0144, "step": 4490 }, { "epoch": 14.38, "learning_rate": 0.00011582061294521359, "loss": 0.0939, "step": 4500 }, { "epoch": 14.41, "learning_rate": 0.00011558395456159034, "loss": 0.0064, "step": 4510 }, { "epoch": 14.44, "learning_rate": 0.00011534729617796711, "loss": 0.0376, "step": 4520 }, { "epoch": 14.47, "learning_rate": 0.00011511063779434388, "loss": 0.0066, "step": 4530 }, { "epoch": 14.5, "learning_rate": 0.00011487397941072063, "loss": 0.0348, "step": 4540 }, { "epoch": 14.54, "learning_rate": 0.00011463732102709738, "loss": 0.0144, "step": 4550 }, { "epoch": 14.57, "learning_rate": 0.00011440066264347415, "loss": 0.005, "step": 4560 }, { "epoch": 14.6, "learning_rate": 0.00011416400425985092, "loss": 0.0041, "step": 4570 }, { "epoch": 14.63, "learning_rate": 0.00011392734587622766, "loss": 0.086, "step": 4580 }, { "epoch": 14.66, "learning_rate": 0.00011369068749260443, "loss": 0.0031, "step": 4590 }, { "epoch": 14.7, "learning_rate": 0.00011345402910898119, "loss": 0.0158, "step": 4600 }, { "epoch": 14.73, "learning_rate": 0.00011321737072535793, "loss": 0.0056, "step": 4610 }, { "epoch": 14.76, "learning_rate": 0.0001129807123417347, "loss": 0.0021, "step": 4620 }, { "epoch": 14.79, "learning_rate": 0.00011274405395811147, "loss": 0.012, "step": 4630 }, { "epoch": 14.82, "learning_rate": 0.00011250739557448824, "loss": 0.0001, "step": 4640 }, { "epoch": 14.86, "learning_rate": 0.00011227073719086499, "loss": 0.0003, "step": 4650 }, { "epoch": 14.89, "learning_rate": 0.00011203407880724174, "loss": 0.001, "step": 4660 }, { "epoch": 14.92, "learning_rate": 0.00011179742042361851, "loss": 0.0003, "step": 4670 }, { "epoch": 14.95, "learning_rate": 0.00011156076203999528, "loss": 0.0132, "step": 4680 }, { "epoch": 14.98, "learning_rate": 0.00011132410365637203, "loss": 0.0001, "step": 4690 }, { "epoch": 15.0, "eval_accuracy": 0.9925, "eval_loss": 0.08063908666372299, "eval_runtime": 5.1103, "eval_samples_per_second": 78.274, "eval_steps_per_second": 9.784, "step": 4695 }, { "epoch": 15.02, "learning_rate": 0.0001110874452727488, "loss": 0.0015, "step": 4700 }, { "epoch": 15.05, "learning_rate": 0.00011085078688912555, "loss": 0.0056, "step": 4710 }, { "epoch": 15.08, "learning_rate": 0.00011061412850550232, "loss": 0.0119, "step": 4720 }, { "epoch": 15.11, "learning_rate": 0.00011037747012187907, "loss": 0.0031, "step": 4730 }, { "epoch": 15.14, "learning_rate": 0.00011014081173825584, "loss": 0.031, "step": 4740 }, { "epoch": 15.18, "learning_rate": 0.0001099041533546326, "loss": 0.0055, "step": 4750 }, { "epoch": 15.21, "learning_rate": 0.00010966749497100935, "loss": 0.0176, "step": 4760 }, { "epoch": 15.24, "learning_rate": 0.0001094308365873861, "loss": 0.0115, "step": 4770 }, { "epoch": 15.27, "learning_rate": 0.00010919417820376287, "loss": 0.0084, "step": 4780 }, { "epoch": 15.3, "learning_rate": 0.00010895751982013964, "loss": 0.0255, "step": 4790 }, { "epoch": 15.34, "learning_rate": 0.00010872086143651639, "loss": 0.0007, "step": 4800 }, { "epoch": 15.37, "learning_rate": 0.00010848420305289316, "loss": 0.003, "step": 4810 }, { "epoch": 15.4, "learning_rate": 0.00010824754466926991, "loss": 0.0016, "step": 4820 }, { "epoch": 15.43, "learning_rate": 0.00010801088628564668, "loss": 0.0156, "step": 4830 }, { "epoch": 15.46, "learning_rate": 0.00010777422790202343, "loss": 0.0531, "step": 4840 }, { "epoch": 15.5, "learning_rate": 0.0001075375695184002, "loss": 0.0154, "step": 4850 }, { "epoch": 15.53, "learning_rate": 0.00010730091113477697, "loss": 0.0105, "step": 4860 }, { "epoch": 15.56, "learning_rate": 0.00010706425275115371, "loss": 0.0094, "step": 4870 }, { "epoch": 15.59, "learning_rate": 0.00010682759436753047, "loss": 0.0165, "step": 4880 }, { "epoch": 15.62, "learning_rate": 0.00010659093598390724, "loss": 0.061, "step": 4890 }, { "epoch": 15.65, "learning_rate": 0.000106354277600284, "loss": 0.0035, "step": 4900 }, { "epoch": 15.69, "learning_rate": 0.00010611761921666075, "loss": 0.0003, "step": 4910 }, { "epoch": 15.72, "learning_rate": 0.00010588096083303752, "loss": 0.0038, "step": 4920 }, { "epoch": 15.75, "learning_rate": 0.00010564430244941428, "loss": 0.0002, "step": 4930 }, { "epoch": 15.78, "learning_rate": 0.00010540764406579105, "loss": 0.0002, "step": 4940 }, { "epoch": 15.81, "learning_rate": 0.00010517098568216779, "loss": 0.0001, "step": 4950 }, { "epoch": 15.85, "learning_rate": 0.00010493432729854456, "loss": 0.0005, "step": 4960 }, { "epoch": 15.88, "learning_rate": 0.00010469766891492131, "loss": 0.0058, "step": 4970 }, { "epoch": 15.91, "learning_rate": 0.00010446101053129808, "loss": 0.005, "step": 4980 }, { "epoch": 15.94, "learning_rate": 0.00010422435214767483, "loss": 0.0057, "step": 4990 }, { "epoch": 15.97, "learning_rate": 0.0001039876937640516, "loss": 0.0003, "step": 5000 }, { "epoch": 16.0, "eval_accuracy": 0.9925, "eval_loss": 0.08654958754777908, "eval_runtime": 6.5529, "eval_samples_per_second": 61.042, "eval_steps_per_second": 7.63, "step": 5008 }, { "epoch": 16.01, "learning_rate": 0.00010375103538042837, "loss": 0.0026, "step": 5010 }, { "epoch": 16.04, "learning_rate": 0.00010351437699680511, "loss": 0.0072, "step": 5020 }, { "epoch": 16.07, "learning_rate": 0.00010327771861318187, "loss": 0.001, "step": 5030 }, { "epoch": 16.1, "learning_rate": 0.00010304106022955864, "loss": 0.0035, "step": 5040 }, { "epoch": 16.13, "learning_rate": 0.0001028044018459354, "loss": 0.0001, "step": 5050 }, { "epoch": 16.17, "learning_rate": 0.00010259140930067448, "loss": 0.0183, "step": 5060 }, { "epoch": 16.2, "learning_rate": 0.00010235475091705124, "loss": 0.011, "step": 5070 }, { "epoch": 16.23, "learning_rate": 0.00010211809253342801, "loss": 0.0002, "step": 5080 }, { "epoch": 16.26, "learning_rate": 0.00010188143414980475, "loss": 0.0245, "step": 5090 }, { "epoch": 16.29, "learning_rate": 0.00010164477576618152, "loss": 0.0009, "step": 5100 }, { "epoch": 16.33, "learning_rate": 0.00010140811738255829, "loss": 0.0003, "step": 5110 }, { "epoch": 16.36, "learning_rate": 0.00010117145899893504, "loss": 0.0031, "step": 5120 }, { "epoch": 16.39, "learning_rate": 0.00010093480061531179, "loss": 0.0001, "step": 5130 }, { "epoch": 16.42, "learning_rate": 0.00010069814223168856, "loss": 0.0132, "step": 5140 }, { "epoch": 16.45, "learning_rate": 0.00010046148384806533, "loss": 0.0185, "step": 5150 }, { "epoch": 16.49, "learning_rate": 0.00010022482546444207, "loss": 0.0048, "step": 5160 }, { "epoch": 16.52, "learning_rate": 9.998816708081884e-05, "loss": 0.0159, "step": 5170 }, { "epoch": 16.55, "learning_rate": 9.97515086971956e-05, "loss": 0.0681, "step": 5180 }, { "epoch": 16.58, "learning_rate": 9.951485031357236e-05, "loss": 0.0237, "step": 5190 }, { "epoch": 16.61, "learning_rate": 9.927819192994913e-05, "loss": 0.0213, "step": 5200 }, { "epoch": 16.65, "learning_rate": 9.904153354632588e-05, "loss": 0.0139, "step": 5210 }, { "epoch": 16.68, "learning_rate": 9.880487516270265e-05, "loss": 0.0224, "step": 5220 }, { "epoch": 16.71, "learning_rate": 9.85682167790794e-05, "loss": 0.008, "step": 5230 }, { "epoch": 16.74, "learning_rate": 9.833155839545617e-05, "loss": 0.0013, "step": 5240 }, { "epoch": 16.77, "learning_rate": 9.809490001183292e-05, "loss": 0.0317, "step": 5250 }, { "epoch": 16.81, "learning_rate": 9.785824162820968e-05, "loss": 0.0001, "step": 5260 }, { "epoch": 16.84, "learning_rate": 9.762158324458645e-05, "loss": 0.0058, "step": 5270 }, { "epoch": 16.87, "learning_rate": 9.73849248609632e-05, "loss": 0.0044, "step": 5280 }, { "epoch": 16.9, "learning_rate": 9.714826647733996e-05, "loss": 0.0175, "step": 5290 }, { "epoch": 16.93, "learning_rate": 9.691160809371672e-05, "loss": 0.0028, "step": 5300 }, { "epoch": 16.96, "learning_rate": 9.667494971009349e-05, "loss": 0.0001, "step": 5310 }, { "epoch": 17.0, "learning_rate": 9.643829132647025e-05, "loss": 0.0035, "step": 5320 }, { "epoch": 17.0, "eval_accuracy": 0.9825, "eval_loss": 0.09907230734825134, "eval_runtime": 5.2571, "eval_samples_per_second": 76.088, "eval_steps_per_second": 9.511, "step": 5321 }, { "epoch": 17.03, "learning_rate": 9.620163294284702e-05, "loss": 0.0286, "step": 5330 }, { "epoch": 17.06, "learning_rate": 9.596497455922376e-05, "loss": 0.0002, "step": 5340 }, { "epoch": 17.09, "learning_rate": 9.572831617560053e-05, "loss": 0.0009, "step": 5350 }, { "epoch": 17.12, "learning_rate": 9.549165779197728e-05, "loss": 0.0016, "step": 5360 }, { "epoch": 17.16, "learning_rate": 9.525499940835405e-05, "loss": 0.0021, "step": 5370 }, { "epoch": 17.19, "learning_rate": 9.501834102473081e-05, "loss": 0.0001, "step": 5380 }, { "epoch": 17.22, "learning_rate": 9.478168264110757e-05, "loss": 0.0012, "step": 5390 }, { "epoch": 17.25, "learning_rate": 9.454502425748432e-05, "loss": 0.0209, "step": 5400 }, { "epoch": 17.28, "learning_rate": 9.430836587386108e-05, "loss": 0.0003, "step": 5410 }, { "epoch": 17.32, "learning_rate": 9.407170749023785e-05, "loss": 0.0002, "step": 5420 }, { "epoch": 17.35, "learning_rate": 9.38350491066146e-05, "loss": 0.0002, "step": 5430 }, { "epoch": 17.38, "learning_rate": 9.359839072299136e-05, "loss": 0.0076, "step": 5440 }, { "epoch": 17.41, "learning_rate": 9.336173233936812e-05, "loss": 0.0001, "step": 5450 }, { "epoch": 17.44, "learning_rate": 9.312507395574489e-05, "loss": 0.0001, "step": 5460 }, { "epoch": 17.48, "learning_rate": 9.288841557212165e-05, "loss": 0.0026, "step": 5470 }, { "epoch": 17.51, "learning_rate": 9.265175718849842e-05, "loss": 0.0004, "step": 5480 }, { "epoch": 17.54, "learning_rate": 9.241509880487517e-05, "loss": 0.0, "step": 5490 }, { "epoch": 17.57, "learning_rate": 9.217844042125193e-05, "loss": 0.0017, "step": 5500 }, { "epoch": 17.6, "learning_rate": 9.194178203762869e-05, "loss": 0.0119, "step": 5510 }, { "epoch": 17.64, "learning_rate": 9.170512365400544e-05, "loss": 0.0033, "step": 5520 }, { "epoch": 17.67, "learning_rate": 9.146846527038221e-05, "loss": 0.0004, "step": 5530 }, { "epoch": 17.7, "learning_rate": 9.123180688675897e-05, "loss": 0.0104, "step": 5540 }, { "epoch": 17.73, "learning_rate": 9.099514850313572e-05, "loss": 0.0018, "step": 5550 }, { "epoch": 17.76, "learning_rate": 9.075849011951248e-05, "loss": 0.0203, "step": 5560 }, { "epoch": 17.8, "learning_rate": 9.052183173588925e-05, "loss": 0.0005, "step": 5570 }, { "epoch": 17.83, "learning_rate": 9.028517335226601e-05, "loss": 0.0108, "step": 5580 }, { "epoch": 17.86, "learning_rate": 9.004851496864278e-05, "loss": 0.0192, "step": 5590 }, { "epoch": 17.89, "learning_rate": 8.981185658501953e-05, "loss": 0.0023, "step": 5600 }, { "epoch": 17.92, "learning_rate": 8.957519820139629e-05, "loss": 0.0508, "step": 5610 }, { "epoch": 17.96, "learning_rate": 8.933853981777305e-05, "loss": 0.0007, "step": 5620 }, { "epoch": 17.99, "learning_rate": 8.910188143414982e-05, "loss": 0.0001, "step": 5630 }, { "epoch": 18.0, "eval_accuracy": 0.9875, "eval_loss": 0.12689071893692017, "eval_runtime": 6.9648, "eval_samples_per_second": 57.431, "eval_steps_per_second": 7.179, "step": 5634 }, { "epoch": 18.02, "learning_rate": 8.886522305052657e-05, "loss": 0.001, "step": 5640 }, { "epoch": 18.05, "learning_rate": 8.862856466690333e-05, "loss": 0.0042, "step": 5650 }, { "epoch": 18.08, "learning_rate": 8.839190628328009e-05, "loss": 0.0254, "step": 5660 }, { "epoch": 18.12, "learning_rate": 8.815524789965684e-05, "loss": 0.0141, "step": 5670 }, { "epoch": 18.15, "learning_rate": 8.791858951603361e-05, "loss": 0.0008, "step": 5680 }, { "epoch": 18.18, "learning_rate": 8.768193113241037e-05, "loss": 0.0015, "step": 5690 }, { "epoch": 18.21, "learning_rate": 8.744527274878714e-05, "loss": 0.0, "step": 5700 }, { "epoch": 18.24, "learning_rate": 8.720861436516388e-05, "loss": 0.0038, "step": 5710 }, { "epoch": 18.27, "learning_rate": 8.697195598154065e-05, "loss": 0.0111, "step": 5720 }, { "epoch": 18.31, "learning_rate": 8.673529759791741e-05, "loss": 0.001, "step": 5730 }, { "epoch": 18.34, "learning_rate": 8.649863921429418e-05, "loss": 0.0003, "step": 5740 }, { "epoch": 18.37, "learning_rate": 8.626198083067093e-05, "loss": 0.0009, "step": 5750 }, { "epoch": 18.4, "learning_rate": 8.602532244704769e-05, "loss": 0.0002, "step": 5760 }, { "epoch": 18.43, "learning_rate": 8.578866406342445e-05, "loss": 0.0002, "step": 5770 }, { "epoch": 18.47, "learning_rate": 8.55520056798012e-05, "loss": 0.0001, "step": 5780 }, { "epoch": 18.5, "learning_rate": 8.531534729617797e-05, "loss": 0.0117, "step": 5790 }, { "epoch": 18.53, "learning_rate": 8.507868891255473e-05, "loss": 0.0009, "step": 5800 }, { "epoch": 18.56, "learning_rate": 8.48420305289315e-05, "loss": 0.0185, "step": 5810 }, { "epoch": 18.59, "learning_rate": 8.460537214530824e-05, "loss": 0.001, "step": 5820 }, { "epoch": 18.63, "learning_rate": 8.436871376168501e-05, "loss": 0.0014, "step": 5830 }, { "epoch": 18.66, "learning_rate": 8.413205537806177e-05, "loss": 0.0001, "step": 5840 }, { "epoch": 18.69, "learning_rate": 8.389539699443854e-05, "loss": 0.0014, "step": 5850 }, { "epoch": 18.72, "learning_rate": 8.36587386108153e-05, "loss": 0.0001, "step": 5860 }, { "epoch": 18.75, "learning_rate": 8.342208022719205e-05, "loss": 0.0053, "step": 5870 }, { "epoch": 18.79, "learning_rate": 8.318542184356881e-05, "loss": 0.0055, "step": 5880 }, { "epoch": 18.82, "learning_rate": 8.294876345994558e-05, "loss": 0.0, "step": 5890 }, { "epoch": 18.85, "learning_rate": 8.271210507632233e-05, "loss": 0.0011, "step": 5900 }, { "epoch": 18.88, "learning_rate": 8.247544669269909e-05, "loss": 0.0, "step": 5910 }, { "epoch": 18.91, "learning_rate": 8.223878830907585e-05, "loss": 0.0003, "step": 5920 }, { "epoch": 18.95, "learning_rate": 8.20021299254526e-05, "loss": 0.0001, "step": 5930 }, { "epoch": 18.98, "learning_rate": 8.176547154182937e-05, "loss": 0.0088, "step": 5940 }, { "epoch": 19.0, "eval_accuracy": 0.9925, "eval_loss": 0.10001015663146973, "eval_runtime": 5.2536, "eval_samples_per_second": 76.138, "eval_steps_per_second": 9.517, "step": 5947 }, { "epoch": 19.01, "learning_rate": 8.152881315820613e-05, "loss": 0.0001, "step": 5950 }, { "epoch": 19.04, "learning_rate": 8.12921547745829e-05, "loss": 0.0, "step": 5960 }, { "epoch": 19.07, "learning_rate": 8.105549639095966e-05, "loss": 0.0045, "step": 5970 }, { "epoch": 19.11, "learning_rate": 8.081883800733641e-05, "loss": 0.0, "step": 5980 }, { "epoch": 19.14, "learning_rate": 8.058217962371317e-05, "loss": 0.0, "step": 5990 }, { "epoch": 19.17, "learning_rate": 8.034552124008994e-05, "loss": 0.0, "step": 6000 }, { "epoch": 19.2, "learning_rate": 8.01088628564667e-05, "loss": 0.0, "step": 6010 }, { "epoch": 19.23, "learning_rate": 7.987220447284345e-05, "loss": 0.0, "step": 6020 }, { "epoch": 19.27, "learning_rate": 7.963554608922021e-05, "loss": 0.0, "step": 6030 }, { "epoch": 19.3, "learning_rate": 7.939888770559697e-05, "loss": 0.0, "step": 6040 }, { "epoch": 19.33, "learning_rate": 7.916222932197374e-05, "loss": 0.0005, "step": 6050 }, { "epoch": 19.36, "learning_rate": 7.892557093835049e-05, "loss": 0.0, "step": 6060 }, { "epoch": 19.39, "learning_rate": 7.868891255472726e-05, "loss": 0.0224, "step": 6070 }, { "epoch": 19.42, "learning_rate": 7.845225417110402e-05, "loss": 0.0, "step": 6080 }, { "epoch": 19.46, "learning_rate": 7.821559578748077e-05, "loss": 0.0, "step": 6090 }, { "epoch": 19.49, "learning_rate": 7.797893740385753e-05, "loss": 0.0, "step": 6100 }, { "epoch": 19.52, "learning_rate": 7.77422790202343e-05, "loss": 0.0, "step": 6110 }, { "epoch": 19.55, "learning_rate": 7.750562063661106e-05, "loss": 0.0, "step": 6120 }, { "epoch": 19.58, "learning_rate": 7.726896225298781e-05, "loss": 0.0, "step": 6130 }, { "epoch": 19.62, "learning_rate": 7.703230386936457e-05, "loss": 0.0, "step": 6140 }, { "epoch": 19.65, "learning_rate": 7.679564548574134e-05, "loss": 0.0005, "step": 6150 }, { "epoch": 19.68, "learning_rate": 7.65589871021181e-05, "loss": 0.0001, "step": 6160 }, { "epoch": 19.71, "learning_rate": 7.632232871849485e-05, "loss": 0.0, "step": 6170 }, { "epoch": 19.74, "learning_rate": 7.608567033487162e-05, "loss": 0.0, "step": 6180 }, { "epoch": 19.78, "learning_rate": 7.584901195124837e-05, "loss": 0.0005, "step": 6190 }, { "epoch": 19.81, "learning_rate": 7.561235356762514e-05, "loss": 0.0001, "step": 6200 }, { "epoch": 19.84, "learning_rate": 7.537569518400189e-05, "loss": 0.0001, "step": 6210 }, { "epoch": 19.87, "learning_rate": 7.513903680037866e-05, "loss": 0.0064, "step": 6220 }, { "epoch": 19.9, "learning_rate": 7.490237841675542e-05, "loss": 0.0204, "step": 6230 }, { "epoch": 19.94, "learning_rate": 7.466572003313218e-05, "loss": 0.0061, "step": 6240 }, { "epoch": 19.97, "learning_rate": 7.442906164950893e-05, "loss": 0.0001, "step": 6250 }, { "epoch": 20.0, "learning_rate": 7.41924032658857e-05, "loss": 0.0, "step": 6260 }, { "epoch": 20.0, "eval_accuracy": 0.99, "eval_loss": 0.09493375569581985, "eval_runtime": 6.1355, "eval_samples_per_second": 65.194, "eval_steps_per_second": 8.149, "step": 6260 }, { "epoch": 20.03, "learning_rate": 7.395574488226246e-05, "loss": 0.0001, "step": 6270 }, { "epoch": 20.06, "learning_rate": 7.371908649863923e-05, "loss": 0.0442, "step": 6280 }, { "epoch": 20.1, "learning_rate": 7.348242811501597e-05, "loss": 0.0001, "step": 6290 }, { "epoch": 20.13, "learning_rate": 7.324576973139273e-05, "loss": 0.0, "step": 6300 }, { "epoch": 20.16, "learning_rate": 7.30091113477695e-05, "loss": 0.0011, "step": 6310 }, { "epoch": 20.19, "learning_rate": 7.277245296414625e-05, "loss": 0.0, "step": 6320 }, { "epoch": 20.22, "learning_rate": 7.253579458052302e-05, "loss": 0.0, "step": 6330 }, { "epoch": 20.26, "learning_rate": 7.229913619689978e-05, "loss": 0.0, "step": 6340 }, { "epoch": 20.29, "learning_rate": 7.206247781327654e-05, "loss": 0.0016, "step": 6350 }, { "epoch": 20.32, "learning_rate": 7.182581942965329e-05, "loss": 0.0, "step": 6360 }, { "epoch": 20.35, "learning_rate": 7.158916104603006e-05, "loss": 0.0, "step": 6370 }, { "epoch": 20.38, "learning_rate": 7.135250266240682e-05, "loss": 0.0, "step": 6380 }, { "epoch": 20.42, "learning_rate": 7.111584427878359e-05, "loss": 0.0071, "step": 6390 }, { "epoch": 20.45, "learning_rate": 7.087918589516033e-05, "loss": 0.0, "step": 6400 }, { "epoch": 20.48, "learning_rate": 7.06425275115371e-05, "loss": 0.0014, "step": 6410 }, { "epoch": 20.51, "learning_rate": 7.040586912791386e-05, "loss": 0.0077, "step": 6420 }, { "epoch": 20.54, "learning_rate": 7.016921074429062e-05, "loss": 0.0022, "step": 6430 }, { "epoch": 20.58, "learning_rate": 6.993255236066739e-05, "loss": 0.0004, "step": 6440 }, { "epoch": 20.61, "learning_rate": 6.969589397704414e-05, "loss": 0.0, "step": 6450 }, { "epoch": 20.64, "learning_rate": 6.94592355934209e-05, "loss": 0.0, "step": 6460 }, { "epoch": 20.67, "learning_rate": 6.922257720979765e-05, "loss": 0.0014, "step": 6470 }, { "epoch": 20.7, "learning_rate": 6.898591882617442e-05, "loss": 0.0102, "step": 6480 }, { "epoch": 20.73, "learning_rate": 6.874926044255118e-05, "loss": 0.0305, "step": 6490 }, { "epoch": 20.77, "learning_rate": 6.851260205892794e-05, "loss": 0.0079, "step": 6500 }, { "epoch": 20.8, "learning_rate": 6.82759436753047e-05, "loss": 0.0, "step": 6510 }, { "epoch": 20.83, "learning_rate": 6.803928529168146e-05, "loss": 0.0208, "step": 6520 }, { "epoch": 20.86, "learning_rate": 6.780262690805822e-05, "loss": 0.0001, "step": 6530 }, { "epoch": 20.89, "learning_rate": 6.756596852443499e-05, "loss": 0.0, "step": 6540 }, { "epoch": 20.93, "learning_rate": 6.732931014081175e-05, "loss": 0.0001, "step": 6550 }, { "epoch": 20.96, "learning_rate": 6.70926517571885e-05, "loss": 0.0001, "step": 6560 }, { "epoch": 20.99, "learning_rate": 6.685599337356526e-05, "loss": 0.0, "step": 6570 }, { "epoch": 21.0, "eval_accuracy": 0.985, "eval_loss": 0.1263246089220047, "eval_runtime": 5.756, "eval_samples_per_second": 69.493, "eval_steps_per_second": 8.687, "step": 6573 }, { "epoch": 21.02, "learning_rate": 6.661933498994202e-05, "loss": 0.0, "step": 6580 }, { "epoch": 21.05, "learning_rate": 6.638267660631879e-05, "loss": 0.0015, "step": 6590 }, { "epoch": 21.09, "learning_rate": 6.614601822269554e-05, "loss": 0.0168, "step": 6600 }, { "epoch": 21.12, "learning_rate": 6.59093598390723e-05, "loss": 0.0, "step": 6610 }, { "epoch": 21.15, "learning_rate": 6.567270145544905e-05, "loss": 0.0004, "step": 6620 }, { "epoch": 21.18, "learning_rate": 6.543604307182582e-05, "loss": 0.0, "step": 6630 }, { "epoch": 21.21, "learning_rate": 6.519938468820258e-05, "loss": 0.0019, "step": 6640 }, { "epoch": 21.25, "learning_rate": 6.496272630457935e-05, "loss": 0.0436, "step": 6650 }, { "epoch": 21.28, "learning_rate": 6.472606792095611e-05, "loss": 0.0003, "step": 6660 }, { "epoch": 21.31, "learning_rate": 6.448940953733286e-05, "loss": 0.0002, "step": 6670 }, { "epoch": 21.34, "learning_rate": 6.425275115370962e-05, "loss": 0.0091, "step": 6680 }, { "epoch": 21.37, "learning_rate": 6.401609277008639e-05, "loss": 0.0003, "step": 6690 }, { "epoch": 21.41, "learning_rate": 6.377943438646315e-05, "loss": 0.0, "step": 6700 }, { "epoch": 21.44, "learning_rate": 6.35427760028399e-05, "loss": 0.0143, "step": 6710 }, { "epoch": 21.47, "learning_rate": 6.330611761921666e-05, "loss": 0.0002, "step": 6720 }, { "epoch": 21.5, "learning_rate": 6.306945923559342e-05, "loss": 0.0, "step": 6730 }, { "epoch": 21.53, "learning_rate": 6.283280085197019e-05, "loss": 0.0, "step": 6740 }, { "epoch": 21.57, "learning_rate": 6.259614246834694e-05, "loss": 0.0, "step": 6750 }, { "epoch": 21.6, "learning_rate": 6.235948408472371e-05, "loss": 0.0, "step": 6760 }, { "epoch": 21.63, "learning_rate": 6.212282570110046e-05, "loss": 0.0006, "step": 6770 }, { "epoch": 21.66, "learning_rate": 6.188616731747723e-05, "loss": 0.0, "step": 6780 }, { "epoch": 21.69, "learning_rate": 6.164950893385398e-05, "loss": 0.0002, "step": 6790 }, { "epoch": 21.73, "learning_rate": 6.141285055023075e-05, "loss": 0.0001, "step": 6800 }, { "epoch": 21.76, "learning_rate": 6.117619216660751e-05, "loss": 0.0, "step": 6810 }, { "epoch": 21.79, "learning_rate": 6.093953378298427e-05, "loss": 0.0, "step": 6820 }, { "epoch": 21.82, "learning_rate": 6.070287539936103e-05, "loss": 0.0038, "step": 6830 }, { "epoch": 21.85, "learning_rate": 6.046621701573778e-05, "loss": 0.072, "step": 6840 }, { "epoch": 21.88, "learning_rate": 6.022955863211455e-05, "loss": 0.0467, "step": 6850 }, { "epoch": 21.92, "learning_rate": 5.9992900248491304e-05, "loss": 0.0134, "step": 6860 }, { "epoch": 21.95, "learning_rate": 5.975624186486807e-05, "loss": 0.0026, "step": 6870 }, { "epoch": 21.98, "learning_rate": 5.9519583481244824e-05, "loss": 0.0239, "step": 6880 }, { "epoch": 22.0, "eval_accuracy": 0.9725, "eval_loss": 0.20680759847164154, "eval_runtime": 6.3851, "eval_samples_per_second": 62.646, "eval_steps_per_second": 7.831, "step": 6886 }, { "epoch": 22.01, "learning_rate": 5.928292509762159e-05, "loss": 0.0005, "step": 6890 }, { "epoch": 22.04, "learning_rate": 5.904626671399834e-05, "loss": 0.0217, "step": 6900 }, { "epoch": 22.08, "learning_rate": 5.880960833037511e-05, "loss": 0.0035, "step": 6910 }, { "epoch": 22.11, "learning_rate": 5.857294994675186e-05, "loss": 0.005, "step": 6920 }, { "epoch": 22.14, "learning_rate": 5.833629156312863e-05, "loss": 0.0001, "step": 6930 }, { "epoch": 22.17, "learning_rate": 5.809963317950539e-05, "loss": 0.0003, "step": 6940 }, { "epoch": 22.2, "learning_rate": 5.786297479588215e-05, "loss": 0.0007, "step": 6950 }, { "epoch": 22.24, "learning_rate": 5.762631641225891e-05, "loss": 0.0, "step": 6960 }, { "epoch": 22.27, "learning_rate": 5.7389658028635665e-05, "loss": 0.0001, "step": 6970 }, { "epoch": 22.3, "learning_rate": 5.715299964501243e-05, "loss": 0.0, "step": 6980 }, { "epoch": 22.33, "learning_rate": 5.6916341261389185e-05, "loss": 0.0052, "step": 6990 }, { "epoch": 22.36, "learning_rate": 5.667968287776595e-05, "loss": 0.0325, "step": 7000 }, { "epoch": 22.4, "learning_rate": 5.6443024494142705e-05, "loss": 0.0003, "step": 7010 }, { "epoch": 22.43, "learning_rate": 5.620636611051947e-05, "loss": 0.0037, "step": 7020 }, { "epoch": 22.46, "learning_rate": 5.5969707726896224e-05, "loss": 0.0002, "step": 7030 }, { "epoch": 22.49, "learning_rate": 5.5733049343272994e-05, "loss": 0.0245, "step": 7040 }, { "epoch": 22.52, "learning_rate": 5.5496390959649744e-05, "loss": 0.0013, "step": 7050 }, { "epoch": 22.56, "learning_rate": 5.5259732576026514e-05, "loss": 0.0097, "step": 7060 }, { "epoch": 22.59, "learning_rate": 5.502307419240327e-05, "loss": 0.0006, "step": 7070 }, { "epoch": 22.62, "learning_rate": 5.4786415808780034e-05, "loss": 0.0, "step": 7080 }, { "epoch": 22.65, "learning_rate": 5.454975742515679e-05, "loss": 0.0004, "step": 7090 }, { "epoch": 22.68, "learning_rate": 5.4313099041533546e-05, "loss": 0.0007, "step": 7100 }, { "epoch": 22.72, "learning_rate": 5.407644065791031e-05, "loss": 0.0044, "step": 7110 }, { "epoch": 22.75, "learning_rate": 5.3839782274287066e-05, "loss": 0.0003, "step": 7120 }, { "epoch": 22.78, "learning_rate": 5.360312389066383e-05, "loss": 0.0, "step": 7130 }, { "epoch": 22.81, "learning_rate": 5.3366465507040586e-05, "loss": 0.0026, "step": 7140 }, { "epoch": 22.84, "learning_rate": 5.312980712341735e-05, "loss": 0.0, "step": 7150 }, { "epoch": 22.88, "learning_rate": 5.2893148739794105e-05, "loss": 0.0019, "step": 7160 }, { "epoch": 22.91, "learning_rate": 5.2656490356170875e-05, "loss": 0.0, "step": 7170 }, { "epoch": 22.94, "learning_rate": 5.241983197254763e-05, "loss": 0.0001, "step": 7180 }, { "epoch": 22.97, "learning_rate": 5.2183173588924395e-05, "loss": 0.0, "step": 7190 }, { "epoch": 23.0, "eval_accuracy": 0.99, "eval_loss": 0.10589393973350525, "eval_runtime": 5.2362, "eval_samples_per_second": 76.391, "eval_steps_per_second": 9.549, "step": 7199 }, { "epoch": 23.0, "learning_rate": 5.194651520530115e-05, "loss": 0.0, "step": 7200 }, { "epoch": 23.04, "learning_rate": 5.1709856821677915e-05, "loss": 0.0003, "step": 7210 }, { "epoch": 23.07, "learning_rate": 5.147319843805467e-05, "loss": 0.0003, "step": 7220 }, { "epoch": 23.1, "learning_rate": 5.123654005443143e-05, "loss": 0.0, "step": 7230 }, { "epoch": 23.13, "learning_rate": 5.099988167080819e-05, "loss": 0.0, "step": 7240 }, { "epoch": 23.16, "learning_rate": 5.076322328718495e-05, "loss": 0.0016, "step": 7250 }, { "epoch": 23.19, "learning_rate": 5.052656490356171e-05, "loss": 0.0003, "step": 7260 }, { "epoch": 23.23, "learning_rate": 5.028990651993847e-05, "loss": 0.0002, "step": 7270 }, { "epoch": 23.26, "learning_rate": 5.0053248136315237e-05, "loss": 0.0058, "step": 7280 }, { "epoch": 23.29, "learning_rate": 4.9816589752691986e-05, "loss": 0.0, "step": 7290 }, { "epoch": 23.32, "learning_rate": 4.957993136906875e-05, "loss": 0.0001, "step": 7300 }, { "epoch": 23.35, "learning_rate": 4.934327298544551e-05, "loss": 0.0085, "step": 7310 }, { "epoch": 23.39, "learning_rate": 4.910661460182227e-05, "loss": 0.0073, "step": 7320 }, { "epoch": 23.42, "learning_rate": 4.886995621819903e-05, "loss": 0.0076, "step": 7330 }, { "epoch": 23.45, "learning_rate": 4.863329783457579e-05, "loss": 0.0, "step": 7340 }, { "epoch": 23.48, "learning_rate": 4.839663945095255e-05, "loss": 0.0, "step": 7350 }, { "epoch": 23.51, "learning_rate": 4.8159981067329315e-05, "loss": 0.0, "step": 7360 }, { "epoch": 23.55, "learning_rate": 4.792332268370607e-05, "loss": 0.0002, "step": 7370 }, { "epoch": 23.58, "learning_rate": 4.7686664300082835e-05, "loss": 0.0, "step": 7380 }, { "epoch": 23.61, "learning_rate": 4.745000591645959e-05, "loss": 0.0, "step": 7390 }, { "epoch": 23.64, "learning_rate": 4.7213347532836354e-05, "loss": 0.0, "step": 7400 }, { "epoch": 23.67, "learning_rate": 4.697668914921312e-05, "loss": 0.0001, "step": 7410 }, { "epoch": 23.71, "learning_rate": 4.674003076558987e-05, "loss": 0.0027, "step": 7420 }, { "epoch": 23.74, "learning_rate": 4.650337238196663e-05, "loss": 0.0, "step": 7430 }, { "epoch": 23.77, "learning_rate": 4.6266713998343394e-05, "loss": 0.0, "step": 7440 }, { "epoch": 23.8, "learning_rate": 4.603005561472015e-05, "loss": 0.0, "step": 7450 }, { "epoch": 23.83, "learning_rate": 4.579339723109691e-05, "loss": 0.0003, "step": 7460 }, { "epoch": 23.87, "learning_rate": 4.5556738847473676e-05, "loss": 0.0, "step": 7470 }, { "epoch": 23.9, "learning_rate": 4.532008046385043e-05, "loss": 0.0, "step": 7480 }, { "epoch": 23.93, "learning_rate": 4.5083422080227196e-05, "loss": 0.0, "step": 7490 }, { "epoch": 23.96, "learning_rate": 4.484676369660395e-05, "loss": 0.0, "step": 7500 }, { "epoch": 23.99, "learning_rate": 4.4610105312980716e-05, "loss": 0.0, "step": 7510 }, { "epoch": 24.0, "eval_accuracy": 0.9875, "eval_loss": 0.14845281839370728, "eval_runtime": 6.865, "eval_samples_per_second": 58.267, "eval_steps_per_second": 7.283, "step": 7512 }, { "epoch": 24.03, "learning_rate": 4.437344692935748e-05, "loss": 0.0, "step": 7520 }, { "epoch": 24.06, "learning_rate": 4.4136788545734235e-05, "loss": 0.0001, "step": 7530 }, { "epoch": 24.09, "learning_rate": 4.3900130162111e-05, "loss": 0.0008, "step": 7540 }, { "epoch": 24.12, "learning_rate": 4.3663471778487755e-05, "loss": 0.0, "step": 7550 }, { "epoch": 24.15, "learning_rate": 4.342681339486451e-05, "loss": 0.0, "step": 7560 }, { "epoch": 24.19, "learning_rate": 4.3190155011241275e-05, "loss": 0.0, "step": 7570 }, { "epoch": 24.22, "learning_rate": 4.295349662761803e-05, "loss": 0.0, "step": 7580 }, { "epoch": 24.25, "learning_rate": 4.2716838243994794e-05, "loss": 0.0, "step": 7590 }, { "epoch": 24.28, "learning_rate": 4.248017986037156e-05, "loss": 0.0001, "step": 7600 }, { "epoch": 24.31, "learning_rate": 4.2243521476748314e-05, "loss": 0.0059, "step": 7610 }, { "epoch": 24.35, "learning_rate": 4.200686309312508e-05, "loss": 0.0, "step": 7620 }, { "epoch": 24.38, "learning_rate": 4.1770204709501833e-05, "loss": 0.0038, "step": 7630 }, { "epoch": 24.41, "learning_rate": 4.15335463258786e-05, "loss": 0.0, "step": 7640 }, { "epoch": 24.44, "learning_rate": 4.129688794225536e-05, "loss": 0.0, "step": 7650 }, { "epoch": 24.47, "learning_rate": 4.1060229558632116e-05, "loss": 0.0, "step": 7660 }, { "epoch": 24.5, "learning_rate": 4.082357117500888e-05, "loss": 0.0, "step": 7670 }, { "epoch": 24.54, "learning_rate": 4.058691279138564e-05, "loss": 0.0002, "step": 7680 }, { "epoch": 24.57, "learning_rate": 4.035025440776239e-05, "loss": 0.0, "step": 7690 }, { "epoch": 24.6, "learning_rate": 4.0113596024139156e-05, "loss": 0.0, "step": 7700 }, { "epoch": 24.63, "learning_rate": 3.987693764051592e-05, "loss": 0.0, "step": 7710 }, { "epoch": 24.66, "learning_rate": 3.9640279256892675e-05, "loss": 0.0, "step": 7720 }, { "epoch": 24.7, "learning_rate": 3.940362087326944e-05, "loss": 0.0001, "step": 7730 }, { "epoch": 24.73, "learning_rate": 3.9166962489646195e-05, "loss": 0.0005, "step": 7740 }, { "epoch": 24.76, "learning_rate": 3.893030410602296e-05, "loss": 0.0, "step": 7750 }, { "epoch": 24.79, "learning_rate": 3.869364572239972e-05, "loss": 0.0001, "step": 7760 }, { "epoch": 24.82, "learning_rate": 3.845698733877648e-05, "loss": 0.0, "step": 7770 }, { "epoch": 24.86, "learning_rate": 3.822032895515324e-05, "loss": 0.0, "step": 7780 }, { "epoch": 24.89, "learning_rate": 3.798367057153e-05, "loss": 0.0, "step": 7790 }, { "epoch": 24.92, "learning_rate": 3.774701218790676e-05, "loss": 0.0, "step": 7800 }, { "epoch": 24.95, "learning_rate": 3.7510353804283524e-05, "loss": 0.0245, "step": 7810 }, { "epoch": 24.98, "learning_rate": 3.727369542066027e-05, "loss": 0.0, "step": 7820 }, { "epoch": 25.0, "eval_accuracy": 0.99, "eval_loss": 0.15507297217845917, "eval_runtime": 5.1221, "eval_samples_per_second": 78.093, "eval_steps_per_second": 9.762, "step": 7825 }, { "epoch": 25.02, "learning_rate": 3.7037037037037037e-05, "loss": 0.0, "step": 7830 }, { "epoch": 25.05, "learning_rate": 3.68003786534138e-05, "loss": 0.0, "step": 7840 }, { "epoch": 25.08, "learning_rate": 3.6563720269790556e-05, "loss": 0.0, "step": 7850 }, { "epoch": 25.11, "learning_rate": 3.632706188616732e-05, "loss": 0.0, "step": 7860 }, { "epoch": 25.14, "learning_rate": 3.6090403502544076e-05, "loss": 0.0, "step": 7870 }, { "epoch": 25.18, "learning_rate": 3.585374511892084e-05, "loss": 0.0, "step": 7880 }, { "epoch": 25.21, "learning_rate": 3.56170867352976e-05, "loss": 0.0, "step": 7890 }, { "epoch": 25.24, "learning_rate": 3.538042835167436e-05, "loss": 0.0, "step": 7900 }, { "epoch": 25.27, "learning_rate": 3.514376996805112e-05, "loss": 0.0, "step": 7910 }, { "epoch": 25.3, "learning_rate": 3.4907111584427885e-05, "loss": 0.0, "step": 7920 }, { "epoch": 25.34, "learning_rate": 3.467045320080464e-05, "loss": 0.0, "step": 7930 }, { "epoch": 25.37, "learning_rate": 3.4433794817181405e-05, "loss": 0.0, "step": 7940 }, { "epoch": 25.4, "learning_rate": 3.419713643355816e-05, "loss": 0.0033, "step": 7950 }, { "epoch": 25.43, "learning_rate": 3.396047804993492e-05, "loss": 0.0, "step": 7960 }, { "epoch": 25.46, "learning_rate": 3.372381966631168e-05, "loss": 0.0, "step": 7970 }, { "epoch": 25.5, "learning_rate": 3.348716128268844e-05, "loss": 0.0, "step": 7980 }, { "epoch": 25.53, "learning_rate": 3.32505028990652e-05, "loss": 0.0, "step": 7990 }, { "epoch": 25.56, "learning_rate": 3.3013844515441964e-05, "loss": 0.0071, "step": 8000 }, { "epoch": 25.59, "learning_rate": 3.277718613181872e-05, "loss": 0.0, "step": 8010 }, { "epoch": 25.62, "learning_rate": 3.254052774819548e-05, "loss": 0.0001, "step": 8020 }, { "epoch": 25.65, "learning_rate": 3.230386936457224e-05, "loss": 0.0, "step": 8030 }, { "epoch": 25.69, "learning_rate": 3.2067210980949e-05, "loss": 0.0, "step": 8040 }, { "epoch": 25.72, "learning_rate": 3.1830552597325766e-05, "loss": 0.0, "step": 8050 }, { "epoch": 25.75, "learning_rate": 3.159389421370252e-05, "loss": 0.0, "step": 8060 }, { "epoch": 25.78, "learning_rate": 3.1357235830079286e-05, "loss": 0.0, "step": 8070 }, { "epoch": 25.81, "learning_rate": 3.112057744645604e-05, "loss": 0.0, "step": 8080 }, { "epoch": 25.85, "learning_rate": 3.08839190628328e-05, "loss": 0.0001, "step": 8090 }, { "epoch": 25.88, "learning_rate": 3.064726067920956e-05, "loss": 0.0002, "step": 8100 }, { "epoch": 25.91, "learning_rate": 3.041060229558632e-05, "loss": 0.0, "step": 8110 }, { "epoch": 25.94, "learning_rate": 3.017394391196308e-05, "loss": 0.0, "step": 8120 }, { "epoch": 25.97, "learning_rate": 2.993728552833984e-05, "loss": 0.0, "step": 8130 }, { "epoch": 26.0, "eval_accuracy": 0.99, "eval_loss": 0.14341458678245544, "eval_runtime": 6.8347, "eval_samples_per_second": 58.525, "eval_steps_per_second": 7.316, "step": 8138 }, { "epoch": 26.01, "learning_rate": 2.9700627144716604e-05, "loss": 0.0, "step": 8140 }, { "epoch": 26.04, "learning_rate": 2.9463968761093364e-05, "loss": 0.0, "step": 8150 }, { "epoch": 26.07, "learning_rate": 2.9227310377470124e-05, "loss": 0.0, "step": 8160 }, { "epoch": 26.1, "learning_rate": 2.8990651993846884e-05, "loss": 0.0, "step": 8170 }, { "epoch": 26.13, "learning_rate": 2.8753993610223644e-05, "loss": 0.0, "step": 8180 }, { "epoch": 26.17, "learning_rate": 2.8517335226600407e-05, "loss": 0.0, "step": 8190 }, { "epoch": 26.2, "learning_rate": 2.8280676842977167e-05, "loss": 0.0, "step": 8200 }, { "epoch": 26.23, "learning_rate": 2.804401845935392e-05, "loss": 0.0, "step": 8210 }, { "epoch": 26.26, "learning_rate": 2.7807360075730683e-05, "loss": 0.0, "step": 8220 }, { "epoch": 26.29, "learning_rate": 2.7570701692107443e-05, "loss": 0.0, "step": 8230 }, { "epoch": 26.33, "learning_rate": 2.7334043308484202e-05, "loss": 0.0, "step": 8240 }, { "epoch": 26.36, "learning_rate": 2.7097384924860962e-05, "loss": 0.0, "step": 8250 }, { "epoch": 26.39, "learning_rate": 2.6860726541237726e-05, "loss": 0.0, "step": 8260 }, { "epoch": 26.42, "learning_rate": 2.6624068157614485e-05, "loss": 0.0001, "step": 8270 }, { "epoch": 26.45, "learning_rate": 2.6387409773991245e-05, "loss": 0.0, "step": 8280 }, { "epoch": 26.49, "learning_rate": 2.6150751390368005e-05, "loss": 0.0, "step": 8290 }, { "epoch": 26.52, "learning_rate": 2.5914093006744765e-05, "loss": 0.0, "step": 8300 }, { "epoch": 26.55, "learning_rate": 2.5677434623121528e-05, "loss": 0.0, "step": 8310 }, { "epoch": 26.58, "learning_rate": 2.5440776239498288e-05, "loss": 0.0, "step": 8320 }, { "epoch": 26.61, "learning_rate": 2.5204117855875048e-05, "loss": 0.0001, "step": 8330 }, { "epoch": 26.65, "learning_rate": 2.4967459472251804e-05, "loss": 0.0, "step": 8340 }, { "epoch": 26.68, "learning_rate": 2.4730801088628567e-05, "loss": 0.0, "step": 8350 }, { "epoch": 26.71, "learning_rate": 2.4494142705005327e-05, "loss": 0.0, "step": 8360 }, { "epoch": 26.74, "learning_rate": 2.4257484321382087e-05, "loss": 0.0, "step": 8370 }, { "epoch": 26.77, "learning_rate": 2.4020825937758847e-05, "loss": 0.0, "step": 8380 }, { "epoch": 26.81, "learning_rate": 2.3784167554135606e-05, "loss": 0.0, "step": 8390 }, { "epoch": 26.84, "learning_rate": 2.3547509170512366e-05, "loss": 0.0, "step": 8400 }, { "epoch": 26.87, "learning_rate": 2.3310850786889126e-05, "loss": 0.0, "step": 8410 }, { "epoch": 26.9, "learning_rate": 2.3074192403265886e-05, "loss": 0.0, "step": 8420 }, { "epoch": 26.93, "learning_rate": 2.283753401964265e-05, "loss": 0.0, "step": 8430 }, { "epoch": 26.96, "learning_rate": 2.2600875636019406e-05, "loss": 0.0015, "step": 8440 }, { "epoch": 27.0, "learning_rate": 2.2364217252396165e-05, "loss": 0.0001, "step": 8450 }, { "epoch": 27.0, "eval_accuracy": 0.99, "eval_loss": 0.1398298293352127, "eval_runtime": 5.1804, "eval_samples_per_second": 77.213, "eval_steps_per_second": 9.652, "step": 8451 }, { "epoch": 27.03, "learning_rate": 2.2127558868772925e-05, "loss": 0.0, "step": 8460 }, { "epoch": 27.06, "learning_rate": 2.189090048514969e-05, "loss": 0.0, "step": 8470 }, { "epoch": 27.09, "learning_rate": 2.1654242101526448e-05, "loss": 0.0, "step": 8480 }, { "epoch": 27.12, "learning_rate": 2.1417583717903208e-05, "loss": 0.0007, "step": 8490 }, { "epoch": 27.16, "learning_rate": 2.1180925334279968e-05, "loss": 0.0, "step": 8500 }, { "epoch": 27.19, "learning_rate": 2.0944266950656728e-05, "loss": 0.0, "step": 8510 }, { "epoch": 27.22, "learning_rate": 2.0707608567033487e-05, "loss": 0.0, "step": 8520 }, { "epoch": 27.25, "learning_rate": 2.0470950183410247e-05, "loss": 0.0, "step": 8530 }, { "epoch": 27.28, "learning_rate": 2.0234291799787007e-05, "loss": 0.0, "step": 8540 }, { "epoch": 27.32, "learning_rate": 1.999763341616377e-05, "loss": 0.0, "step": 8550 }, { "epoch": 27.35, "learning_rate": 1.976097503254053e-05, "loss": 0.0, "step": 8560 }, { "epoch": 27.38, "learning_rate": 1.952431664891729e-05, "loss": 0.0, "step": 8570 }, { "epoch": 27.41, "learning_rate": 1.9287658265294046e-05, "loss": 0.0019, "step": 8580 }, { "epoch": 27.44, "learning_rate": 1.905099988167081e-05, "loss": 0.0, "step": 8590 }, { "epoch": 27.48, "learning_rate": 1.881434149804757e-05, "loss": 0.0, "step": 8600 }, { "epoch": 27.51, "learning_rate": 1.857768311442433e-05, "loss": 0.0002, "step": 8610 }, { "epoch": 27.54, "learning_rate": 1.834102473080109e-05, "loss": 0.0, "step": 8620 }, { "epoch": 27.57, "learning_rate": 1.8104366347177852e-05, "loss": 0.0, "step": 8630 }, { "epoch": 27.6, "learning_rate": 1.786770796355461e-05, "loss": 0.0001, "step": 8640 }, { "epoch": 27.64, "learning_rate": 1.763104957993137e-05, "loss": 0.0031, "step": 8650 }, { "epoch": 27.67, "learning_rate": 1.7394391196308128e-05, "loss": 0.0, "step": 8660 }, { "epoch": 27.7, "learning_rate": 1.715773281268489e-05, "loss": 0.0, "step": 8670 }, { "epoch": 27.73, "learning_rate": 1.692107442906165e-05, "loss": 0.0, "step": 8680 }, { "epoch": 27.76, "learning_rate": 1.668441604543841e-05, "loss": 0.0, "step": 8690 }, { "epoch": 27.8, "learning_rate": 1.644775766181517e-05, "loss": 0.0, "step": 8700 }, { "epoch": 27.83, "learning_rate": 1.621109927819193e-05, "loss": 0.0, "step": 8710 }, { "epoch": 27.86, "learning_rate": 1.597444089456869e-05, "loss": 0.0, "step": 8720 }, { "epoch": 27.89, "learning_rate": 1.573778251094545e-05, "loss": 0.0, "step": 8730 }, { "epoch": 27.92, "learning_rate": 1.550112412732221e-05, "loss": 0.0, "step": 8740 }, { "epoch": 27.96, "learning_rate": 1.5264465743698973e-05, "loss": 0.0, "step": 8750 }, { "epoch": 27.99, "learning_rate": 1.5027807360075733e-05, "loss": 0.0, "step": 8760 }, { "epoch": 28.0, "eval_accuracy": 0.99, "eval_loss": 0.13574786484241486, "eval_runtime": 6.1395, "eval_samples_per_second": 65.152, "eval_steps_per_second": 8.144, "step": 8764 }, { "epoch": 28.02, "learning_rate": 1.479114897645249e-05, "loss": 0.0, "step": 8770 }, { "epoch": 28.05, "learning_rate": 1.4554490592829251e-05, "loss": 0.0, "step": 8780 }, { "epoch": 28.08, "learning_rate": 1.4317832209206011e-05, "loss": 0.0, "step": 8790 }, { "epoch": 28.12, "learning_rate": 1.4081173825582772e-05, "loss": 0.0, "step": 8800 }, { "epoch": 28.15, "learning_rate": 1.3844515441959532e-05, "loss": 0.0, "step": 8810 }, { "epoch": 28.18, "learning_rate": 1.3607857058336294e-05, "loss": 0.0, "step": 8820 }, { "epoch": 28.21, "learning_rate": 1.3371198674713054e-05, "loss": 0.0, "step": 8830 }, { "epoch": 28.24, "learning_rate": 1.3134540291089812e-05, "loss": 0.0033, "step": 8840 }, { "epoch": 28.27, "learning_rate": 1.2897881907466571e-05, "loss": 0.0, "step": 8850 }, { "epoch": 28.31, "learning_rate": 1.2661223523843333e-05, "loss": 0.0, "step": 8860 }, { "epoch": 28.34, "learning_rate": 1.2424565140220093e-05, "loss": 0.0, "step": 8870 }, { "epoch": 28.37, "learning_rate": 1.2187906756596854e-05, "loss": 0.0, "step": 8880 }, { "epoch": 28.4, "learning_rate": 1.1951248372973612e-05, "loss": 0.0, "step": 8890 }, { "epoch": 28.43, "learning_rate": 1.1714589989350374e-05, "loss": 0.0, "step": 8900 }, { "epoch": 28.47, "learning_rate": 1.1477931605727134e-05, "loss": 0.0, "step": 8910 }, { "epoch": 28.5, "learning_rate": 1.1241273222103894e-05, "loss": 0.0, "step": 8920 }, { "epoch": 28.53, "learning_rate": 1.1004614838480653e-05, "loss": 0.0, "step": 8930 }, { "epoch": 28.56, "learning_rate": 1.0767956454857415e-05, "loss": 0.0001, "step": 8940 }, { "epoch": 28.59, "learning_rate": 1.0531298071234175e-05, "loss": 0.0, "step": 8950 }, { "epoch": 28.63, "learning_rate": 1.0294639687610935e-05, "loss": 0.0, "step": 8960 }, { "epoch": 28.66, "learning_rate": 1.0057981303987694e-05, "loss": 0.0, "step": 8970 }, { "epoch": 28.69, "learning_rate": 9.821322920364454e-06, "loss": 0.0, "step": 8980 }, { "epoch": 28.72, "learning_rate": 9.584664536741214e-06, "loss": 0.0, "step": 8990 }, { "epoch": 28.75, "learning_rate": 9.348006153117975e-06, "loss": 0.0, "step": 9000 }, { "epoch": 28.79, "learning_rate": 9.111347769494735e-06, "loss": 0.0, "step": 9010 }, { "epoch": 28.82, "learning_rate": 8.874689385871495e-06, "loss": 0.0, "step": 9020 }, { "epoch": 28.85, "learning_rate": 8.638031002248255e-06, "loss": 0.0, "step": 9030 }, { "epoch": 28.88, "learning_rate": 8.401372618625015e-06, "loss": 0.0, "step": 9040 }, { "epoch": 28.91, "learning_rate": 8.164714235001775e-06, "loss": 0.0, "step": 9050 }, { "epoch": 28.95, "learning_rate": 7.928055851378536e-06, "loss": 0.0, "step": 9060 }, { "epoch": 28.98, "learning_rate": 7.691397467755296e-06, "loss": 0.0, "step": 9070 }, { "epoch": 29.0, "eval_accuracy": 0.99, "eval_loss": 0.13164377212524414, "eval_runtime": 5.2001, "eval_samples_per_second": 76.922, "eval_steps_per_second": 9.615, "step": 9077 }, { "epoch": 29.01, "learning_rate": 7.4547390841320565e-06, "loss": 0.0, "step": 9080 }, { "epoch": 29.04, "learning_rate": 7.2180807005088155e-06, "loss": 0.0, "step": 9090 }, { "epoch": 29.07, "learning_rate": 6.981422316885576e-06, "loss": 0.0, "step": 9100 }, { "epoch": 29.11, "learning_rate": 6.744763933262337e-06, "loss": 0.0, "step": 9110 }, { "epoch": 29.14, "learning_rate": 6.508105549639096e-06, "loss": 0.0, "step": 9120 }, { "epoch": 29.17, "learning_rate": 6.2714471660158565e-06, "loss": 0.0016, "step": 9130 }, { "epoch": 29.2, "learning_rate": 6.034788782392616e-06, "loss": 0.0, "step": 9140 }, { "epoch": 29.23, "learning_rate": 5.798130398769377e-06, "loss": 0.0, "step": 9150 }, { "epoch": 29.27, "learning_rate": 5.561472015146137e-06, "loss": 0.0, "step": 9160 }, { "epoch": 29.3, "learning_rate": 5.324813631522897e-06, "loss": 0.0, "step": 9170 }, { "epoch": 29.33, "learning_rate": 5.088155247899657e-06, "loss": 0.0, "step": 9180 }, { "epoch": 29.36, "learning_rate": 4.851496864276417e-06, "loss": 0.0, "step": 9190 }, { "epoch": 29.39, "learning_rate": 4.614838480653178e-06, "loss": 0.0, "step": 9200 }, { "epoch": 29.42, "learning_rate": 4.3781800970299375e-06, "loss": 0.0, "step": 9210 }, { "epoch": 29.46, "learning_rate": 4.141521713406698e-06, "loss": 0.0, "step": 9220 }, { "epoch": 29.49, "learning_rate": 3.904863329783458e-06, "loss": 0.0, "step": 9230 }, { "epoch": 29.52, "learning_rate": 3.668204946160218e-06, "loss": 0.0, "step": 9240 }, { "epoch": 29.55, "learning_rate": 3.431546562536978e-06, "loss": 0.0, "step": 9250 }, { "epoch": 29.58, "learning_rate": 3.194888178913738e-06, "loss": 0.0, "step": 9260 }, { "epoch": 29.62, "learning_rate": 2.958229795290498e-06, "loss": 0.0, "step": 9270 }, { "epoch": 29.65, "learning_rate": 2.7215714116672583e-06, "loss": 0.0, "step": 9280 }, { "epoch": 29.68, "learning_rate": 2.4849130280440186e-06, "loss": 0.0, "step": 9290 }, { "epoch": 29.71, "learning_rate": 2.248254644420779e-06, "loss": 0.0, "step": 9300 }, { "epoch": 29.74, "learning_rate": 2.0115962607975386e-06, "loss": 0.0, "step": 9310 }, { "epoch": 29.78, "learning_rate": 1.7749378771742989e-06, "loss": 0.0, "step": 9320 }, { "epoch": 29.81, "learning_rate": 1.538279493551059e-06, "loss": 0.0, "step": 9330 }, { "epoch": 29.84, "learning_rate": 1.3016211099278191e-06, "loss": 0.0, "step": 9340 }, { "epoch": 29.87, "learning_rate": 1.0649627263045794e-06, "loss": 0.0, "step": 9350 }, { "epoch": 29.9, "learning_rate": 8.283043426813395e-07, "loss": 0.0, "step": 9360 }, { "epoch": 29.94, "learning_rate": 5.916459590580996e-07, "loss": 0.0, "step": 9370 }, { "epoch": 29.97, "learning_rate": 3.549875754348598e-07, "loss": 0.0, "step": 9380 }, { "epoch": 30.0, "learning_rate": 1.1832919181161993e-07, "loss": 0.0, "step": 9390 }, { "epoch": 30.0, "eval_accuracy": 0.9925, "eval_loss": 0.12477295100688934, "eval_runtime": 5.3362, "eval_samples_per_second": 74.96, "eval_steps_per_second": 9.37, "step": 9390 }, { "epoch": 30.0, "step": 9390, "total_flos": 2.3515974247797965e+19, "train_loss": 0.07091858377127398, "train_runtime": 7238.3055, "train_samples_per_second": 41.467, "train_steps_per_second": 1.297 } ], "logging_steps": 10, "max_steps": 9390, "num_train_epochs": 30, "save_steps": 500, "total_flos": 2.3515974247797965e+19, "trial_name": null, "trial_params": null }