pphuc25's picture
End of training
bb2fd6b
{
"best_metric": 0.22282175719738007,
"best_model_checkpoint": "whisper-base-full-data-v4/checkpoint-60000",
"epoch": 20.003133322888925,
"global_step": 63840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 2.0000000000000002e-07,
"loss": 2.2652,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.8464,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 6.000000000000001e-07,
"loss": 1.2569,
"step": 300
},
{
"epoch": 0.13,
"learning_rate": 8.000000000000001e-07,
"loss": 0.8772,
"step": 400
},
{
"epoch": 0.16,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.6906,
"step": 500
},
{
"epoch": 0.19,
"learning_rate": 1.2000000000000002e-06,
"loss": 0.6237,
"step": 600
},
{
"epoch": 0.22,
"learning_rate": 1.4000000000000001e-06,
"loss": 0.5784,
"step": 700
},
{
"epoch": 0.25,
"learning_rate": 1.6000000000000001e-06,
"loss": 0.5455,
"step": 800
},
{
"epoch": 0.28,
"learning_rate": 1.8000000000000001e-06,
"loss": 0.5264,
"step": 900
},
{
"epoch": 0.31,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.5018,
"step": 1000
},
{
"epoch": 0.34,
"learning_rate": 2.2e-06,
"loss": 0.4829,
"step": 1100
},
{
"epoch": 0.38,
"learning_rate": 2.4000000000000003e-06,
"loss": 0.4635,
"step": 1200
},
{
"epoch": 0.41,
"learning_rate": 2.6e-06,
"loss": 0.4505,
"step": 1300
},
{
"epoch": 0.44,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.4415,
"step": 1400
},
{
"epoch": 0.47,
"learning_rate": 3e-06,
"loss": 0.4276,
"step": 1500
},
{
"epoch": 0.5,
"learning_rate": 3.2000000000000003e-06,
"loss": 0.4168,
"step": 1600
},
{
"epoch": 0.53,
"learning_rate": 3.4000000000000005e-06,
"loss": 0.4074,
"step": 1700
},
{
"epoch": 0.56,
"learning_rate": 3.6000000000000003e-06,
"loss": 0.3999,
"step": 1800
},
{
"epoch": 0.6,
"learning_rate": 3.8000000000000005e-06,
"loss": 0.3904,
"step": 1900
},
{
"epoch": 0.63,
"learning_rate": 4.000000000000001e-06,
"loss": 0.3819,
"step": 2000
},
{
"epoch": 0.66,
"learning_rate": 4.2000000000000004e-06,
"loss": 0.3717,
"step": 2100
},
{
"epoch": 0.69,
"learning_rate": 4.4e-06,
"loss": 0.3635,
"step": 2200
},
{
"epoch": 0.72,
"learning_rate": 4.600000000000001e-06,
"loss": 0.3586,
"step": 2300
},
{
"epoch": 0.75,
"learning_rate": 4.800000000000001e-06,
"loss": 0.3596,
"step": 2400
},
{
"epoch": 0.78,
"learning_rate": 5e-06,
"loss": 0.3483,
"step": 2500
},
{
"epoch": 0.81,
"learning_rate": 5.2e-06,
"loss": 0.3465,
"step": 2600
},
{
"epoch": 0.85,
"learning_rate": 5.400000000000001e-06,
"loss": 0.3389,
"step": 2700
},
{
"epoch": 0.88,
"learning_rate": 5.600000000000001e-06,
"loss": 0.3361,
"step": 2800
},
{
"epoch": 0.91,
"learning_rate": 5.8e-06,
"loss": 0.3271,
"step": 2900
},
{
"epoch": 0.94,
"learning_rate": 6e-06,
"loss": 0.3221,
"step": 3000
},
{
"epoch": 0.97,
"learning_rate": 6.200000000000001e-06,
"loss": 0.3173,
"step": 3100
},
{
"epoch": 1.0,
"learning_rate": 6.4000000000000006e-06,
"loss": 0.3169,
"step": 3200
},
{
"epoch": 1.03,
"learning_rate": 6.600000000000001e-06,
"loss": 0.3099,
"step": 3300
},
{
"epoch": 1.07,
"learning_rate": 6.800000000000001e-06,
"loss": 0.3025,
"step": 3400
},
{
"epoch": 1.1,
"learning_rate": 7e-06,
"loss": 0.2969,
"step": 3500
},
{
"epoch": 1.13,
"learning_rate": 7.2000000000000005e-06,
"loss": 0.2949,
"step": 3600
},
{
"epoch": 1.16,
"learning_rate": 7.4e-06,
"loss": 0.2907,
"step": 3700
},
{
"epoch": 1.19,
"learning_rate": 7.600000000000001e-06,
"loss": 0.2914,
"step": 3800
},
{
"epoch": 1.22,
"learning_rate": 7.800000000000002e-06,
"loss": 0.2859,
"step": 3900
},
{
"epoch": 1.25,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2829,
"step": 4000
},
{
"epoch": 1.28,
"learning_rate": 8.2e-06,
"loss": 0.2824,
"step": 4100
},
{
"epoch": 1.32,
"learning_rate": 8.400000000000001e-06,
"loss": 0.2762,
"step": 4200
},
{
"epoch": 1.35,
"learning_rate": 8.6e-06,
"loss": 0.2739,
"step": 4300
},
{
"epoch": 1.38,
"learning_rate": 8.8e-06,
"loss": 0.2709,
"step": 4400
},
{
"epoch": 1.41,
"learning_rate": 9e-06,
"loss": 0.2712,
"step": 4500
},
{
"epoch": 1.44,
"learning_rate": 9.200000000000002e-06,
"loss": 0.2695,
"step": 4600
},
{
"epoch": 1.47,
"learning_rate": 9.4e-06,
"loss": 0.2689,
"step": 4700
},
{
"epoch": 1.5,
"learning_rate": 9.600000000000001e-06,
"loss": 0.263,
"step": 4800
},
{
"epoch": 1.54,
"learning_rate": 9.800000000000001e-06,
"loss": 0.2583,
"step": 4900
},
{
"epoch": 1.57,
"learning_rate": 1e-05,
"loss": 0.2569,
"step": 5000
},
{
"epoch": 1.57,
"eval_loss": 0.4137764871120453,
"eval_runtime": 73.4793,
"eval_samples_per_second": 121.681,
"eval_steps_per_second": 1.905,
"step": 5000
},
{
"epoch": 1.6,
"learning_rate": 9.983004758667573e-06,
"loss": 0.2546,
"step": 5100
},
{
"epoch": 1.63,
"learning_rate": 9.966009517335146e-06,
"loss": 0.2544,
"step": 5200
},
{
"epoch": 1.66,
"learning_rate": 9.94901427600272e-06,
"loss": 0.2497,
"step": 5300
},
{
"epoch": 1.69,
"learning_rate": 9.932019034670293e-06,
"loss": 0.2491,
"step": 5400
},
{
"epoch": 1.72,
"learning_rate": 9.915023793337866e-06,
"loss": 0.2468,
"step": 5500
},
{
"epoch": 1.75,
"learning_rate": 9.89802855200544e-06,
"loss": 0.2431,
"step": 5600
},
{
"epoch": 1.79,
"learning_rate": 9.881033310673012e-06,
"loss": 0.2407,
"step": 5700
},
{
"epoch": 1.82,
"learning_rate": 9.864038069340585e-06,
"loss": 0.2398,
"step": 5800
},
{
"epoch": 1.85,
"learning_rate": 9.84704282800816e-06,
"loss": 0.2389,
"step": 5900
},
{
"epoch": 1.88,
"learning_rate": 9.830047586675732e-06,
"loss": 0.2318,
"step": 6000
},
{
"epoch": 1.91,
"learning_rate": 9.813052345343305e-06,
"loss": 0.2347,
"step": 6100
},
{
"epoch": 1.94,
"learning_rate": 9.796057104010877e-06,
"loss": 0.2325,
"step": 6200
},
{
"epoch": 1.97,
"learning_rate": 9.779061862678451e-06,
"loss": 0.2278,
"step": 6300
},
{
"epoch": 2.01,
"learning_rate": 9.762066621346024e-06,
"loss": 0.2295,
"step": 6400
},
{
"epoch": 2.04,
"learning_rate": 9.745071380013597e-06,
"loss": 0.2215,
"step": 6500
},
{
"epoch": 2.07,
"learning_rate": 9.728076138681171e-06,
"loss": 0.2177,
"step": 6600
},
{
"epoch": 2.1,
"learning_rate": 9.711080897348744e-06,
"loss": 0.219,
"step": 6700
},
{
"epoch": 2.13,
"learning_rate": 9.694085656016316e-06,
"loss": 0.2159,
"step": 6800
},
{
"epoch": 2.16,
"learning_rate": 9.677090414683889e-06,
"loss": 0.2129,
"step": 6900
},
{
"epoch": 2.19,
"learning_rate": 9.660095173351463e-06,
"loss": 0.215,
"step": 7000
},
{
"epoch": 2.22,
"learning_rate": 9.643099932019036e-06,
"loss": 0.2113,
"step": 7100
},
{
"epoch": 2.26,
"learning_rate": 9.626104690686608e-06,
"loss": 0.2134,
"step": 7200
},
{
"epoch": 2.29,
"learning_rate": 9.60910944935418e-06,
"loss": 0.2096,
"step": 7300
},
{
"epoch": 2.32,
"learning_rate": 9.592114208021755e-06,
"loss": 0.2087,
"step": 7400
},
{
"epoch": 2.35,
"learning_rate": 9.575118966689328e-06,
"loss": 0.2056,
"step": 7500
},
{
"epoch": 2.38,
"learning_rate": 9.5581237253569e-06,
"loss": 0.2069,
"step": 7600
},
{
"epoch": 2.41,
"learning_rate": 9.541128484024475e-06,
"loss": 0.2067,
"step": 7700
},
{
"epoch": 2.44,
"learning_rate": 9.524133242692047e-06,
"loss": 0.2057,
"step": 7800
},
{
"epoch": 2.48,
"learning_rate": 9.50713800135962e-06,
"loss": 0.2019,
"step": 7900
},
{
"epoch": 2.51,
"learning_rate": 9.490142760027194e-06,
"loss": 0.203,
"step": 8000
},
{
"epoch": 2.54,
"learning_rate": 9.473147518694767e-06,
"loss": 0.2025,
"step": 8100
},
{
"epoch": 2.57,
"learning_rate": 9.45615227736234e-06,
"loss": 0.2002,
"step": 8200
},
{
"epoch": 2.6,
"learning_rate": 9.439157036029912e-06,
"loss": 0.197,
"step": 8300
},
{
"epoch": 2.63,
"learning_rate": 9.422161794697484e-06,
"loss": 0.2002,
"step": 8400
},
{
"epoch": 2.66,
"learning_rate": 9.405166553365059e-06,
"loss": 0.1953,
"step": 8500
},
{
"epoch": 2.69,
"learning_rate": 9.388171312032631e-06,
"loss": 0.1974,
"step": 8600
},
{
"epoch": 2.73,
"learning_rate": 9.371176070700204e-06,
"loss": 0.1923,
"step": 8700
},
{
"epoch": 2.76,
"learning_rate": 9.354180829367778e-06,
"loss": 0.1936,
"step": 8800
},
{
"epoch": 2.79,
"learning_rate": 9.337185588035351e-06,
"loss": 0.1934,
"step": 8900
},
{
"epoch": 2.82,
"learning_rate": 9.320190346702923e-06,
"loss": 0.1951,
"step": 9000
},
{
"epoch": 2.85,
"learning_rate": 9.303195105370498e-06,
"loss": 0.1926,
"step": 9100
},
{
"epoch": 2.88,
"learning_rate": 9.28619986403807e-06,
"loss": 0.1926,
"step": 9200
},
{
"epoch": 2.91,
"learning_rate": 9.269204622705643e-06,
"loss": 0.1899,
"step": 9300
},
{
"epoch": 2.95,
"learning_rate": 9.252209381373216e-06,
"loss": 0.1896,
"step": 9400
},
{
"epoch": 2.98,
"learning_rate": 9.23521414004079e-06,
"loss": 0.187,
"step": 9500
},
{
"epoch": 3.01,
"learning_rate": 9.218218898708362e-06,
"loss": 0.1861,
"step": 9600
},
{
"epoch": 3.04,
"learning_rate": 9.201223657375935e-06,
"loss": 0.1801,
"step": 9700
},
{
"epoch": 3.07,
"learning_rate": 9.18422841604351e-06,
"loss": 0.1801,
"step": 9800
},
{
"epoch": 3.1,
"learning_rate": 9.167233174711082e-06,
"loss": 0.1808,
"step": 9900
},
{
"epoch": 3.13,
"learning_rate": 9.150237933378655e-06,
"loss": 0.1774,
"step": 10000
},
{
"epoch": 3.13,
"eval_loss": 0.32951679825782776,
"eval_runtime": 64.7018,
"eval_samples_per_second": 138.188,
"eval_steps_per_second": 2.164,
"step": 10000
},
{
"epoch": 3.16,
"learning_rate": 9.133242692046229e-06,
"loss": 0.1785,
"step": 10100
},
{
"epoch": 3.2,
"learning_rate": 9.116247450713801e-06,
"loss": 0.1758,
"step": 10200
},
{
"epoch": 3.23,
"learning_rate": 9.099252209381374e-06,
"loss": 0.179,
"step": 10300
},
{
"epoch": 3.26,
"learning_rate": 9.082256968048947e-06,
"loss": 0.1761,
"step": 10400
},
{
"epoch": 3.29,
"learning_rate": 9.06526172671652e-06,
"loss": 0.1756,
"step": 10500
},
{
"epoch": 3.32,
"learning_rate": 9.048266485384094e-06,
"loss": 0.1766,
"step": 10600
},
{
"epoch": 3.35,
"learning_rate": 9.031271244051666e-06,
"loss": 0.1782,
"step": 10700
},
{
"epoch": 3.38,
"learning_rate": 9.014276002719239e-06,
"loss": 0.1753,
"step": 10800
},
{
"epoch": 3.42,
"learning_rate": 8.997280761386813e-06,
"loss": 0.175,
"step": 10900
},
{
"epoch": 3.45,
"learning_rate": 8.980285520054386e-06,
"loss": 0.1743,
"step": 11000
},
{
"epoch": 3.48,
"learning_rate": 8.963290278721958e-06,
"loss": 0.1705,
"step": 11100
},
{
"epoch": 3.51,
"learning_rate": 8.946295037389533e-06,
"loss": 0.1722,
"step": 11200
},
{
"epoch": 3.54,
"learning_rate": 8.929299796057105e-06,
"loss": 0.1685,
"step": 11300
},
{
"epoch": 3.57,
"learning_rate": 8.912304554724678e-06,
"loss": 0.1689,
"step": 11400
},
{
"epoch": 3.6,
"learning_rate": 8.89530931339225e-06,
"loss": 0.1675,
"step": 11500
},
{
"epoch": 3.63,
"learning_rate": 8.878314072059823e-06,
"loss": 0.1706,
"step": 11600
},
{
"epoch": 3.67,
"learning_rate": 8.861318830727397e-06,
"loss": 0.1683,
"step": 11700
},
{
"epoch": 3.7,
"learning_rate": 8.84432358939497e-06,
"loss": 0.169,
"step": 11800
},
{
"epoch": 3.73,
"learning_rate": 8.827328348062542e-06,
"loss": 0.1675,
"step": 11900
},
{
"epoch": 3.76,
"learning_rate": 8.810333106730117e-06,
"loss": 0.1686,
"step": 12000
},
{
"epoch": 3.79,
"learning_rate": 8.79333786539769e-06,
"loss": 0.167,
"step": 12100
},
{
"epoch": 3.82,
"learning_rate": 8.776342624065262e-06,
"loss": 0.1673,
"step": 12200
},
{
"epoch": 3.85,
"learning_rate": 8.759347382732836e-06,
"loss": 0.168,
"step": 12300
},
{
"epoch": 3.89,
"learning_rate": 8.742352141400409e-06,
"loss": 0.1642,
"step": 12400
},
{
"epoch": 3.92,
"learning_rate": 8.725356900067981e-06,
"loss": 0.1661,
"step": 12500
},
{
"epoch": 3.95,
"learning_rate": 8.708361658735554e-06,
"loss": 0.1645,
"step": 12600
},
{
"epoch": 3.98,
"learning_rate": 8.691366417403128e-06,
"loss": 0.1645,
"step": 12700
},
{
"epoch": 4.01,
"learning_rate": 8.674371176070701e-06,
"loss": 0.1607,
"step": 12800
},
{
"epoch": 4.04,
"learning_rate": 8.657375934738274e-06,
"loss": 0.1595,
"step": 12900
},
{
"epoch": 4.07,
"learning_rate": 8.640380693405848e-06,
"loss": 0.1582,
"step": 13000
},
{
"epoch": 4.1,
"learning_rate": 8.62338545207342e-06,
"loss": 0.1592,
"step": 13100
},
{
"epoch": 4.14,
"learning_rate": 8.606390210740993e-06,
"loss": 0.1564,
"step": 13200
},
{
"epoch": 4.17,
"learning_rate": 8.589394969408567e-06,
"loss": 0.158,
"step": 13300
},
{
"epoch": 4.2,
"learning_rate": 8.57239972807614e-06,
"loss": 0.156,
"step": 13400
},
{
"epoch": 4.23,
"learning_rate": 8.555404486743713e-06,
"loss": 0.1542,
"step": 13500
},
{
"epoch": 4.26,
"learning_rate": 8.538409245411285e-06,
"loss": 0.1551,
"step": 13600
},
{
"epoch": 4.29,
"learning_rate": 8.521414004078858e-06,
"loss": 0.1558,
"step": 13700
},
{
"epoch": 4.32,
"learning_rate": 8.504418762746432e-06,
"loss": 0.1531,
"step": 13800
},
{
"epoch": 4.36,
"learning_rate": 8.487423521414005e-06,
"loss": 0.1548,
"step": 13900
},
{
"epoch": 4.39,
"learning_rate": 8.470428280081577e-06,
"loss": 0.1554,
"step": 14000
},
{
"epoch": 4.42,
"learning_rate": 8.453433038749152e-06,
"loss": 0.1555,
"step": 14100
},
{
"epoch": 4.45,
"learning_rate": 8.436437797416724e-06,
"loss": 0.1532,
"step": 14200
},
{
"epoch": 4.48,
"learning_rate": 8.419442556084297e-06,
"loss": 0.1521,
"step": 14300
},
{
"epoch": 4.51,
"learning_rate": 8.402447314751871e-06,
"loss": 0.1543,
"step": 14400
},
{
"epoch": 4.54,
"learning_rate": 8.385452073419444e-06,
"loss": 0.1519,
"step": 14500
},
{
"epoch": 4.57,
"learning_rate": 8.368456832087016e-06,
"loss": 0.1536,
"step": 14600
},
{
"epoch": 4.61,
"learning_rate": 8.351461590754589e-06,
"loss": 0.1518,
"step": 14700
},
{
"epoch": 4.64,
"learning_rate": 8.334466349422161e-06,
"loss": 0.1516,
"step": 14800
},
{
"epoch": 4.67,
"learning_rate": 8.317471108089736e-06,
"loss": 0.1502,
"step": 14900
},
{
"epoch": 4.7,
"learning_rate": 8.300475866757308e-06,
"loss": 0.1485,
"step": 15000
},
{
"epoch": 4.7,
"eval_loss": 0.29357945919036865,
"eval_runtime": 82.3606,
"eval_samples_per_second": 108.559,
"eval_steps_per_second": 1.7,
"step": 15000
},
{
"epoch": 4.73,
"learning_rate": 8.283480625424881e-06,
"loss": 0.1515,
"step": 15100
},
{
"epoch": 4.76,
"learning_rate": 8.266485384092455e-06,
"loss": 0.1488,
"step": 15200
},
{
"epoch": 4.79,
"learning_rate": 8.249490142760028e-06,
"loss": 0.1504,
"step": 15300
},
{
"epoch": 4.83,
"learning_rate": 8.2324949014276e-06,
"loss": 0.1449,
"step": 15400
},
{
"epoch": 4.86,
"learning_rate": 8.215499660095175e-06,
"loss": 0.1504,
"step": 15500
},
{
"epoch": 4.89,
"learning_rate": 8.198504418762747e-06,
"loss": 0.1472,
"step": 15600
},
{
"epoch": 4.92,
"learning_rate": 8.18150917743032e-06,
"loss": 0.1502,
"step": 15700
},
{
"epoch": 4.95,
"learning_rate": 8.164513936097893e-06,
"loss": 0.1461,
"step": 15800
},
{
"epoch": 4.98,
"learning_rate": 8.147518694765467e-06,
"loss": 0.1475,
"step": 15900
},
{
"epoch": 5.01,
"learning_rate": 8.13052345343304e-06,
"loss": 0.1449,
"step": 16000
},
{
"epoch": 5.04,
"learning_rate": 8.113528212100612e-06,
"loss": 0.1402,
"step": 16100
},
{
"epoch": 5.08,
"learning_rate": 8.096532970768186e-06,
"loss": 0.1423,
"step": 16200
},
{
"epoch": 5.11,
"learning_rate": 8.079537729435759e-06,
"loss": 0.1402,
"step": 16300
},
{
"epoch": 5.14,
"learning_rate": 8.062542488103332e-06,
"loss": 0.1407,
"step": 16400
},
{
"epoch": 5.17,
"learning_rate": 8.045547246770906e-06,
"loss": 0.1408,
"step": 16500
},
{
"epoch": 5.2,
"learning_rate": 8.028552005438478e-06,
"loss": 0.1415,
"step": 16600
},
{
"epoch": 5.23,
"learning_rate": 8.011556764106051e-06,
"loss": 0.1405,
"step": 16700
},
{
"epoch": 5.26,
"learning_rate": 7.994561522773624e-06,
"loss": 0.141,
"step": 16800
},
{
"epoch": 5.3,
"learning_rate": 7.977566281441196e-06,
"loss": 0.1423,
"step": 16900
},
{
"epoch": 5.33,
"learning_rate": 7.96057104010877e-06,
"loss": 0.144,
"step": 17000
},
{
"epoch": 5.36,
"learning_rate": 7.943575798776343e-06,
"loss": 0.1394,
"step": 17100
},
{
"epoch": 5.39,
"learning_rate": 7.926580557443916e-06,
"loss": 0.1396,
"step": 17200
},
{
"epoch": 5.42,
"learning_rate": 7.90958531611149e-06,
"loss": 0.1393,
"step": 17300
},
{
"epoch": 5.45,
"learning_rate": 7.892590074779063e-06,
"loss": 0.138,
"step": 17400
},
{
"epoch": 5.48,
"learning_rate": 7.875594833446635e-06,
"loss": 0.1393,
"step": 17500
},
{
"epoch": 5.51,
"learning_rate": 7.85859959211421e-06,
"loss": 0.1403,
"step": 17600
},
{
"epoch": 5.55,
"learning_rate": 7.841604350781782e-06,
"loss": 0.1357,
"step": 17700
},
{
"epoch": 5.58,
"learning_rate": 7.824609109449355e-06,
"loss": 0.1368,
"step": 17800
},
{
"epoch": 5.61,
"learning_rate": 7.807613868116927e-06,
"loss": 0.1371,
"step": 17900
},
{
"epoch": 5.64,
"learning_rate": 7.790618626784502e-06,
"loss": 0.1384,
"step": 18000
},
{
"epoch": 5.67,
"learning_rate": 7.773623385452074e-06,
"loss": 0.1356,
"step": 18100
},
{
"epoch": 5.7,
"learning_rate": 7.756628144119647e-06,
"loss": 0.1378,
"step": 18200
},
{
"epoch": 5.73,
"learning_rate": 7.73963290278722e-06,
"loss": 0.1375,
"step": 18300
},
{
"epoch": 5.77,
"learning_rate": 7.722637661454794e-06,
"loss": 0.1377,
"step": 18400
},
{
"epoch": 5.8,
"learning_rate": 7.705642420122366e-06,
"loss": 0.1365,
"step": 18500
},
{
"epoch": 5.83,
"learning_rate": 7.688647178789939e-06,
"loss": 0.138,
"step": 18600
},
{
"epoch": 5.86,
"learning_rate": 7.671651937457513e-06,
"loss": 0.1344,
"step": 18700
},
{
"epoch": 5.89,
"learning_rate": 7.654656696125086e-06,
"loss": 0.1328,
"step": 18800
},
{
"epoch": 5.92,
"learning_rate": 7.637661454792658e-06,
"loss": 0.1363,
"step": 18900
},
{
"epoch": 5.95,
"learning_rate": 7.620666213460232e-06,
"loss": 0.1366,
"step": 19000
},
{
"epoch": 5.98,
"learning_rate": 7.6036709721278044e-06,
"loss": 0.1356,
"step": 19100
},
{
"epoch": 6.02,
"learning_rate": 7.586675730795378e-06,
"loss": 0.1326,
"step": 19200
},
{
"epoch": 6.05,
"learning_rate": 7.5696804894629505e-06,
"loss": 0.1288,
"step": 19300
},
{
"epoch": 6.08,
"learning_rate": 7.552685248130525e-06,
"loss": 0.1304,
"step": 19400
},
{
"epoch": 6.11,
"learning_rate": 7.535690006798097e-06,
"loss": 0.1287,
"step": 19500
},
{
"epoch": 6.14,
"learning_rate": 7.518694765465671e-06,
"loss": 0.13,
"step": 19600
},
{
"epoch": 6.17,
"learning_rate": 7.5016995241332434e-06,
"loss": 0.127,
"step": 19700
},
{
"epoch": 6.2,
"learning_rate": 7.484704282800817e-06,
"loss": 0.1301,
"step": 19800
},
{
"epoch": 6.24,
"learning_rate": 7.4677090414683895e-06,
"loss": 0.1295,
"step": 19900
},
{
"epoch": 6.27,
"learning_rate": 7.450713800135962e-06,
"loss": 0.1316,
"step": 20000
},
{
"epoch": 6.27,
"eval_loss": 0.27446937561035156,
"eval_runtime": 83.4787,
"eval_samples_per_second": 107.105,
"eval_steps_per_second": 1.677,
"step": 20000
},
{
"epoch": 6.3,
"learning_rate": 7.4337185588035355e-06,
"loss": 0.1278,
"step": 20100
},
{
"epoch": 6.33,
"learning_rate": 7.416723317471108e-06,
"loss": 0.1273,
"step": 20200
},
{
"epoch": 6.36,
"learning_rate": 7.399728076138682e-06,
"loss": 0.1309,
"step": 20300
},
{
"epoch": 6.39,
"learning_rate": 7.382732834806254e-06,
"loss": 0.127,
"step": 20400
},
{
"epoch": 6.42,
"learning_rate": 7.3657375934738285e-06,
"loss": 0.1278,
"step": 20500
},
{
"epoch": 6.45,
"learning_rate": 7.348742352141401e-06,
"loss": 0.1286,
"step": 20600
},
{
"epoch": 6.49,
"learning_rate": 7.3317471108089745e-06,
"loss": 0.1284,
"step": 20700
},
{
"epoch": 6.52,
"learning_rate": 7.314751869476547e-06,
"loss": 0.1289,
"step": 20800
},
{
"epoch": 6.55,
"learning_rate": 7.297756628144121e-06,
"loss": 0.1278,
"step": 20900
},
{
"epoch": 6.58,
"learning_rate": 7.280761386811693e-06,
"loss": 0.1271,
"step": 21000
},
{
"epoch": 6.61,
"learning_rate": 7.263766145479267e-06,
"loss": 0.1263,
"step": 21100
},
{
"epoch": 6.64,
"learning_rate": 7.246770904146839e-06,
"loss": 0.1261,
"step": 21200
},
{
"epoch": 6.67,
"learning_rate": 7.229775662814412e-06,
"loss": 0.1304,
"step": 21300
},
{
"epoch": 6.71,
"learning_rate": 7.212780421481985e-06,
"loss": 0.1264,
"step": 21400
},
{
"epoch": 6.74,
"learning_rate": 7.195785180149558e-06,
"loss": 0.1264,
"step": 21500
},
{
"epoch": 6.77,
"learning_rate": 7.178789938817132e-06,
"loss": 0.1264,
"step": 21600
},
{
"epoch": 6.8,
"learning_rate": 7.161794697484706e-06,
"loss": 0.1255,
"step": 21700
},
{
"epoch": 6.83,
"learning_rate": 7.144799456152278e-06,
"loss": 0.1263,
"step": 21800
},
{
"epoch": 6.86,
"learning_rate": 7.127804214819851e-06,
"loss": 0.1286,
"step": 21900
},
{
"epoch": 6.89,
"learning_rate": 7.110808973487424e-06,
"loss": 0.1248,
"step": 22000
},
{
"epoch": 6.92,
"learning_rate": 7.093813732154997e-06,
"loss": 0.1254,
"step": 22100
},
{
"epoch": 6.96,
"learning_rate": 7.07681849082257e-06,
"loss": 0.1235,
"step": 22200
},
{
"epoch": 6.99,
"learning_rate": 7.059823249490143e-06,
"loss": 0.1275,
"step": 22300
},
{
"epoch": 7.02,
"learning_rate": 7.042828008157716e-06,
"loss": 0.1222,
"step": 22400
},
{
"epoch": 7.05,
"learning_rate": 7.025832766825289e-06,
"loss": 0.1198,
"step": 22500
},
{
"epoch": 7.08,
"learning_rate": 7.008837525492863e-06,
"loss": 0.1207,
"step": 22600
},
{
"epoch": 7.11,
"learning_rate": 6.991842284160436e-06,
"loss": 0.1174,
"step": 22700
},
{
"epoch": 7.14,
"learning_rate": 6.974847042828009e-06,
"loss": 0.1203,
"step": 22800
},
{
"epoch": 7.18,
"learning_rate": 6.957851801495582e-06,
"loss": 0.1224,
"step": 22900
},
{
"epoch": 7.21,
"learning_rate": 6.940856560163155e-06,
"loss": 0.1171,
"step": 23000
},
{
"epoch": 7.24,
"learning_rate": 6.923861318830728e-06,
"loss": 0.119,
"step": 23100
},
{
"epoch": 7.27,
"learning_rate": 6.9068660774983006e-06,
"loss": 0.1211,
"step": 23200
},
{
"epoch": 7.3,
"learning_rate": 6.889870836165874e-06,
"loss": 0.1204,
"step": 23300
},
{
"epoch": 7.33,
"learning_rate": 6.872875594833447e-06,
"loss": 0.1174,
"step": 23400
},
{
"epoch": 7.36,
"learning_rate": 6.85588035350102e-06,
"loss": 0.1179,
"step": 23500
},
{
"epoch": 7.39,
"learning_rate": 6.838885112168593e-06,
"loss": 0.121,
"step": 23600
},
{
"epoch": 7.43,
"learning_rate": 6.821889870836167e-06,
"loss": 0.1195,
"step": 23700
},
{
"epoch": 7.46,
"learning_rate": 6.8048946295037396e-06,
"loss": 0.1188,
"step": 23800
},
{
"epoch": 7.49,
"learning_rate": 6.787899388171313e-06,
"loss": 0.119,
"step": 23900
},
{
"epoch": 7.52,
"learning_rate": 6.770904146838886e-06,
"loss": 0.1194,
"step": 24000
},
{
"epoch": 7.55,
"learning_rate": 6.753908905506459e-06,
"loss": 0.1185,
"step": 24100
},
{
"epoch": 7.58,
"learning_rate": 6.736913664174032e-06,
"loss": 0.1185,
"step": 24200
},
{
"epoch": 7.61,
"learning_rate": 6.719918422841605e-06,
"loss": 0.1185,
"step": 24300
},
{
"epoch": 7.65,
"learning_rate": 6.702923181509178e-06,
"loss": 0.1189,
"step": 24400
},
{
"epoch": 7.68,
"learning_rate": 6.685927940176751e-06,
"loss": 0.1189,
"step": 24500
},
{
"epoch": 7.71,
"learning_rate": 6.668932698844324e-06,
"loss": 0.1191,
"step": 24600
},
{
"epoch": 7.74,
"learning_rate": 6.651937457511896e-06,
"loss": 0.1189,
"step": 24700
},
{
"epoch": 7.77,
"learning_rate": 6.634942216179471e-06,
"loss": 0.1201,
"step": 24800
},
{
"epoch": 7.8,
"learning_rate": 6.617946974847044e-06,
"loss": 0.1192,
"step": 24900
},
{
"epoch": 7.83,
"learning_rate": 6.600951733514617e-06,
"loss": 0.1187,
"step": 25000
},
{
"epoch": 7.83,
"eval_loss": 0.2598571479320526,
"eval_runtime": 81.4111,
"eval_samples_per_second": 109.825,
"eval_steps_per_second": 1.72,
"step": 25000
},
{
"epoch": 7.86,
"learning_rate": 6.583956492182189e-06,
"loss": 0.1201,
"step": 25100
},
{
"epoch": 7.9,
"learning_rate": 6.566961250849763e-06,
"loss": 0.1177,
"step": 25200
},
{
"epoch": 7.93,
"learning_rate": 6.549966009517335e-06,
"loss": 0.119,
"step": 25300
},
{
"epoch": 7.96,
"learning_rate": 6.532970768184909e-06,
"loss": 0.1192,
"step": 25400
},
{
"epoch": 7.99,
"learning_rate": 6.515975526852481e-06,
"loss": 0.1155,
"step": 25500
},
{
"epoch": 8.02,
"learning_rate": 6.498980285520055e-06,
"loss": 0.1127,
"step": 25600
},
{
"epoch": 8.05,
"learning_rate": 6.4819850441876274e-06,
"loss": 0.1126,
"step": 25700
},
{
"epoch": 8.08,
"learning_rate": 6.464989802855202e-06,
"loss": 0.1129,
"step": 25800
},
{
"epoch": 8.12,
"learning_rate": 6.447994561522774e-06,
"loss": 0.1138,
"step": 25900
},
{
"epoch": 8.15,
"learning_rate": 6.430999320190348e-06,
"loss": 0.1124,
"step": 26000
},
{
"epoch": 8.18,
"learning_rate": 6.41400407885792e-06,
"loss": 0.1113,
"step": 26100
},
{
"epoch": 8.21,
"learning_rate": 6.397008837525494e-06,
"loss": 0.1125,
"step": 26200
},
{
"epoch": 8.24,
"learning_rate": 6.3800135961930664e-06,
"loss": 0.1126,
"step": 26300
},
{
"epoch": 8.27,
"learning_rate": 6.36301835486064e-06,
"loss": 0.1136,
"step": 26400
},
{
"epoch": 8.3,
"learning_rate": 6.3460231135282125e-06,
"loss": 0.1137,
"step": 26500
},
{
"epoch": 8.33,
"learning_rate": 6.329027872195785e-06,
"loss": 0.1136,
"step": 26600
},
{
"epoch": 8.37,
"learning_rate": 6.3120326308633585e-06,
"loss": 0.1108,
"step": 26700
},
{
"epoch": 8.4,
"learning_rate": 6.295037389530931e-06,
"loss": 0.1117,
"step": 26800
},
{
"epoch": 8.43,
"learning_rate": 6.278042148198505e-06,
"loss": 0.1114,
"step": 26900
},
{
"epoch": 8.46,
"learning_rate": 6.261046906866078e-06,
"loss": 0.1135,
"step": 27000
},
{
"epoch": 8.49,
"learning_rate": 6.2440516655336515e-06,
"loss": 0.1104,
"step": 27100
},
{
"epoch": 8.52,
"learning_rate": 6.227056424201224e-06,
"loss": 0.1122,
"step": 27200
},
{
"epoch": 8.55,
"learning_rate": 6.2100611828687975e-06,
"loss": 0.111,
"step": 27300
},
{
"epoch": 8.59,
"learning_rate": 6.19306594153637e-06,
"loss": 0.1118,
"step": 27400
},
{
"epoch": 8.62,
"learning_rate": 6.1760707002039436e-06,
"loss": 0.1117,
"step": 27500
},
{
"epoch": 8.65,
"learning_rate": 6.159075458871516e-06,
"loss": 0.1102,
"step": 27600
},
{
"epoch": 8.68,
"learning_rate": 6.14208021753909e-06,
"loss": 0.11,
"step": 27700
},
{
"epoch": 8.71,
"learning_rate": 6.125084976206662e-06,
"loss": 0.1087,
"step": 27800
},
{
"epoch": 8.74,
"learning_rate": 6.108089734874235e-06,
"loss": 0.1096,
"step": 27900
},
{
"epoch": 8.77,
"learning_rate": 6.091094493541809e-06,
"loss": 0.1119,
"step": 28000
},
{
"epoch": 8.8,
"learning_rate": 6.0740992522093826e-06,
"loss": 0.1102,
"step": 28100
},
{
"epoch": 8.84,
"learning_rate": 6.057104010876955e-06,
"loss": 0.1091,
"step": 28200
},
{
"epoch": 8.87,
"learning_rate": 6.040108769544528e-06,
"loss": 0.1116,
"step": 28300
},
{
"epoch": 8.9,
"learning_rate": 6.023113528212101e-06,
"loss": 0.1096,
"step": 28400
},
{
"epoch": 8.93,
"learning_rate": 6.006118286879674e-06,
"loss": 0.1116,
"step": 28500
},
{
"epoch": 8.96,
"learning_rate": 5.989123045547247e-06,
"loss": 0.11,
"step": 28600
},
{
"epoch": 8.99,
"learning_rate": 5.97212780421482e-06,
"loss": 0.1112,
"step": 28700
},
{
"epoch": 9.02,
"learning_rate": 5.955132562882393e-06,
"loss": 0.1081,
"step": 28800
},
{
"epoch": 9.06,
"learning_rate": 5.938137321549966e-06,
"loss": 0.1053,
"step": 28900
},
{
"epoch": 9.09,
"learning_rate": 5.92114208021754e-06,
"loss": 0.106,
"step": 29000
},
{
"epoch": 9.12,
"learning_rate": 5.904146838885113e-06,
"loss": 0.1043,
"step": 29100
},
{
"epoch": 9.15,
"learning_rate": 5.887151597552686e-06,
"loss": 0.1066,
"step": 29200
},
{
"epoch": 9.18,
"learning_rate": 5.870156356220259e-06,
"loss": 0.1043,
"step": 29300
},
{
"epoch": 9.21,
"learning_rate": 5.853161114887832e-06,
"loss": 0.1042,
"step": 29400
},
{
"epoch": 9.24,
"learning_rate": 5.836165873555405e-06,
"loss": 0.1086,
"step": 29500
},
{
"epoch": 9.27,
"learning_rate": 5.819170632222978e-06,
"loss": 0.1073,
"step": 29600
},
{
"epoch": 9.31,
"learning_rate": 5.802175390890551e-06,
"loss": 0.1059,
"step": 29700
},
{
"epoch": 9.34,
"learning_rate": 5.7851801495581236e-06,
"loss": 0.1069,
"step": 29800
},
{
"epoch": 9.37,
"learning_rate": 5.768184908225697e-06,
"loss": 0.109,
"step": 29900
},
{
"epoch": 9.4,
"learning_rate": 5.75118966689327e-06,
"loss": 0.1037,
"step": 30000
},
{
"epoch": 9.4,
"eval_loss": 0.24955707788467407,
"eval_runtime": 72.5131,
"eval_samples_per_second": 123.302,
"eval_steps_per_second": 1.931,
"step": 30000
},
{
"epoch": 9.43,
"learning_rate": 5.734194425560844e-06,
"loss": 0.1047,
"step": 30100
},
{
"epoch": 9.46,
"learning_rate": 5.7171991842284165e-06,
"loss": 0.1045,
"step": 30200
},
{
"epoch": 9.49,
"learning_rate": 5.70020394289599e-06,
"loss": 0.1055,
"step": 30300
},
{
"epoch": 9.53,
"learning_rate": 5.6832087015635625e-06,
"loss": 0.1038,
"step": 30400
},
{
"epoch": 9.56,
"learning_rate": 5.666213460231136e-06,
"loss": 0.1058,
"step": 30500
},
{
"epoch": 9.59,
"learning_rate": 5.649218218898709e-06,
"loss": 0.1044,
"step": 30600
},
{
"epoch": 9.62,
"learning_rate": 5.632222977566282e-06,
"loss": 0.1042,
"step": 30700
},
{
"epoch": 9.65,
"learning_rate": 5.615227736233855e-06,
"loss": 0.1048,
"step": 30800
},
{
"epoch": 9.68,
"learning_rate": 5.598232494901428e-06,
"loss": 0.107,
"step": 30900
},
{
"epoch": 9.71,
"learning_rate": 5.581237253569001e-06,
"loss": 0.1061,
"step": 31000
},
{
"epoch": 9.74,
"learning_rate": 5.564242012236573e-06,
"loss": 0.1032,
"step": 31100
},
{
"epoch": 9.78,
"learning_rate": 5.547246770904148e-06,
"loss": 0.1042,
"step": 31200
},
{
"epoch": 9.81,
"learning_rate": 5.530251529571721e-06,
"loss": 0.1045,
"step": 31300
},
{
"epoch": 9.84,
"learning_rate": 5.513256288239294e-06,
"loss": 0.1042,
"step": 31400
},
{
"epoch": 9.87,
"learning_rate": 5.496261046906867e-06,
"loss": 0.1058,
"step": 31500
},
{
"epoch": 9.9,
"learning_rate": 5.47926580557444e-06,
"loss": 0.104,
"step": 31600
},
{
"epoch": 9.93,
"learning_rate": 5.462270564242012e-06,
"loss": 0.104,
"step": 31700
},
{
"epoch": 9.96,
"learning_rate": 5.445275322909586e-06,
"loss": 0.1039,
"step": 31800
},
{
"epoch": 10.0,
"learning_rate": 5.428280081577158e-06,
"loss": 0.1041,
"step": 31900
},
{
"epoch": 10.03,
"learning_rate": 5.411284840244732e-06,
"loss": 0.0994,
"step": 32000
},
{
"epoch": 10.06,
"learning_rate": 5.394289598912304e-06,
"loss": 0.1007,
"step": 32100
},
{
"epoch": 10.09,
"learning_rate": 5.377294357579879e-06,
"loss": 0.1016,
"step": 32200
},
{
"epoch": 10.12,
"learning_rate": 5.360299116247451e-06,
"loss": 0.0977,
"step": 32300
},
{
"epoch": 10.15,
"learning_rate": 5.343303874915025e-06,
"loss": 0.0992,
"step": 32400
},
{
"epoch": 10.18,
"learning_rate": 5.326308633582597e-06,
"loss": 0.1005,
"step": 32500
},
{
"epoch": 10.21,
"learning_rate": 5.309313392250171e-06,
"loss": 0.0987,
"step": 32600
},
{
"epoch": 10.25,
"learning_rate": 5.292318150917743e-06,
"loss": 0.1007,
"step": 32700
},
{
"epoch": 10.28,
"learning_rate": 5.275322909585317e-06,
"loss": 0.0996,
"step": 32800
},
{
"epoch": 10.31,
"learning_rate": 5.258327668252889e-06,
"loss": 0.0982,
"step": 32900
},
{
"epoch": 10.34,
"learning_rate": 5.241332426920462e-06,
"loss": 0.103,
"step": 33000
},
{
"epoch": 10.37,
"learning_rate": 5.2243371855880355e-06,
"loss": 0.0993,
"step": 33100
},
{
"epoch": 10.4,
"learning_rate": 5.207341944255608e-06,
"loss": 0.0986,
"step": 33200
},
{
"epoch": 10.43,
"learning_rate": 5.190346702923182e-06,
"loss": 0.1006,
"step": 33300
},
{
"epoch": 10.47,
"learning_rate": 5.173351461590756e-06,
"loss": 0.0995,
"step": 33400
},
{
"epoch": 10.5,
"learning_rate": 5.156356220258328e-06,
"loss": 0.1022,
"step": 33500
},
{
"epoch": 10.53,
"learning_rate": 5.139360978925901e-06,
"loss": 0.1001,
"step": 33600
},
{
"epoch": 10.56,
"learning_rate": 5.1223657375934745e-06,
"loss": 0.0998,
"step": 33700
},
{
"epoch": 10.59,
"learning_rate": 5.105370496261047e-06,
"loss": 0.0978,
"step": 33800
},
{
"epoch": 10.62,
"learning_rate": 5.0883752549286205e-06,
"loss": 0.0997,
"step": 33900
},
{
"epoch": 10.65,
"learning_rate": 5.071380013596193e-06,
"loss": 0.1017,
"step": 34000
},
{
"epoch": 10.68,
"learning_rate": 5.0543847722637666e-06,
"loss": 0.0988,
"step": 34100
},
{
"epoch": 10.72,
"learning_rate": 5.037389530931339e-06,
"loss": 0.102,
"step": 34200
},
{
"epoch": 10.75,
"learning_rate": 5.020394289598912e-06,
"loss": 0.0986,
"step": 34300
},
{
"epoch": 10.78,
"learning_rate": 5.003399048266486e-06,
"loss": 0.0986,
"step": 34400
},
{
"epoch": 10.81,
"learning_rate": 4.986403806934059e-06,
"loss": 0.0983,
"step": 34500
},
{
"epoch": 10.84,
"learning_rate": 4.969408565601632e-06,
"loss": 0.0999,
"step": 34600
},
{
"epoch": 10.87,
"learning_rate": 4.9524133242692056e-06,
"loss": 0.0994,
"step": 34700
},
{
"epoch": 10.9,
"learning_rate": 4.935418082936778e-06,
"loss": 0.1001,
"step": 34800
},
{
"epoch": 10.94,
"learning_rate": 4.918422841604351e-06,
"loss": 0.0997,
"step": 34900
},
{
"epoch": 10.97,
"learning_rate": 4.901427600271924e-06,
"loss": 0.1011,
"step": 35000
},
{
"epoch": 10.97,
"eval_loss": 0.2406775802373886,
"eval_runtime": 78.2369,
"eval_samples_per_second": 114.281,
"eval_steps_per_second": 1.789,
"step": 35000
},
{
"epoch": 11.0,
"learning_rate": 4.884432358939497e-06,
"loss": 0.1,
"step": 35100
},
{
"epoch": 11.03,
"learning_rate": 4.86743711760707e-06,
"loss": 0.0959,
"step": 35200
},
{
"epoch": 11.06,
"learning_rate": 4.850441876274644e-06,
"loss": 0.094,
"step": 35300
},
{
"epoch": 11.09,
"learning_rate": 4.833446634942216e-06,
"loss": 0.0953,
"step": 35400
},
{
"epoch": 11.12,
"learning_rate": 4.81645139360979e-06,
"loss": 0.0948,
"step": 35500
},
{
"epoch": 11.15,
"learning_rate": 4.799456152277362e-06,
"loss": 0.0932,
"step": 35600
},
{
"epoch": 11.19,
"learning_rate": 4.782460910944936e-06,
"loss": 0.095,
"step": 35700
},
{
"epoch": 11.22,
"learning_rate": 4.765465669612509e-06,
"loss": 0.0953,
"step": 35800
},
{
"epoch": 11.25,
"learning_rate": 4.748470428280082e-06,
"loss": 0.0952,
"step": 35900
},
{
"epoch": 11.28,
"learning_rate": 4.731475186947655e-06,
"loss": 0.0941,
"step": 36000
},
{
"epoch": 11.31,
"learning_rate": 4.714479945615228e-06,
"loss": 0.096,
"step": 36100
},
{
"epoch": 11.34,
"learning_rate": 4.697484704282801e-06,
"loss": 0.0948,
"step": 36200
},
{
"epoch": 11.37,
"learning_rate": 4.680489462950375e-06,
"loss": 0.0945,
"step": 36300
},
{
"epoch": 11.41,
"learning_rate": 4.663494221617947e-06,
"loss": 0.0962,
"step": 36400
},
{
"epoch": 11.44,
"learning_rate": 4.64649898028552e-06,
"loss": 0.0949,
"step": 36500
},
{
"epoch": 11.47,
"learning_rate": 4.6295037389530934e-06,
"loss": 0.0953,
"step": 36600
},
{
"epoch": 11.5,
"learning_rate": 4.612508497620666e-06,
"loss": 0.0944,
"step": 36700
},
{
"epoch": 11.53,
"learning_rate": 4.5955132562882395e-06,
"loss": 0.0963,
"step": 36800
},
{
"epoch": 11.56,
"learning_rate": 4.578518014955813e-06,
"loss": 0.096,
"step": 36900
},
{
"epoch": 11.59,
"learning_rate": 4.5615227736233855e-06,
"loss": 0.0936,
"step": 37000
},
{
"epoch": 11.62,
"learning_rate": 4.544527532290959e-06,
"loss": 0.0982,
"step": 37100
},
{
"epoch": 11.66,
"learning_rate": 4.527532290958532e-06,
"loss": 0.0951,
"step": 37200
},
{
"epoch": 11.69,
"learning_rate": 4.510537049626105e-06,
"loss": 0.0943,
"step": 37300
},
{
"epoch": 11.72,
"learning_rate": 4.4935418082936785e-06,
"loss": 0.094,
"step": 37400
},
{
"epoch": 11.75,
"learning_rate": 4.476546566961251e-06,
"loss": 0.095,
"step": 37500
},
{
"epoch": 11.78,
"learning_rate": 4.4595513256288245e-06,
"loss": 0.0936,
"step": 37600
},
{
"epoch": 11.81,
"learning_rate": 4.442556084296397e-06,
"loss": 0.0953,
"step": 37700
},
{
"epoch": 11.84,
"learning_rate": 4.425560842963971e-06,
"loss": 0.0934,
"step": 37800
},
{
"epoch": 11.88,
"learning_rate": 4.408565601631544e-06,
"loss": 0.0941,
"step": 37900
},
{
"epoch": 11.91,
"learning_rate": 4.391570360299117e-06,
"loss": 0.095,
"step": 38000
},
{
"epoch": 11.94,
"learning_rate": 4.374575118966689e-06,
"loss": 0.0942,
"step": 38100
},
{
"epoch": 11.97,
"learning_rate": 4.357579877634263e-06,
"loss": 0.0967,
"step": 38200
},
{
"epoch": 12.0,
"learning_rate": 4.340584636301835e-06,
"loss": 0.0935,
"step": 38300
},
{
"epoch": 12.03,
"learning_rate": 4.323589394969409e-06,
"loss": 0.0911,
"step": 38400
},
{
"epoch": 12.06,
"learning_rate": 4.306594153636982e-06,
"loss": 0.0905,
"step": 38500
},
{
"epoch": 12.09,
"learning_rate": 4.289598912304555e-06,
"loss": 0.0913,
"step": 38600
},
{
"epoch": 12.13,
"learning_rate": 4.272603670972128e-06,
"loss": 0.0922,
"step": 38700
},
{
"epoch": 12.16,
"learning_rate": 4.255608429639701e-06,
"loss": 0.092,
"step": 38800
},
{
"epoch": 12.19,
"learning_rate": 4.238613188307274e-06,
"loss": 0.0911,
"step": 38900
},
{
"epoch": 12.22,
"learning_rate": 4.221617946974848e-06,
"loss": 0.092,
"step": 39000
},
{
"epoch": 12.25,
"learning_rate": 4.20462270564242e-06,
"loss": 0.0917,
"step": 39100
},
{
"epoch": 12.28,
"learning_rate": 4.187627464309994e-06,
"loss": 0.0909,
"step": 39200
},
{
"epoch": 12.31,
"learning_rate": 4.170632222977566e-06,
"loss": 0.0913,
"step": 39300
},
{
"epoch": 12.35,
"learning_rate": 4.15363698164514e-06,
"loss": 0.0904,
"step": 39400
},
{
"epoch": 12.38,
"learning_rate": 4.136641740312713e-06,
"loss": 0.089,
"step": 39500
},
{
"epoch": 12.41,
"learning_rate": 4.119646498980286e-06,
"loss": 0.0909,
"step": 39600
},
{
"epoch": 12.44,
"learning_rate": 4.1026512576478585e-06,
"loss": 0.0907,
"step": 39700
},
{
"epoch": 12.47,
"learning_rate": 4.085656016315432e-06,
"loss": 0.09,
"step": 39800
},
{
"epoch": 12.5,
"learning_rate": 4.0686607749830045e-06,
"loss": 0.0897,
"step": 39900
},
{
"epoch": 12.53,
"learning_rate": 4.051665533650578e-06,
"loss": 0.0905,
"step": 40000
},
{
"epoch": 12.53,
"eval_loss": 0.23563739657402039,
"eval_runtime": 76.2602,
"eval_samples_per_second": 117.243,
"eval_steps_per_second": 1.836,
"step": 40000
},
{
"epoch": 12.56,
"learning_rate": 4.034670292318151e-06,
"loss": 0.091,
"step": 40100
},
{
"epoch": 12.6,
"learning_rate": 4.017675050985724e-06,
"loss": 0.0913,
"step": 40200
},
{
"epoch": 12.63,
"learning_rate": 4.0006798096532975e-06,
"loss": 0.0901,
"step": 40300
},
{
"epoch": 12.66,
"learning_rate": 3.98368456832087e-06,
"loss": 0.0907,
"step": 40400
},
{
"epoch": 12.69,
"learning_rate": 3.9666893269884435e-06,
"loss": 0.0899,
"step": 40500
},
{
"epoch": 12.72,
"learning_rate": 3.949694085656017e-06,
"loss": 0.0891,
"step": 40600
},
{
"epoch": 12.75,
"learning_rate": 3.9326988443235896e-06,
"loss": 0.0906,
"step": 40700
},
{
"epoch": 12.78,
"learning_rate": 3.915703602991163e-06,
"loss": 0.0918,
"step": 40800
},
{
"epoch": 12.82,
"learning_rate": 3.898708361658736e-06,
"loss": 0.0908,
"step": 40900
},
{
"epoch": 12.85,
"learning_rate": 3.881713120326309e-06,
"loss": 0.0883,
"step": 41000
},
{
"epoch": 12.88,
"learning_rate": 3.8647178789938825e-06,
"loss": 0.0903,
"step": 41100
},
{
"epoch": 12.91,
"learning_rate": 3.847722637661455e-06,
"loss": 0.0906,
"step": 41200
},
{
"epoch": 12.94,
"learning_rate": 3.8307273963290285e-06,
"loss": 0.0919,
"step": 41300
},
{
"epoch": 12.97,
"learning_rate": 3.813732154996601e-06,
"loss": 0.0896,
"step": 41400
},
{
"epoch": 13.0,
"learning_rate": 3.796736913664174e-06,
"loss": 0.0908,
"step": 41500
},
{
"epoch": 13.03,
"learning_rate": 3.7797416723317476e-06,
"loss": 0.0874,
"step": 41600
},
{
"epoch": 13.07,
"learning_rate": 3.7627464309993206e-06,
"loss": 0.0872,
"step": 41700
},
{
"epoch": 13.1,
"learning_rate": 3.7457511896668937e-06,
"loss": 0.0872,
"step": 41800
},
{
"epoch": 13.13,
"learning_rate": 3.7287559483344667e-06,
"loss": 0.0866,
"step": 41900
},
{
"epoch": 13.16,
"learning_rate": 3.7117607070020393e-06,
"loss": 0.0865,
"step": 42000
},
{
"epoch": 13.19,
"learning_rate": 3.694765465669613e-06,
"loss": 0.0856,
"step": 42100
},
{
"epoch": 13.22,
"learning_rate": 3.677770224337186e-06,
"loss": 0.088,
"step": 42200
},
{
"epoch": 13.25,
"learning_rate": 3.660774983004759e-06,
"loss": 0.0881,
"step": 42300
},
{
"epoch": 13.29,
"learning_rate": 3.643779741672332e-06,
"loss": 0.0889,
"step": 42400
},
{
"epoch": 13.32,
"learning_rate": 3.626784500339905e-06,
"loss": 0.0861,
"step": 42500
},
{
"epoch": 13.35,
"learning_rate": 3.6097892590074783e-06,
"loss": 0.0891,
"step": 42600
},
{
"epoch": 13.38,
"learning_rate": 3.5927940176750513e-06,
"loss": 0.0883,
"step": 42700
},
{
"epoch": 13.41,
"learning_rate": 3.5757987763426243e-06,
"loss": 0.0859,
"step": 42800
},
{
"epoch": 13.44,
"learning_rate": 3.5588035350101974e-06,
"loss": 0.0865,
"step": 42900
},
{
"epoch": 13.47,
"learning_rate": 3.5418082936777704e-06,
"loss": 0.0851,
"step": 43000
},
{
"epoch": 13.5,
"learning_rate": 3.5248130523453434e-06,
"loss": 0.0885,
"step": 43100
},
{
"epoch": 13.54,
"learning_rate": 3.507817811012917e-06,
"loss": 0.0851,
"step": 43200
},
{
"epoch": 13.57,
"learning_rate": 3.49082256968049e-06,
"loss": 0.0862,
"step": 43300
},
{
"epoch": 13.6,
"learning_rate": 3.473827328348063e-06,
"loss": 0.0884,
"step": 43400
},
{
"epoch": 13.63,
"learning_rate": 3.456832087015636e-06,
"loss": 0.0874,
"step": 43500
},
{
"epoch": 13.66,
"learning_rate": 3.439836845683209e-06,
"loss": 0.0868,
"step": 43600
},
{
"epoch": 13.69,
"learning_rate": 3.4228416043507824e-06,
"loss": 0.0856,
"step": 43700
},
{
"epoch": 13.72,
"learning_rate": 3.4058463630183554e-06,
"loss": 0.0877,
"step": 43800
},
{
"epoch": 13.76,
"learning_rate": 3.388851121685928e-06,
"loss": 0.0863,
"step": 43900
},
{
"epoch": 13.79,
"learning_rate": 3.371855880353501e-06,
"loss": 0.0868,
"step": 44000
},
{
"epoch": 13.82,
"learning_rate": 3.354860639021074e-06,
"loss": 0.0871,
"step": 44100
},
{
"epoch": 13.85,
"learning_rate": 3.3378653976886475e-06,
"loss": 0.0845,
"step": 44200
},
{
"epoch": 13.88,
"learning_rate": 3.3208701563562205e-06,
"loss": 0.0874,
"step": 44300
},
{
"epoch": 13.91,
"learning_rate": 3.3038749150237936e-06,
"loss": 0.0906,
"step": 44400
},
{
"epoch": 13.94,
"learning_rate": 3.2868796736913666e-06,
"loss": 0.0863,
"step": 44500
},
{
"epoch": 13.97,
"learning_rate": 3.2698844323589396e-06,
"loss": 0.0868,
"step": 44600
},
{
"epoch": 14.01,
"learning_rate": 3.2528891910265126e-06,
"loss": 0.086,
"step": 44700
},
{
"epoch": 14.04,
"learning_rate": 3.235893949694086e-06,
"loss": 0.0839,
"step": 44800
},
{
"epoch": 14.07,
"learning_rate": 3.218898708361659e-06,
"loss": 0.0837,
"step": 44900
},
{
"epoch": 14.1,
"learning_rate": 3.201903467029232e-06,
"loss": 0.0827,
"step": 45000
},
{
"epoch": 14.1,
"eval_loss": 0.23157520592212677,
"eval_runtime": 66.6414,
"eval_samples_per_second": 134.166,
"eval_steps_per_second": 2.101,
"step": 45000
},
{
"epoch": 14.13,
"learning_rate": 3.184908225696805e-06,
"loss": 0.0829,
"step": 45100
},
{
"epoch": 14.16,
"learning_rate": 3.167912984364378e-06,
"loss": 0.0843,
"step": 45200
},
{
"epoch": 14.19,
"learning_rate": 3.1509177430319516e-06,
"loss": 0.0837,
"step": 45300
},
{
"epoch": 14.23,
"learning_rate": 3.1339225016995247e-06,
"loss": 0.0835,
"step": 45400
},
{
"epoch": 14.26,
"learning_rate": 3.1169272603670973e-06,
"loss": 0.0834,
"step": 45500
},
{
"epoch": 14.29,
"learning_rate": 3.0999320190346703e-06,
"loss": 0.0838,
"step": 45600
},
{
"epoch": 14.32,
"learning_rate": 3.0829367777022433e-06,
"loss": 0.0841,
"step": 45700
},
{
"epoch": 14.35,
"learning_rate": 3.0659415363698168e-06,
"loss": 0.0845,
"step": 45800
},
{
"epoch": 14.38,
"learning_rate": 3.0489462950373898e-06,
"loss": 0.0825,
"step": 45900
},
{
"epoch": 14.41,
"learning_rate": 3.031951053704963e-06,
"loss": 0.0853,
"step": 46000
},
{
"epoch": 14.44,
"learning_rate": 3.014955812372536e-06,
"loss": 0.0845,
"step": 46100
},
{
"epoch": 14.48,
"learning_rate": 2.997960571040109e-06,
"loss": 0.0839,
"step": 46200
},
{
"epoch": 14.51,
"learning_rate": 2.980965329707682e-06,
"loss": 0.0835,
"step": 46300
},
{
"epoch": 14.54,
"learning_rate": 2.9639700883752553e-06,
"loss": 0.084,
"step": 46400
},
{
"epoch": 14.57,
"learning_rate": 2.9469748470428284e-06,
"loss": 0.0848,
"step": 46500
},
{
"epoch": 14.6,
"learning_rate": 2.9299796057104014e-06,
"loss": 0.0831,
"step": 46600
},
{
"epoch": 14.63,
"learning_rate": 2.9129843643779744e-06,
"loss": 0.0811,
"step": 46700
},
{
"epoch": 14.66,
"learning_rate": 2.8959891230455474e-06,
"loss": 0.0843,
"step": 46800
},
{
"epoch": 14.7,
"learning_rate": 2.878993881713121e-06,
"loss": 0.0821,
"step": 46900
},
{
"epoch": 14.73,
"learning_rate": 2.861998640380694e-06,
"loss": 0.0827,
"step": 47000
},
{
"epoch": 14.76,
"learning_rate": 2.845003399048267e-06,
"loss": 0.0848,
"step": 47100
},
{
"epoch": 14.79,
"learning_rate": 2.8280081577158395e-06,
"loss": 0.0833,
"step": 47200
},
{
"epoch": 14.82,
"learning_rate": 2.8110129163834125e-06,
"loss": 0.0835,
"step": 47300
},
{
"epoch": 14.85,
"learning_rate": 2.794017675050986e-06,
"loss": 0.0823,
"step": 47400
},
{
"epoch": 14.88,
"learning_rate": 2.777022433718559e-06,
"loss": 0.0831,
"step": 47500
},
{
"epoch": 14.91,
"learning_rate": 2.760027192386132e-06,
"loss": 0.0848,
"step": 47600
},
{
"epoch": 14.95,
"learning_rate": 2.743031951053705e-06,
"loss": 0.0817,
"step": 47700
},
{
"epoch": 14.98,
"learning_rate": 2.726036709721278e-06,
"loss": 0.0838,
"step": 47800
},
{
"epoch": 15.01,
"learning_rate": 2.709041468388851e-06,
"loss": 0.0828,
"step": 47900
},
{
"epoch": 15.04,
"learning_rate": 2.6920462270564246e-06,
"loss": 0.0803,
"step": 48000
},
{
"epoch": 15.07,
"learning_rate": 2.6750509857239976e-06,
"loss": 0.0799,
"step": 48100
},
{
"epoch": 15.1,
"learning_rate": 2.6580557443915706e-06,
"loss": 0.0805,
"step": 48200
},
{
"epoch": 15.13,
"learning_rate": 2.6410605030591436e-06,
"loss": 0.0806,
"step": 48300
},
{
"epoch": 15.17,
"learning_rate": 2.6240652617267167e-06,
"loss": 0.0825,
"step": 48400
},
{
"epoch": 15.2,
"learning_rate": 2.60707002039429e-06,
"loss": 0.0801,
"step": 48500
},
{
"epoch": 15.23,
"learning_rate": 2.590074779061863e-06,
"loss": 0.081,
"step": 48600
},
{
"epoch": 15.26,
"learning_rate": 2.573079537729436e-06,
"loss": 0.0827,
"step": 48700
},
{
"epoch": 15.29,
"learning_rate": 2.5560842963970088e-06,
"loss": 0.0808,
"step": 48800
},
{
"epoch": 15.32,
"learning_rate": 2.5390890550645818e-06,
"loss": 0.0817,
"step": 48900
},
{
"epoch": 15.35,
"learning_rate": 2.5220938137321557e-06,
"loss": 0.0797,
"step": 49000
},
{
"epoch": 15.38,
"learning_rate": 2.5050985723997283e-06,
"loss": 0.079,
"step": 49100
},
{
"epoch": 15.42,
"learning_rate": 2.4881033310673013e-06,
"loss": 0.0811,
"step": 49200
},
{
"epoch": 15.45,
"learning_rate": 2.4711080897348743e-06,
"loss": 0.0794,
"step": 49300
},
{
"epoch": 15.48,
"learning_rate": 2.4541128484024478e-06,
"loss": 0.0812,
"step": 49400
},
{
"epoch": 15.51,
"learning_rate": 2.4371176070700208e-06,
"loss": 0.0822,
"step": 49500
},
{
"epoch": 15.54,
"learning_rate": 2.4201223657375934e-06,
"loss": 0.0815,
"step": 49600
},
{
"epoch": 15.57,
"learning_rate": 2.403127124405167e-06,
"loss": 0.079,
"step": 49700
},
{
"epoch": 15.6,
"learning_rate": 2.38613188307274e-06,
"loss": 0.0793,
"step": 49800
},
{
"epoch": 15.64,
"learning_rate": 2.369136641740313e-06,
"loss": 0.0798,
"step": 49900
},
{
"epoch": 15.67,
"learning_rate": 2.352141400407886e-06,
"loss": 0.0789,
"step": 50000
},
{
"epoch": 15.67,
"eval_loss": 0.2277790755033493,
"eval_runtime": 81.7922,
"eval_samples_per_second": 109.314,
"eval_steps_per_second": 1.712,
"step": 50000
},
{
"epoch": 15.7,
"learning_rate": 2.335146159075459e-06,
"loss": 0.0812,
"step": 50100
},
{
"epoch": 15.73,
"learning_rate": 2.3181509177430324e-06,
"loss": 0.0795,
"step": 50200
},
{
"epoch": 15.76,
"learning_rate": 2.3011556764106054e-06,
"loss": 0.0784,
"step": 50300
},
{
"epoch": 15.79,
"learning_rate": 2.284160435078178e-06,
"loss": 0.0826,
"step": 50400
},
{
"epoch": 15.82,
"learning_rate": 2.2671651937457514e-06,
"loss": 0.0813,
"step": 50500
},
{
"epoch": 15.85,
"learning_rate": 2.2501699524133245e-06,
"loss": 0.0793,
"step": 50600
},
{
"epoch": 15.89,
"learning_rate": 2.2331747110808975e-06,
"loss": 0.0803,
"step": 50700
},
{
"epoch": 15.92,
"learning_rate": 2.2161794697484705e-06,
"loss": 0.0792,
"step": 50800
},
{
"epoch": 15.95,
"learning_rate": 2.1991842284160435e-06,
"loss": 0.0808,
"step": 50900
},
{
"epoch": 15.98,
"learning_rate": 2.182188987083617e-06,
"loss": 0.0809,
"step": 51000
},
{
"epoch": 16.01,
"learning_rate": 2.16519374575119e-06,
"loss": 0.0786,
"step": 51100
},
{
"epoch": 16.04,
"learning_rate": 2.1481985044187626e-06,
"loss": 0.0793,
"step": 51200
},
{
"epoch": 16.07,
"learning_rate": 2.131203263086336e-06,
"loss": 0.0783,
"step": 51300
},
{
"epoch": 16.11,
"learning_rate": 2.114208021753909e-06,
"loss": 0.078,
"step": 51400
},
{
"epoch": 16.14,
"learning_rate": 2.097212780421482e-06,
"loss": 0.0783,
"step": 51500
},
{
"epoch": 16.17,
"learning_rate": 2.080217539089055e-06,
"loss": 0.0789,
"step": 51600
},
{
"epoch": 16.2,
"learning_rate": 2.063222297756628e-06,
"loss": 0.077,
"step": 51700
},
{
"epoch": 16.23,
"learning_rate": 2.0462270564242016e-06,
"loss": 0.0768,
"step": 51800
},
{
"epoch": 16.26,
"learning_rate": 2.0292318150917746e-06,
"loss": 0.0786,
"step": 51900
},
{
"epoch": 16.29,
"learning_rate": 2.0122365737593477e-06,
"loss": 0.0774,
"step": 52000
},
{
"epoch": 16.32,
"learning_rate": 1.9952413324269207e-06,
"loss": 0.0763,
"step": 52100
},
{
"epoch": 16.36,
"learning_rate": 1.9782460910944937e-06,
"loss": 0.0793,
"step": 52200
},
{
"epoch": 16.39,
"learning_rate": 1.9612508497620667e-06,
"loss": 0.0785,
"step": 52300
},
{
"epoch": 16.42,
"learning_rate": 1.9442556084296398e-06,
"loss": 0.0773,
"step": 52400
},
{
"epoch": 16.45,
"learning_rate": 1.9272603670972128e-06,
"loss": 0.0759,
"step": 52500
},
{
"epoch": 16.48,
"learning_rate": 1.9102651257647862e-06,
"loss": 0.0778,
"step": 52600
},
{
"epoch": 16.51,
"learning_rate": 1.893269884432359e-06,
"loss": 0.0795,
"step": 52700
},
{
"epoch": 16.54,
"learning_rate": 1.876274643099932e-06,
"loss": 0.079,
"step": 52800
},
{
"epoch": 16.58,
"learning_rate": 1.8592794017675053e-06,
"loss": 0.0771,
"step": 52900
},
{
"epoch": 16.61,
"learning_rate": 1.8422841604350783e-06,
"loss": 0.0789,
"step": 53000
},
{
"epoch": 16.64,
"learning_rate": 1.8252889191026513e-06,
"loss": 0.0785,
"step": 53100
},
{
"epoch": 16.67,
"learning_rate": 1.8082936777702246e-06,
"loss": 0.0773,
"step": 53200
},
{
"epoch": 16.7,
"learning_rate": 1.7912984364377974e-06,
"loss": 0.0763,
"step": 53300
},
{
"epoch": 16.73,
"learning_rate": 1.7743031951053708e-06,
"loss": 0.0793,
"step": 53400
},
{
"epoch": 16.76,
"learning_rate": 1.7573079537729437e-06,
"loss": 0.0771,
"step": 53500
},
{
"epoch": 16.79,
"learning_rate": 1.7403127124405167e-06,
"loss": 0.0779,
"step": 53600
},
{
"epoch": 16.83,
"learning_rate": 1.72331747110809e-06,
"loss": 0.0765,
"step": 53700
},
{
"epoch": 16.86,
"learning_rate": 1.706322229775663e-06,
"loss": 0.077,
"step": 53800
},
{
"epoch": 16.89,
"learning_rate": 1.689326988443236e-06,
"loss": 0.0774,
"step": 53900
},
{
"epoch": 16.92,
"learning_rate": 1.6723317471108092e-06,
"loss": 0.0771,
"step": 54000
},
{
"epoch": 16.95,
"learning_rate": 1.6553365057783822e-06,
"loss": 0.0783,
"step": 54100
},
{
"epoch": 16.98,
"learning_rate": 1.6383412644459555e-06,
"loss": 0.0785,
"step": 54200
},
{
"epoch": 17.01,
"learning_rate": 1.6213460231135283e-06,
"loss": 0.0771,
"step": 54300
},
{
"epoch": 17.05,
"learning_rate": 1.6043507817811013e-06,
"loss": 0.0776,
"step": 54400
},
{
"epoch": 17.08,
"learning_rate": 1.5873555404486745e-06,
"loss": 0.0753,
"step": 54500
},
{
"epoch": 17.11,
"learning_rate": 1.5703602991162476e-06,
"loss": 0.0758,
"step": 54600
},
{
"epoch": 17.14,
"learning_rate": 1.5533650577838208e-06,
"loss": 0.077,
"step": 54700
},
{
"epoch": 17.17,
"learning_rate": 1.5363698164513938e-06,
"loss": 0.0749,
"step": 54800
},
{
"epoch": 17.2,
"learning_rate": 1.5193745751189668e-06,
"loss": 0.076,
"step": 54900
},
{
"epoch": 17.23,
"learning_rate": 1.50237933378654e-06,
"loss": 0.0741,
"step": 55000
},
{
"epoch": 17.23,
"eval_loss": 0.224798783659935,
"eval_runtime": 80.315,
"eval_samples_per_second": 111.324,
"eval_steps_per_second": 1.743,
"step": 55000
},
{
"epoch": 17.26,
"learning_rate": 1.4853840924541129e-06,
"loss": 0.0763,
"step": 55100
},
{
"epoch": 17.3,
"learning_rate": 1.468388851121686e-06,
"loss": 0.0771,
"step": 55200
},
{
"epoch": 17.33,
"learning_rate": 1.4513936097892592e-06,
"loss": 0.0768,
"step": 55300
},
{
"epoch": 17.36,
"learning_rate": 1.4343983684568322e-06,
"loss": 0.0775,
"step": 55400
},
{
"epoch": 17.39,
"learning_rate": 1.4174031271244054e-06,
"loss": 0.075,
"step": 55500
},
{
"epoch": 17.42,
"learning_rate": 1.4004078857919784e-06,
"loss": 0.0734,
"step": 55600
},
{
"epoch": 17.45,
"learning_rate": 1.3834126444595515e-06,
"loss": 0.0779,
"step": 55700
},
{
"epoch": 17.48,
"learning_rate": 1.3664174031271247e-06,
"loss": 0.0746,
"step": 55800
},
{
"epoch": 17.52,
"learning_rate": 1.3494221617946975e-06,
"loss": 0.075,
"step": 55900
},
{
"epoch": 17.55,
"learning_rate": 1.3324269204622705e-06,
"loss": 0.0751,
"step": 56000
},
{
"epoch": 17.58,
"learning_rate": 1.3154316791298438e-06,
"loss": 0.0746,
"step": 56100
},
{
"epoch": 17.61,
"learning_rate": 1.2984364377974168e-06,
"loss": 0.0747,
"step": 56200
},
{
"epoch": 17.64,
"learning_rate": 1.28144119646499e-06,
"loss": 0.077,
"step": 56300
},
{
"epoch": 17.67,
"learning_rate": 1.264445955132563e-06,
"loss": 0.0731,
"step": 56400
},
{
"epoch": 17.7,
"learning_rate": 1.247450713800136e-06,
"loss": 0.077,
"step": 56500
},
{
"epoch": 17.73,
"learning_rate": 1.230455472467709e-06,
"loss": 0.0762,
"step": 56600
},
{
"epoch": 17.77,
"learning_rate": 1.2134602311352821e-06,
"loss": 0.0764,
"step": 56700
},
{
"epoch": 17.8,
"learning_rate": 1.1964649898028554e-06,
"loss": 0.0742,
"step": 56800
},
{
"epoch": 17.83,
"learning_rate": 1.1794697484704284e-06,
"loss": 0.0742,
"step": 56900
},
{
"epoch": 17.86,
"learning_rate": 1.1624745071380014e-06,
"loss": 0.0755,
"step": 57000
},
{
"epoch": 17.89,
"learning_rate": 1.1454792658055744e-06,
"loss": 0.0759,
"step": 57100
},
{
"epoch": 17.92,
"learning_rate": 1.1284840244731477e-06,
"loss": 0.075,
"step": 57200
},
{
"epoch": 17.95,
"learning_rate": 1.1114887831407207e-06,
"loss": 0.0762,
"step": 57300
},
{
"epoch": 17.99,
"learning_rate": 1.0944935418082937e-06,
"loss": 0.0741,
"step": 57400
},
{
"epoch": 18.02,
"learning_rate": 1.0774983004758667e-06,
"loss": 0.0751,
"step": 57500
},
{
"epoch": 18.05,
"learning_rate": 1.06050305914344e-06,
"loss": 0.0735,
"step": 57600
},
{
"epoch": 18.08,
"learning_rate": 1.043507817811013e-06,
"loss": 0.074,
"step": 57700
},
{
"epoch": 18.11,
"learning_rate": 1.026512576478586e-06,
"loss": 0.0745,
"step": 57800
},
{
"epoch": 18.14,
"learning_rate": 1.009517335146159e-06,
"loss": 0.0745,
"step": 57900
},
{
"epoch": 18.17,
"learning_rate": 9.925220938137323e-07,
"loss": 0.0752,
"step": 58000
},
{
"epoch": 18.2,
"learning_rate": 9.755268524813053e-07,
"loss": 0.0746,
"step": 58100
},
{
"epoch": 18.24,
"learning_rate": 9.585316111488783e-07,
"loss": 0.0755,
"step": 58200
},
{
"epoch": 18.27,
"learning_rate": 9.415363698164515e-07,
"loss": 0.0757,
"step": 58300
},
{
"epoch": 18.3,
"learning_rate": 9.245411284840246e-07,
"loss": 0.0731,
"step": 58400
},
{
"epoch": 18.33,
"learning_rate": 9.075458871515977e-07,
"loss": 0.075,
"step": 58500
},
{
"epoch": 18.36,
"learning_rate": 8.905506458191706e-07,
"loss": 0.0733,
"step": 58600
},
{
"epoch": 18.39,
"learning_rate": 8.735554044867438e-07,
"loss": 0.0751,
"step": 58700
},
{
"epoch": 18.42,
"learning_rate": 8.565601631543169e-07,
"loss": 0.0731,
"step": 58800
},
{
"epoch": 18.46,
"learning_rate": 8.3956492182189e-07,
"loss": 0.0743,
"step": 58900
},
{
"epoch": 18.49,
"learning_rate": 8.22569680489463e-07,
"loss": 0.0751,
"step": 59000
},
{
"epoch": 18.52,
"learning_rate": 8.055744391570361e-07,
"loss": 0.0729,
"step": 59100
},
{
"epoch": 18.55,
"learning_rate": 7.885791978246092e-07,
"loss": 0.0745,
"step": 59200
},
{
"epoch": 18.58,
"learning_rate": 7.715839564921823e-07,
"loss": 0.0753,
"step": 59300
},
{
"epoch": 18.61,
"learning_rate": 7.545887151597553e-07,
"loss": 0.074,
"step": 59400
},
{
"epoch": 18.64,
"learning_rate": 7.375934738273284e-07,
"loss": 0.0741,
"step": 59500
},
{
"epoch": 18.67,
"learning_rate": 7.205982324949015e-07,
"loss": 0.0727,
"step": 59600
},
{
"epoch": 18.71,
"learning_rate": 7.036029911624747e-07,
"loss": 0.0727,
"step": 59700
},
{
"epoch": 18.74,
"learning_rate": 6.866077498300476e-07,
"loss": 0.0754,
"step": 59800
},
{
"epoch": 18.77,
"learning_rate": 6.696125084976207e-07,
"loss": 0.0738,
"step": 59900
},
{
"epoch": 18.8,
"learning_rate": 6.526172671651938e-07,
"loss": 0.0714,
"step": 60000
},
{
"epoch": 18.8,
"eval_loss": 0.22282175719738007,
"eval_runtime": 115.3459,
"eval_samples_per_second": 77.515,
"eval_steps_per_second": 1.214,
"step": 60000
},
{
"epoch": 18.83,
"learning_rate": 6.35622025832767e-07,
"loss": 0.0727,
"step": 60100
},
{
"epoch": 18.86,
"learning_rate": 6.1862678450034e-07,
"loss": 0.0739,
"step": 60200
},
{
"epoch": 18.89,
"learning_rate": 6.01631543167913e-07,
"loss": 0.0729,
"step": 60300
},
{
"epoch": 18.93,
"learning_rate": 5.846363018354861e-07,
"loss": 0.0729,
"step": 60400
},
{
"epoch": 18.96,
"learning_rate": 5.676410605030592e-07,
"loss": 0.0731,
"step": 60500
},
{
"epoch": 18.99,
"learning_rate": 5.506458191706323e-07,
"loss": 0.0725,
"step": 60600
},
{
"epoch": 19.02,
"learning_rate": 5.336505778382053e-07,
"loss": 0.0727,
"step": 60700
},
{
"epoch": 19.05,
"learning_rate": 5.166553365057785e-07,
"loss": 0.0733,
"step": 60800
},
{
"epoch": 19.08,
"learning_rate": 4.996600951733515e-07,
"loss": 0.073,
"step": 60900
},
{
"epoch": 19.11,
"learning_rate": 4.826648538409246e-07,
"loss": 0.0721,
"step": 61000
},
{
"epoch": 19.14,
"learning_rate": 4.6566961250849763e-07,
"loss": 0.0731,
"step": 61100
},
{
"epoch": 19.18,
"learning_rate": 4.4867437117607076e-07,
"loss": 0.0749,
"step": 61200
},
{
"epoch": 19.21,
"learning_rate": 4.316791298436438e-07,
"loss": 0.0719,
"step": 61300
},
{
"epoch": 19.24,
"learning_rate": 4.146838885112169e-07,
"loss": 0.0706,
"step": 61400
},
{
"epoch": 19.27,
"learning_rate": 3.9768864717879e-07,
"loss": 0.0753,
"step": 61500
},
{
"epoch": 19.3,
"learning_rate": 3.8069340584636307e-07,
"loss": 0.0719,
"step": 61600
},
{
"epoch": 19.33,
"learning_rate": 3.6369816451393615e-07,
"loss": 0.0719,
"step": 61700
},
{
"epoch": 19.36,
"learning_rate": 3.467029231815092e-07,
"loss": 0.073,
"step": 61800
},
{
"epoch": 19.4,
"learning_rate": 3.297076818490823e-07,
"loss": 0.074,
"step": 61900
},
{
"epoch": 19.43,
"learning_rate": 3.127124405166554e-07,
"loss": 0.0718,
"step": 62000
},
{
"epoch": 19.46,
"learning_rate": 2.9571719918422846e-07,
"loss": 0.0728,
"step": 62100
},
{
"epoch": 19.49,
"learning_rate": 2.7872195785180153e-07,
"loss": 0.0728,
"step": 62200
},
{
"epoch": 19.52,
"learning_rate": 2.617267165193746e-07,
"loss": 0.0712,
"step": 62300
},
{
"epoch": 19.55,
"learning_rate": 2.447314751869477e-07,
"loss": 0.0747,
"step": 62400
},
{
"epoch": 19.58,
"learning_rate": 2.2773623385452074e-07,
"loss": 0.072,
"step": 62500
},
{
"epoch": 19.61,
"learning_rate": 2.1074099252209382e-07,
"loss": 0.0714,
"step": 62600
},
{
"epoch": 19.65,
"learning_rate": 1.937457511896669e-07,
"loss": 0.0717,
"step": 62700
},
{
"epoch": 19.68,
"learning_rate": 1.7675050985723997e-07,
"loss": 0.072,
"step": 62800
},
{
"epoch": 19.71,
"learning_rate": 1.5975526852481305e-07,
"loss": 0.0734,
"step": 62900
},
{
"epoch": 19.74,
"learning_rate": 1.4276002719238612e-07,
"loss": 0.0734,
"step": 63000
},
{
"epoch": 19.77,
"learning_rate": 1.2576478585995923e-07,
"loss": 0.072,
"step": 63100
},
{
"epoch": 19.8,
"learning_rate": 1.0876954452753229e-07,
"loss": 0.0731,
"step": 63200
},
{
"epoch": 19.83,
"learning_rate": 9.177430319510537e-08,
"loss": 0.0714,
"step": 63300
},
{
"epoch": 19.87,
"learning_rate": 7.477906186267846e-08,
"loss": 0.0725,
"step": 63400
},
{
"epoch": 19.9,
"learning_rate": 5.778382053025154e-08,
"loss": 0.0724,
"step": 63500
},
{
"epoch": 19.93,
"learning_rate": 4.0788579197824615e-08,
"loss": 0.0717,
"step": 63600
},
{
"epoch": 19.96,
"learning_rate": 2.3793337865397692e-08,
"loss": 0.0709,
"step": 63700
},
{
"epoch": 19.99,
"learning_rate": 6.798096532970768e-09,
"loss": 0.0726,
"step": 63800
},
{
"epoch": 20.0,
"step": 63840,
"total_flos": 1.325011889553408e+20,
"train_loss": 0.13975257427247245,
"train_runtime": 211299.4316,
"train_samples_per_second": 77.345,
"train_steps_per_second": 0.302
}
],
"max_steps": 63840,
"num_train_epochs": 21,
"total_flos": 1.325011889553408e+20,
"trial_name": null,
"trial_params": null
}