b-atmaja
wav2vec2.0 model
9c3cfb1
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 21600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 9.953703703703704e-05,
"loss": 1.1352,
"step": 100
},
{
"epoch": 0.02,
"eval_accuracy": 0.7482928037643433,
"eval_loss": 0.6091228127479553,
"eval_runtime": 1960.8351,
"eval_samples_per_second": 17.625,
"eval_steps_per_second": 4.406,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 9.907407407407407e-05,
"loss": 0.5497,
"step": 200
},
{
"epoch": 0.05,
"eval_accuracy": 0.9043981432914734,
"eval_loss": 0.2794453501701355,
"eval_runtime": 2070.7116,
"eval_samples_per_second": 16.69,
"eval_steps_per_second": 4.172,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 9.861111111111112e-05,
"loss": 0.4001,
"step": 300
},
{
"epoch": 0.07,
"eval_accuracy": 0.9633391499519348,
"eval_loss": 0.1039256900548935,
"eval_runtime": 2051.9615,
"eval_samples_per_second": 16.842,
"eval_steps_per_second": 4.211,
"step": 300
},
{
"epoch": 0.09,
"learning_rate": 9.814814814814815e-05,
"loss": 0.2967,
"step": 400
},
{
"epoch": 0.09,
"eval_accuracy": 0.9760127067565918,
"eval_loss": 0.0726834237575531,
"eval_runtime": 1976.3465,
"eval_samples_per_second": 17.487,
"eval_steps_per_second": 4.372,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 9.768518518518519e-05,
"loss": 0.2572,
"step": 500
},
{
"epoch": 0.12,
"eval_accuracy": 0.9752025604248047,
"eval_loss": 0.07836401462554932,
"eval_runtime": 1953.8972,
"eval_samples_per_second": 17.688,
"eval_steps_per_second": 4.422,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 9.722222222222223e-05,
"loss": 0.1858,
"step": 600
},
{
"epoch": 0.14,
"eval_accuracy": 0.987442135810852,
"eval_loss": 0.03908771649003029,
"eval_runtime": 1933.1285,
"eval_samples_per_second": 17.878,
"eval_steps_per_second": 4.469,
"step": 600
},
{
"epoch": 0.16,
"learning_rate": 9.675925925925926e-05,
"loss": 0.1776,
"step": 700
},
{
"epoch": 0.16,
"eval_accuracy": 0.9870080947875977,
"eval_loss": 0.046012409031391144,
"eval_runtime": 2073.2574,
"eval_samples_per_second": 16.669,
"eval_steps_per_second": 4.167,
"step": 700
},
{
"epoch": 0.19,
"learning_rate": 9.62962962962963e-05,
"loss": 0.1253,
"step": 800
},
{
"epoch": 0.19,
"eval_accuracy": 0.987442135810852,
"eval_loss": 0.04302794486284256,
"eval_runtime": 2100.3436,
"eval_samples_per_second": 16.454,
"eval_steps_per_second": 4.114,
"step": 800
},
{
"epoch": 0.21,
"learning_rate": 9.583333333333334e-05,
"loss": 0.1509,
"step": 900
},
{
"epoch": 0.21,
"eval_accuracy": 0.9821469783782959,
"eval_loss": 0.06500900536775589,
"eval_runtime": 2096.1889,
"eval_samples_per_second": 16.487,
"eval_steps_per_second": 4.122,
"step": 900
},
{
"epoch": 0.23,
"learning_rate": 9.537037037037038e-05,
"loss": 0.1574,
"step": 1000
},
{
"epoch": 0.23,
"eval_accuracy": 0.9847221970558167,
"eval_loss": 0.059933874756097794,
"eval_runtime": 2109.7765,
"eval_samples_per_second": 16.381,
"eval_steps_per_second": 4.095,
"step": 1000
},
{
"epoch": 0.25,
"learning_rate": 9.490740740740742e-05,
"loss": 0.1506,
"step": 1100
},
{
"epoch": 0.25,
"eval_accuracy": 0.9896122813224792,
"eval_loss": 0.034695032984018326,
"eval_runtime": 2117.6815,
"eval_samples_per_second": 16.32,
"eval_steps_per_second": 4.08,
"step": 1100
},
{
"epoch": 0.28,
"learning_rate": 9.444444444444444e-05,
"loss": 0.118,
"step": 1200
},
{
"epoch": 0.28,
"eval_accuracy": 0.9911168813705444,
"eval_loss": 0.03316599503159523,
"eval_runtime": 2107.0764,
"eval_samples_per_second": 16.402,
"eval_steps_per_second": 4.1,
"step": 1200
},
{
"epoch": 0.3,
"learning_rate": 9.398148148148148e-05,
"loss": 0.0885,
"step": 1300
},
{
"epoch": 0.3,
"eval_accuracy": 0.9947627186775208,
"eval_loss": 0.019724205136299133,
"eval_runtime": 2100.2147,
"eval_samples_per_second": 16.455,
"eval_steps_per_second": 4.114,
"step": 1300
},
{
"epoch": 0.32,
"learning_rate": 9.351851851851852e-05,
"loss": 0.0967,
"step": 1400
},
{
"epoch": 0.32,
"eval_accuracy": 0.9936053156852722,
"eval_loss": 0.022701723501086235,
"eval_runtime": 2110.6117,
"eval_samples_per_second": 16.374,
"eval_steps_per_second": 4.094,
"step": 1400
},
{
"epoch": 0.35,
"learning_rate": 9.305555555555556e-05,
"loss": 0.0882,
"step": 1500
},
{
"epoch": 0.35,
"eval_accuracy": 0.992274284362793,
"eval_loss": 0.02855427749454975,
"eval_runtime": 2115.7419,
"eval_samples_per_second": 16.335,
"eval_steps_per_second": 4.084,
"step": 1500
},
{
"epoch": 0.37,
"learning_rate": 9.25925925925926e-05,
"loss": 0.1056,
"step": 1600
},
{
"epoch": 0.37,
"eval_accuracy": 0.9962384104728699,
"eval_loss": 0.015638431534171104,
"eval_runtime": 2093.1202,
"eval_samples_per_second": 16.511,
"eval_steps_per_second": 4.128,
"step": 1600
},
{
"epoch": 0.39,
"learning_rate": 9.212962962962963e-05,
"loss": 0.1124,
"step": 1700
},
{
"epoch": 0.39,
"eval_accuracy": 0.9942708611488342,
"eval_loss": 0.023519381880760193,
"eval_runtime": 2715.3988,
"eval_samples_per_second": 12.727,
"eval_steps_per_second": 3.182,
"step": 1700
},
{
"epoch": 0.42,
"learning_rate": 9.166666666666667e-05,
"loss": 0.0813,
"step": 1800
},
{
"epoch": 0.42,
"eval_accuracy": 0.995341420173645,
"eval_loss": 0.017750833183526993,
"eval_runtime": 2099.7025,
"eval_samples_per_second": 16.459,
"eval_steps_per_second": 4.115,
"step": 1800
},
{
"epoch": 0.44,
"learning_rate": 9.120370370370371e-05,
"loss": 0.0609,
"step": 1900
},
{
"epoch": 0.44,
"eval_accuracy": 0.9971932768821716,
"eval_loss": 0.011351389810442924,
"eval_runtime": 2142.3716,
"eval_samples_per_second": 16.132,
"eval_steps_per_second": 4.033,
"step": 1900
},
{
"epoch": 0.46,
"learning_rate": 9.074074074074075e-05,
"loss": 0.0891,
"step": 2000
},
{
"epoch": 0.46,
"eval_accuracy": 0.9973379373550415,
"eval_loss": 0.012310467660427094,
"eval_runtime": 2095.1245,
"eval_samples_per_second": 16.495,
"eval_steps_per_second": 4.124,
"step": 2000
},
{
"epoch": 0.49,
"learning_rate": 9.027777777777779e-05,
"loss": 0.0424,
"step": 2100
},
{
"epoch": 0.49,
"eval_accuracy": 0.9985821843147278,
"eval_loss": 0.00660862447693944,
"eval_runtime": 2101.8862,
"eval_samples_per_second": 16.442,
"eval_steps_per_second": 4.111,
"step": 2100
},
{
"epoch": 0.51,
"learning_rate": 8.981481481481481e-05,
"loss": 0.0546,
"step": 2200
},
{
"epoch": 0.51,
"eval_accuracy": 0.9950520992279053,
"eval_loss": 0.021980540826916695,
"eval_runtime": 2121.7281,
"eval_samples_per_second": 16.289,
"eval_steps_per_second": 4.072,
"step": 2200
},
{
"epoch": 0.53,
"learning_rate": 8.935185185185185e-05,
"loss": 0.146,
"step": 2300
},
{
"epoch": 0.53,
"eval_accuracy": 0.9940393567085266,
"eval_loss": 0.02473669871687889,
"eval_runtime": 2062.8449,
"eval_samples_per_second": 16.754,
"eval_steps_per_second": 4.188,
"step": 2300
},
{
"epoch": 0.56,
"learning_rate": 8.888888888888889e-05,
"loss": 0.1174,
"step": 2400
},
{
"epoch": 0.56,
"eval_accuracy": 0.9958622455596924,
"eval_loss": 0.01570066250860691,
"eval_runtime": 2017.5974,
"eval_samples_per_second": 17.129,
"eval_steps_per_second": 4.282,
"step": 2400
},
{
"epoch": 0.58,
"learning_rate": 8.842592592592593e-05,
"loss": 0.0848,
"step": 2500
},
{
"epoch": 0.58,
"eval_accuracy": 0.9978298544883728,
"eval_loss": 0.008064490742981434,
"eval_runtime": 2005.1771,
"eval_samples_per_second": 17.235,
"eval_steps_per_second": 4.309,
"step": 2500
},
{
"epoch": 0.6,
"learning_rate": 8.796296296296297e-05,
"loss": 0.0792,
"step": 2600
},
{
"epoch": 0.6,
"eval_accuracy": 0.9986110925674438,
"eval_loss": 0.004222337622195482,
"eval_runtime": 1999.0902,
"eval_samples_per_second": 17.288,
"eval_steps_per_second": 4.322,
"step": 2600
},
{
"epoch": 0.62,
"learning_rate": 8.75e-05,
"loss": 0.0482,
"step": 2700
},
{
"epoch": 0.62,
"eval_accuracy": 0.9971354007720947,
"eval_loss": 0.01219157688319683,
"eval_runtime": 2001.1288,
"eval_samples_per_second": 17.27,
"eval_steps_per_second": 4.318,
"step": 2700
},
{
"epoch": 0.65,
"learning_rate": 8.703703703703704e-05,
"loss": 0.0697,
"step": 2800
},
{
"epoch": 0.65,
"eval_accuracy": 0.9931133985519409,
"eval_loss": 0.027987554669380188,
"eval_runtime": 1997.1851,
"eval_samples_per_second": 17.304,
"eval_steps_per_second": 4.326,
"step": 2800
},
{
"epoch": 0.67,
"learning_rate": 8.657407407407408e-05,
"loss": 0.106,
"step": 2900
},
{
"epoch": 0.67,
"eval_accuracy": 0.9977430701255798,
"eval_loss": 0.008220946416258812,
"eval_runtime": 2609.175,
"eval_samples_per_second": 13.246,
"eval_steps_per_second": 3.311,
"step": 2900
},
{
"epoch": 0.69,
"learning_rate": 8.611111111111112e-05,
"loss": 0.052,
"step": 3000
},
{
"epoch": 0.69,
"eval_accuracy": 0.9971932768821716,
"eval_loss": 0.01051583793014288,
"eval_runtime": 1971.9035,
"eval_samples_per_second": 17.526,
"eval_steps_per_second": 4.382,
"step": 3000
},
{
"epoch": 0.72,
"learning_rate": 8.564814814814816e-05,
"loss": 0.047,
"step": 3100
},
{
"epoch": 0.72,
"eval_accuracy": 0.9978588223457336,
"eval_loss": 0.009094738401472569,
"eval_runtime": 1980.5023,
"eval_samples_per_second": 17.45,
"eval_steps_per_second": 4.363,
"step": 3100
},
{
"epoch": 0.74,
"learning_rate": 8.518518518518518e-05,
"loss": 0.0495,
"step": 3200
},
{
"epoch": 0.74,
"eval_accuracy": 0.998466432094574,
"eval_loss": 0.006100042257457972,
"eval_runtime": 2070.7502,
"eval_samples_per_second": 16.69,
"eval_steps_per_second": 4.172,
"step": 3200
},
{
"epoch": 0.76,
"learning_rate": 8.472222222222222e-05,
"loss": 0.0979,
"step": 3300
},
{
"epoch": 0.76,
"eval_accuracy": 0.9978588223457336,
"eval_loss": 0.009109850972890854,
"eval_runtime": 1970.0999,
"eval_samples_per_second": 17.542,
"eval_steps_per_second": 4.386,
"step": 3300
},
{
"epoch": 0.79,
"learning_rate": 8.425925925925926e-05,
"loss": 0.0381,
"step": 3400
},
{
"epoch": 0.79,
"eval_accuracy": 0.9951099753379822,
"eval_loss": 0.021163903176784515,
"eval_runtime": 1977.6331,
"eval_samples_per_second": 17.475,
"eval_steps_per_second": 4.369,
"step": 3400
},
{
"epoch": 0.81,
"learning_rate": 8.379629629629629e-05,
"loss": 0.0268,
"step": 3500
},
{
"epoch": 0.81,
"eval_accuracy": 0.9980034828186035,
"eval_loss": 0.008532223291695118,
"eval_runtime": 1971.1529,
"eval_samples_per_second": 17.533,
"eval_steps_per_second": 4.383,
"step": 3500
},
{
"epoch": 0.83,
"learning_rate": 8.333333333333334e-05,
"loss": 0.073,
"step": 3600
},
{
"epoch": 0.83,
"eval_accuracy": 0.9961516261100769,
"eval_loss": 0.017610933631658554,
"eval_runtime": 1957.2329,
"eval_samples_per_second": 17.658,
"eval_steps_per_second": 4.414,
"step": 3600
},
{
"epoch": 0.86,
"learning_rate": 8.287037037037037e-05,
"loss": 0.0585,
"step": 3700
},
{
"epoch": 0.86,
"eval_accuracy": 0.9971354007720947,
"eval_loss": 0.011580849066376686,
"eval_runtime": 1962.4064,
"eval_samples_per_second": 17.611,
"eval_steps_per_second": 4.403,
"step": 3700
},
{
"epoch": 0.88,
"learning_rate": 8.240740740740741e-05,
"loss": 0.0868,
"step": 3800
},
{
"epoch": 0.88,
"eval_accuracy": 0.9994502067565918,
"eval_loss": 0.00212017516605556,
"eval_runtime": 1982.4259,
"eval_samples_per_second": 17.433,
"eval_steps_per_second": 4.358,
"step": 3800
},
{
"epoch": 0.9,
"learning_rate": 8.194444444444445e-05,
"loss": 0.0496,
"step": 3900
},
{
"epoch": 0.9,
"eval_accuracy": 0.9978877305984497,
"eval_loss": 0.008284298703074455,
"eval_runtime": 1983.4898,
"eval_samples_per_second": 17.424,
"eval_steps_per_second": 4.356,
"step": 3900
},
{
"epoch": 0.93,
"learning_rate": 8.148148148148148e-05,
"loss": 0.0641,
"step": 4000
},
{
"epoch": 0.93,
"eval_accuracy": 0.9967592358589172,
"eval_loss": 0.013520145788788795,
"eval_runtime": 1998.2946,
"eval_samples_per_second": 17.295,
"eval_steps_per_second": 4.324,
"step": 4000
},
{
"epoch": 0.95,
"learning_rate": 8.101851851851853e-05,
"loss": 0.0858,
"step": 4100
},
{
"epoch": 0.95,
"eval_accuracy": 0.9989872574806213,
"eval_loss": 0.003793817013502121,
"eval_runtime": 2125.8264,
"eval_samples_per_second": 16.257,
"eval_steps_per_second": 4.064,
"step": 4100
},
{
"epoch": 0.97,
"learning_rate": 8.055555555555556e-05,
"loss": 0.0483,
"step": 4200
},
{
"epoch": 0.97,
"eval_accuracy": 0.9978588223457336,
"eval_loss": 0.009265501983463764,
"eval_runtime": 2260.2096,
"eval_samples_per_second": 15.291,
"eval_steps_per_second": 3.823,
"step": 4200
},
{
"epoch": 1.0,
"learning_rate": 8.00925925925926e-05,
"loss": 0.1115,
"step": 4300
},
{
"epoch": 1.0,
"eval_accuracy": 0.9990162253379822,
"eval_loss": 0.003752070013433695,
"eval_runtime": 1992.5237,
"eval_samples_per_second": 17.345,
"eval_steps_per_second": 4.336,
"step": 4300
},
{
"epoch": 1.02,
"learning_rate": 7.962962962962964e-05,
"loss": 0.0486,
"step": 4400
},
{
"epoch": 1.02,
"eval_accuracy": 0.9991897940635681,
"eval_loss": 0.0031358152627944946,
"eval_runtime": 1985.6758,
"eval_samples_per_second": 17.405,
"eval_steps_per_second": 4.351,
"step": 4400
},
{
"epoch": 1.04,
"learning_rate": 7.916666666666666e-05,
"loss": 0.0166,
"step": 4500
},
{
"epoch": 1.04,
"eval_accuracy": 0.9995370507240295,
"eval_loss": 0.002144153229892254,
"eval_runtime": 2034.5738,
"eval_samples_per_second": 16.986,
"eval_steps_per_second": 4.247,
"step": 4500
},
{
"epoch": 1.06,
"learning_rate": 7.870370370370372e-05,
"loss": 0.0084,
"step": 4600
},
{
"epoch": 1.06,
"eval_accuracy": 0.9986979365348816,
"eval_loss": 0.006190824322402477,
"eval_runtime": 2022.3112,
"eval_samples_per_second": 17.089,
"eval_steps_per_second": 4.272,
"step": 4600
},
{
"epoch": 1.09,
"learning_rate": 7.824074074074074e-05,
"loss": 0.0205,
"step": 4700
},
{
"epoch": 1.09,
"eval_accuracy": 0.999160885810852,
"eval_loss": 0.0034529021941125393,
"eval_runtime": 2036.3231,
"eval_samples_per_second": 16.972,
"eval_steps_per_second": 4.243,
"step": 4700
},
{
"epoch": 1.11,
"learning_rate": 7.777777777777778e-05,
"loss": 0.0217,
"step": 4800
},
{
"epoch": 1.11,
"eval_accuracy": 0.9973379373550415,
"eval_loss": 0.012433897703886032,
"eval_runtime": 2054.4934,
"eval_samples_per_second": 16.822,
"eval_steps_per_second": 4.205,
"step": 4800
},
{
"epoch": 1.13,
"learning_rate": 7.731481481481482e-05,
"loss": 0.0407,
"step": 4900
},
{
"epoch": 1.13,
"eval_accuracy": 0.9991030097007751,
"eval_loss": 0.004298593383282423,
"eval_runtime": 2009.8166,
"eval_samples_per_second": 17.196,
"eval_steps_per_second": 4.299,
"step": 4900
},
{
"epoch": 1.16,
"learning_rate": 7.685185185185185e-05,
"loss": 0.0598,
"step": 5000
},
{
"epoch": 1.16,
"eval_accuracy": 0.9981771111488342,
"eval_loss": 0.007797444239258766,
"eval_runtime": 1996.948,
"eval_samples_per_second": 17.306,
"eval_steps_per_second": 4.327,
"step": 5000
},
{
"epoch": 1.18,
"learning_rate": 7.638888888888889e-05,
"loss": 0.058,
"step": 5100
},
{
"epoch": 1.18,
"eval_accuracy": 0.9981192350387573,
"eval_loss": 0.009161165915429592,
"eval_runtime": 2007.394,
"eval_samples_per_second": 17.216,
"eval_steps_per_second": 4.304,
"step": 5100
},
{
"epoch": 1.2,
"learning_rate": 7.592592592592593e-05,
"loss": 0.0119,
"step": 5200
},
{
"epoch": 1.2,
"eval_accuracy": 0.9994502067565918,
"eval_loss": 0.0023240004666149616,
"eval_runtime": 2021.6614,
"eval_samples_per_second": 17.095,
"eval_steps_per_second": 4.274,
"step": 5200
},
{
"epoch": 1.23,
"learning_rate": 7.546296296296297e-05,
"loss": 0.08,
"step": 5300
},
{
"epoch": 1.23,
"eval_accuracy": 0.9976562261581421,
"eval_loss": 0.009528687223792076,
"eval_runtime": 2024.2247,
"eval_samples_per_second": 17.073,
"eval_steps_per_second": 4.268,
"step": 5300
},
{
"epoch": 1.25,
"learning_rate": 7.500000000000001e-05,
"loss": 0.0336,
"step": 5400
},
{
"epoch": 1.25,
"eval_accuracy": 0.9995949268341064,
"eval_loss": 0.0020153559744358063,
"eval_runtime": 2005.7373,
"eval_samples_per_second": 17.231,
"eval_steps_per_second": 4.308,
"step": 5400
},
{
"epoch": 1.27,
"learning_rate": 7.453703703703703e-05,
"loss": 0.0508,
"step": 5500
},
{
"epoch": 1.27,
"eval_accuracy": 0.9989004731178284,
"eval_loss": 0.00367682590149343,
"eval_runtime": 2024.401,
"eval_samples_per_second": 17.072,
"eval_steps_per_second": 4.268,
"step": 5500
},
{
"epoch": 1.3,
"learning_rate": 7.407407407407407e-05,
"loss": 0.0146,
"step": 5600
},
{
"epoch": 1.3,
"eval_accuracy": 0.9992766380310059,
"eval_loss": 0.002618621801957488,
"eval_runtime": 2017.6548,
"eval_samples_per_second": 17.129,
"eval_steps_per_second": 4.282,
"step": 5600
},
{
"epoch": 1.32,
"learning_rate": 7.361111111111111e-05,
"loss": 0.038,
"step": 5700
},
{
"epoch": 1.32,
"eval_accuracy": 0.9988425970077515,
"eval_loss": 0.00465565687045455,
"eval_runtime": 2002.1508,
"eval_samples_per_second": 17.261,
"eval_steps_per_second": 4.315,
"step": 5700
},
{
"epoch": 1.34,
"learning_rate": 7.314814814814815e-05,
"loss": 0.0613,
"step": 5800
},
{
"epoch": 1.34,
"eval_accuracy": 0.998379647731781,
"eval_loss": 0.005978360306471586,
"eval_runtime": 2001.292,
"eval_samples_per_second": 17.269,
"eval_steps_per_second": 4.317,
"step": 5800
},
{
"epoch": 1.37,
"learning_rate": 7.268518518518519e-05,
"loss": 0.0364,
"step": 5900
},
{
"epoch": 1.37,
"eval_accuracy": 0.9971064925193787,
"eval_loss": 0.01282673142850399,
"eval_runtime": 2012.1731,
"eval_samples_per_second": 17.175,
"eval_steps_per_second": 4.294,
"step": 5900
},
{
"epoch": 1.39,
"learning_rate": 7.222222222222222e-05,
"loss": 0.108,
"step": 6000
},
{
"epoch": 1.39,
"eval_accuracy": 0.998379647731781,
"eval_loss": 0.005587506573647261,
"eval_runtime": 2228.721,
"eval_samples_per_second": 15.507,
"eval_steps_per_second": 3.877,
"step": 6000
},
{
"epoch": 1.41,
"learning_rate": 7.175925925925926e-05,
"loss": 0.0134,
"step": 6100
},
{
"epoch": 1.41,
"eval_accuracy": 0.9985821843147278,
"eval_loss": 0.0066048940643668175,
"eval_runtime": 2000.8975,
"eval_samples_per_second": 17.272,
"eval_steps_per_second": 4.318,
"step": 6100
},
{
"epoch": 1.44,
"learning_rate": 7.12962962962963e-05,
"loss": 0.0389,
"step": 6200
},
{
"epoch": 1.44,
"eval_accuracy": 0.9972511529922485,
"eval_loss": 0.012162311002612114,
"eval_runtime": 1997.5848,
"eval_samples_per_second": 17.301,
"eval_steps_per_second": 4.325,
"step": 6200
},
{
"epoch": 1.46,
"learning_rate": 7.083333333333334e-05,
"loss": 0.0208,
"step": 6300
},
{
"epoch": 1.46,
"eval_accuracy": 0.9991030097007751,
"eval_loss": 0.0034532626159489155,
"eval_runtime": 2007.9035,
"eval_samples_per_second": 17.212,
"eval_steps_per_second": 4.303,
"step": 6300
},
{
"epoch": 1.48,
"learning_rate": 7.037037037037038e-05,
"loss": 0.0376,
"step": 6400
},
{
"epoch": 1.48,
"eval_accuracy": 0.9991897940635681,
"eval_loss": 0.004356299061328173,
"eval_runtime": 1996.8911,
"eval_samples_per_second": 17.307,
"eval_steps_per_second": 4.327,
"step": 6400
},
{
"epoch": 1.5,
"learning_rate": 6.99074074074074e-05,
"loss": 0.0346,
"step": 6500
},
{
"epoch": 1.5,
"eval_accuracy": 0.9969907402992249,
"eval_loss": 0.017812130972743034,
"eval_runtime": 2004.911,
"eval_samples_per_second": 17.238,
"eval_steps_per_second": 4.309,
"step": 6500
},
{
"epoch": 1.53,
"learning_rate": 6.944444444444444e-05,
"loss": 0.0189,
"step": 6600
},
{
"epoch": 1.53,
"eval_accuracy": 0.9987847208976746,
"eval_loss": 0.0057495711371302605,
"eval_runtime": 2011.102,
"eval_samples_per_second": 17.185,
"eval_steps_per_second": 4.296,
"step": 6600
},
{
"epoch": 1.55,
"learning_rate": 6.898148148148148e-05,
"loss": 0.0141,
"step": 6700
},
{
"epoch": 1.55,
"eval_accuracy": 0.9992766380310059,
"eval_loss": 0.003152304096147418,
"eval_runtime": 1989.9017,
"eval_samples_per_second": 17.368,
"eval_steps_per_second": 4.342,
"step": 6700
},
{
"epoch": 1.57,
"learning_rate": 6.851851851851852e-05,
"loss": 0.0719,
"step": 6800
},
{
"epoch": 1.57,
"eval_accuracy": 0.9987847208976746,
"eval_loss": 0.005420052912086248,
"eval_runtime": 1969.8998,
"eval_samples_per_second": 17.544,
"eval_steps_per_second": 4.386,
"step": 6800
},
{
"epoch": 1.6,
"learning_rate": 6.805555555555556e-05,
"loss": 0.0225,
"step": 6900
},
{
"epoch": 1.6,
"eval_accuracy": 0.9971932768821716,
"eval_loss": 0.012641699984669685,
"eval_runtime": 1981.1809,
"eval_samples_per_second": 17.444,
"eval_steps_per_second": 4.361,
"step": 6900
},
{
"epoch": 1.62,
"learning_rate": 6.759259259259259e-05,
"loss": 0.0682,
"step": 7000
},
{
"epoch": 1.62,
"eval_accuracy": 0.9989583492279053,
"eval_loss": 0.003953148610889912,
"eval_runtime": 1973.9678,
"eval_samples_per_second": 17.508,
"eval_steps_per_second": 4.377,
"step": 7000
},
{
"epoch": 1.64,
"learning_rate": 6.712962962962963e-05,
"loss": 0.0521,
"step": 7100
},
{
"epoch": 1.64,
"eval_accuracy": 0.998466432094574,
"eval_loss": 0.005261498969048262,
"eval_runtime": 1989.7692,
"eval_samples_per_second": 17.369,
"eval_steps_per_second": 4.342,
"step": 7100
},
{
"epoch": 1.67,
"learning_rate": 6.666666666666667e-05,
"loss": 0.0358,
"step": 7200
},
{
"epoch": 1.67,
"eval_accuracy": 0.9993634223937988,
"eval_loss": 0.002406924497336149,
"eval_runtime": 1975.9496,
"eval_samples_per_second": 17.49,
"eval_steps_per_second": 4.373,
"step": 7200
},
{
"epoch": 1.69,
"learning_rate": 6.620370370370371e-05,
"loss": 0.0255,
"step": 7300
},
{
"epoch": 1.69,
"eval_accuracy": 0.9984953999519348,
"eval_loss": 0.007655243389308453,
"eval_runtime": 1972.1073,
"eval_samples_per_second": 17.524,
"eval_steps_per_second": 4.381,
"step": 7300
},
{
"epoch": 1.71,
"learning_rate": 6.574074074074075e-05,
"loss": 0.0424,
"step": 7400
},
{
"epoch": 1.71,
"eval_accuracy": 0.9996238350868225,
"eval_loss": 0.0017167649930343032,
"eval_runtime": 1980.6249,
"eval_samples_per_second": 17.449,
"eval_steps_per_second": 4.362,
"step": 7400
},
{
"epoch": 1.74,
"learning_rate": 6.527777777777778e-05,
"loss": 0.0214,
"step": 7500
},
{
"epoch": 1.74,
"eval_accuracy": 0.9997106194496155,
"eval_loss": 0.0009764753049239516,
"eval_runtime": 2005.2649,
"eval_samples_per_second": 17.235,
"eval_steps_per_second": 4.309,
"step": 7500
},
{
"epoch": 1.76,
"learning_rate": 6.481481481481482e-05,
"loss": 0.0429,
"step": 7600
},
{
"epoch": 1.76,
"eval_accuracy": 0.996006965637207,
"eval_loss": 0.019011829048395157,
"eval_runtime": 2045.2435,
"eval_samples_per_second": 16.898,
"eval_steps_per_second": 4.224,
"step": 7600
},
{
"epoch": 1.78,
"learning_rate": 6.435185185185186e-05,
"loss": 0.0783,
"step": 7700
},
{
"epoch": 1.78,
"eval_accuracy": 0.9976562261581421,
"eval_loss": 0.008234655484557152,
"eval_runtime": 2041.1233,
"eval_samples_per_second": 16.932,
"eval_steps_per_second": 4.233,
"step": 7700
},
{
"epoch": 1.81,
"learning_rate": 6.388888888888888e-05,
"loss": 0.0141,
"step": 7800
},
{
"epoch": 1.81,
"eval_accuracy": 0.9996238350868225,
"eval_loss": 0.0018950661178678274,
"eval_runtime": 1994.0408,
"eval_samples_per_second": 17.332,
"eval_steps_per_second": 4.333,
"step": 7800
},
{
"epoch": 1.83,
"learning_rate": 6.342592592592594e-05,
"loss": 0.0203,
"step": 7900
},
{
"epoch": 1.83,
"eval_accuracy": 0.9994502067565918,
"eval_loss": 0.0022274223156273365,
"eval_runtime": 1978.2563,
"eval_samples_per_second": 17.47,
"eval_steps_per_second": 4.367,
"step": 7900
},
{
"epoch": 1.85,
"learning_rate": 6.296296296296296e-05,
"loss": 0.0439,
"step": 8000
},
{
"epoch": 1.85,
"eval_accuracy": 0.9979166388511658,
"eval_loss": 0.007150179240852594,
"eval_runtime": 1990.4775,
"eval_samples_per_second": 17.363,
"eval_steps_per_second": 4.341,
"step": 8000
},
{
"epoch": 1.88,
"learning_rate": 6.25e-05,
"loss": 0.0228,
"step": 8100
},
{
"epoch": 1.88,
"eval_accuracy": 0.9973090291023254,
"eval_loss": 0.010999325662851334,
"eval_runtime": 1995.6933,
"eval_samples_per_second": 17.317,
"eval_steps_per_second": 4.329,
"step": 8100
},
{
"epoch": 1.9,
"learning_rate": 6.203703703703704e-05,
"loss": 0.0386,
"step": 8200
},
{
"epoch": 1.9,
"eval_accuracy": 0.9996817111968994,
"eval_loss": 0.001689778990112245,
"eval_runtime": 1983.468,
"eval_samples_per_second": 17.424,
"eval_steps_per_second": 4.356,
"step": 8200
},
{
"epoch": 1.92,
"learning_rate": 6.157407407407407e-05,
"loss": 0.023,
"step": 8300
},
{
"epoch": 1.92,
"eval_accuracy": 0.9997106194496155,
"eval_loss": 0.001407949603162706,
"eval_runtime": 1979.2035,
"eval_samples_per_second": 17.462,
"eval_steps_per_second": 4.365,
"step": 8300
},
{
"epoch": 1.94,
"learning_rate": 6.111111111111112e-05,
"loss": 0.0188,
"step": 8400
},
{
"epoch": 1.94,
"eval_accuracy": 0.9997395873069763,
"eval_loss": 0.001248441985808313,
"eval_runtime": 1986.8349,
"eval_samples_per_second": 17.395,
"eval_steps_per_second": 4.349,
"step": 8400
},
{
"epoch": 1.97,
"learning_rate": 6.0648148148148154e-05,
"loss": 0.0301,
"step": 8500
},
{
"epoch": 1.97,
"eval_accuracy": 0.9998553395271301,
"eval_loss": 0.0005934939254075289,
"eval_runtime": 2006.6093,
"eval_samples_per_second": 17.223,
"eval_steps_per_second": 4.306,
"step": 8500
},
{
"epoch": 1.99,
"learning_rate": 6.018518518518519e-05,
"loss": 0.0077,
"step": 8600
},
{
"epoch": 1.99,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.00026703893672674894,
"eval_runtime": 2048.5713,
"eval_samples_per_second": 16.87,
"eval_steps_per_second": 4.218,
"step": 8600
},
{
"epoch": 2.01,
"learning_rate": 5.972222222222223e-05,
"loss": 0.0291,
"step": 8700
},
{
"epoch": 2.01,
"eval_accuracy": 0.9989872574806213,
"eval_loss": 0.004098657984286547,
"eval_runtime": 2031.7661,
"eval_samples_per_second": 17.01,
"eval_steps_per_second": 4.252,
"step": 8700
},
{
"epoch": 2.04,
"learning_rate": 5.925925925925926e-05,
"loss": 0.0274,
"step": 8800
},
{
"epoch": 2.04,
"eval_accuracy": 0.9995949268341064,
"eval_loss": 0.001983657479286194,
"eval_runtime": 2040.4005,
"eval_samples_per_second": 16.938,
"eval_steps_per_second": 4.234,
"step": 8800
},
{
"epoch": 2.06,
"learning_rate": 5.879629629629629e-05,
"loss": 0.0193,
"step": 8900
},
{
"epoch": 2.06,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0003717490180861205,
"eval_runtime": 2041.1098,
"eval_samples_per_second": 16.932,
"eval_steps_per_second": 4.233,
"step": 8900
},
{
"epoch": 2.08,
"learning_rate": 5.833333333333334e-05,
"loss": 0.0296,
"step": 9000
},
{
"epoch": 2.08,
"eval_accuracy": 0.9997684955596924,
"eval_loss": 0.0011291600530967116,
"eval_runtime": 2004.7261,
"eval_samples_per_second": 17.239,
"eval_steps_per_second": 4.31,
"step": 9000
},
{
"epoch": 2.11,
"learning_rate": 5.787037037037037e-05,
"loss": 0.0033,
"step": 9100
},
{
"epoch": 2.11,
"eval_accuracy": 0.9998553395271301,
"eval_loss": 0.0005978959961794317,
"eval_runtime": 2001.4449,
"eval_samples_per_second": 17.268,
"eval_steps_per_second": 4.317,
"step": 9100
},
{
"epoch": 2.13,
"learning_rate": 5.740740740740741e-05,
"loss": 0.0218,
"step": 9200
},
{
"epoch": 2.13,
"eval_accuracy": 0.999160885810852,
"eval_loss": 0.0025338120758533478,
"eval_runtime": 1990.5201,
"eval_samples_per_second": 17.362,
"eval_steps_per_second": 4.341,
"step": 9200
},
{
"epoch": 2.15,
"learning_rate": 5.6944444444444445e-05,
"loss": 0.0238,
"step": 9300
},
{
"epoch": 2.15,
"eval_accuracy": 0.999218761920929,
"eval_loss": 0.0033705937676131725,
"eval_runtime": 2019.0619,
"eval_samples_per_second": 17.117,
"eval_steps_per_second": 4.279,
"step": 9300
},
{
"epoch": 2.18,
"learning_rate": 5.648148148148148e-05,
"loss": 0.0319,
"step": 9400
},
{
"epoch": 2.18,
"eval_accuracy": 0.9994791746139526,
"eval_loss": 0.0017771282000467181,
"eval_runtime": 1997.4844,
"eval_samples_per_second": 17.302,
"eval_steps_per_second": 4.325,
"step": 9400
},
{
"epoch": 2.2,
"learning_rate": 5.6018518518518525e-05,
"loss": 0.0465,
"step": 9500
},
{
"epoch": 2.2,
"eval_accuracy": 0.9994502067565918,
"eval_loss": 0.002331700176000595,
"eval_runtime": 2012.3132,
"eval_samples_per_second": 17.174,
"eval_steps_per_second": 4.294,
"step": 9500
},
{
"epoch": 2.22,
"learning_rate": 5.555555555555556e-05,
"loss": 0.0412,
"step": 9600
},
{
"epoch": 2.22,
"eval_accuracy": 0.9997395873069763,
"eval_loss": 0.001237583113834262,
"eval_runtime": 1999.8191,
"eval_samples_per_second": 17.282,
"eval_steps_per_second": 4.32,
"step": 9600
},
{
"epoch": 2.25,
"learning_rate": 5.50925925925926e-05,
"loss": 0.02,
"step": 9700
},
{
"epoch": 2.25,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0009390079067088664,
"eval_runtime": 2008.6286,
"eval_samples_per_second": 17.206,
"eval_steps_per_second": 4.301,
"step": 9700
},
{
"epoch": 2.27,
"learning_rate": 5.462962962962963e-05,
"loss": 0.0226,
"step": 9800
},
{
"epoch": 2.27,
"eval_accuracy": 0.9995949268341064,
"eval_loss": 0.0017483533592894673,
"eval_runtime": 2005.0151,
"eval_samples_per_second": 17.237,
"eval_steps_per_second": 4.309,
"step": 9800
},
{
"epoch": 2.29,
"learning_rate": 5.4166666666666664e-05,
"loss": 0.0104,
"step": 9900
},
{
"epoch": 2.29,
"eval_accuracy": 0.9997684955596924,
"eval_loss": 0.0008292018319480121,
"eval_runtime": 1992.6545,
"eval_samples_per_second": 17.344,
"eval_steps_per_second": 4.336,
"step": 9900
},
{
"epoch": 2.31,
"learning_rate": 5.370370370370371e-05,
"loss": 0.0021,
"step": 10000
},
{
"epoch": 2.31,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.000292919430648908,
"eval_runtime": 1987.2006,
"eval_samples_per_second": 17.391,
"eval_steps_per_second": 4.348,
"step": 10000
},
{
"epoch": 2.34,
"learning_rate": 5.3240740740740744e-05,
"loss": 0.0135,
"step": 10100
},
{
"epoch": 2.34,
"eval_accuracy": 0.9987558126449585,
"eval_loss": 0.005596287082880735,
"eval_runtime": 2007.3126,
"eval_samples_per_second": 17.217,
"eval_steps_per_second": 4.304,
"step": 10100
},
{
"epoch": 2.36,
"learning_rate": 5.2777777777777784e-05,
"loss": 0.0319,
"step": 10200
},
{
"epoch": 2.36,
"eval_accuracy": 0.9995949268341064,
"eval_loss": 0.001722234534099698,
"eval_runtime": 2007.5056,
"eval_samples_per_second": 17.215,
"eval_steps_per_second": 4.304,
"step": 10200
},
{
"epoch": 2.38,
"learning_rate": 5.231481481481482e-05,
"loss": 0.0279,
"step": 10300
},
{
"epoch": 2.38,
"eval_accuracy": 0.9997106194496155,
"eval_loss": 0.001128367381170392,
"eval_runtime": 1980.2057,
"eval_samples_per_second": 17.453,
"eval_steps_per_second": 4.363,
"step": 10300
},
{
"epoch": 2.41,
"learning_rate": 5.185185185185185e-05,
"loss": 0.0017,
"step": 10400
},
{
"epoch": 2.41,
"eval_accuracy": 0.9997106194496155,
"eval_loss": 0.0013848639791831374,
"eval_runtime": 2007.1812,
"eval_samples_per_second": 17.218,
"eval_steps_per_second": 4.305,
"step": 10400
},
{
"epoch": 2.43,
"learning_rate": 5.138888888888889e-05,
"loss": 0.0296,
"step": 10500
},
{
"epoch": 2.43,
"eval_accuracy": 0.9989583492279053,
"eval_loss": 0.005161995068192482,
"eval_runtime": 1986.761,
"eval_samples_per_second": 17.395,
"eval_steps_per_second": 4.349,
"step": 10500
},
{
"epoch": 2.45,
"learning_rate": 5.092592592592593e-05,
"loss": 0.0168,
"step": 10600
},
{
"epoch": 2.45,
"eval_accuracy": 0.9997974634170532,
"eval_loss": 0.0004770481900777668,
"eval_runtime": 2003.7003,
"eval_samples_per_second": 17.248,
"eval_steps_per_second": 4.312,
"step": 10600
},
{
"epoch": 2.48,
"learning_rate": 5.046296296296297e-05,
"loss": 0.0194,
"step": 10700
},
{
"epoch": 2.48,
"eval_accuracy": 0.9997684955596924,
"eval_loss": 0.000735765672288835,
"eval_runtime": 1994.758,
"eval_samples_per_second": 17.325,
"eval_steps_per_second": 4.331,
"step": 10700
},
{
"epoch": 2.5,
"learning_rate": 5e-05,
"loss": 0.0006,
"step": 10800
},
{
"epoch": 2.5,
"eval_accuracy": 0.9998263716697693,
"eval_loss": 0.0009093827102333307,
"eval_runtime": 2000.8621,
"eval_samples_per_second": 17.273,
"eval_steps_per_second": 4.318,
"step": 10800
},
{
"epoch": 2.52,
"learning_rate": 4.9537037037037035e-05,
"loss": 0.0293,
"step": 10900
},
{
"epoch": 2.52,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.0005157970590516925,
"eval_runtime": 2011.0169,
"eval_samples_per_second": 17.185,
"eval_steps_per_second": 4.296,
"step": 10900
},
{
"epoch": 2.55,
"learning_rate": 4.9074074074074075e-05,
"loss": 0.0016,
"step": 11000
},
{
"epoch": 2.55,
"eval_accuracy": 0.9994791746139526,
"eval_loss": 0.0025301428977400064,
"eval_runtime": 2000.9133,
"eval_samples_per_second": 17.272,
"eval_steps_per_second": 4.318,
"step": 11000
},
{
"epoch": 2.57,
"learning_rate": 4.8611111111111115e-05,
"loss": 0.0069,
"step": 11100
},
{
"epoch": 2.57,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0004891157150268555,
"eval_runtime": 2006.7369,
"eval_samples_per_second": 17.222,
"eval_steps_per_second": 4.305,
"step": 11100
},
{
"epoch": 2.59,
"learning_rate": 4.814814814814815e-05,
"loss": 0.0001,
"step": 11200
},
{
"epoch": 2.59,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.00020419809152372181,
"eval_runtime": 1993.3725,
"eval_samples_per_second": 17.337,
"eval_steps_per_second": 4.334,
"step": 11200
},
{
"epoch": 2.62,
"learning_rate": 4.768518518518519e-05,
"loss": 0.0108,
"step": 11300
},
{
"epoch": 2.62,
"eval_accuracy": 0.9997974634170532,
"eval_loss": 0.0010758559219539165,
"eval_runtime": 2001.2763,
"eval_samples_per_second": 17.269,
"eval_steps_per_second": 4.317,
"step": 11300
},
{
"epoch": 2.64,
"learning_rate": 4.722222222222222e-05,
"loss": 0.0165,
"step": 11400
},
{
"epoch": 2.64,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0006313551566563547,
"eval_runtime": 1995.5247,
"eval_samples_per_second": 17.319,
"eval_steps_per_second": 4.33,
"step": 11400
},
{
"epoch": 2.66,
"learning_rate": 4.675925925925926e-05,
"loss": 0.0001,
"step": 11500
},
{
"epoch": 2.66,
"eval_accuracy": 0.9997974634170532,
"eval_loss": 0.0007648964528925717,
"eval_runtime": 2001.09,
"eval_samples_per_second": 17.271,
"eval_steps_per_second": 4.318,
"step": 11500
},
{
"epoch": 2.69,
"learning_rate": 4.62962962962963e-05,
"loss": 0.0244,
"step": 11600
},
{
"epoch": 2.69,
"eval_accuracy": 0.9998553395271301,
"eval_loss": 0.000668107473757118,
"eval_runtime": 2000.7577,
"eval_samples_per_second": 17.273,
"eval_steps_per_second": 4.318,
"step": 11600
},
{
"epoch": 2.71,
"learning_rate": 4.5833333333333334e-05,
"loss": 0.0312,
"step": 11700
},
{
"epoch": 2.71,
"eval_accuracy": 0.9995659589767456,
"eval_loss": 0.001716578146442771,
"eval_runtime": 1997.1256,
"eval_samples_per_second": 17.305,
"eval_steps_per_second": 4.326,
"step": 11700
},
{
"epoch": 2.73,
"learning_rate": 4.5370370370370374e-05,
"loss": 0.0191,
"step": 11800
},
{
"epoch": 2.73,
"eval_accuracy": 0.9997395873069763,
"eval_loss": 0.0007975550834089518,
"eval_runtime": 1967.8746,
"eval_samples_per_second": 17.562,
"eval_steps_per_second": 4.391,
"step": 11800
},
{
"epoch": 2.75,
"learning_rate": 4.490740740740741e-05,
"loss": 0.0005,
"step": 11900
},
{
"epoch": 2.75,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0004628011374734342,
"eval_runtime": 1958.7798,
"eval_samples_per_second": 17.644,
"eval_steps_per_second": 4.411,
"step": 11900
},
{
"epoch": 2.78,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.0259,
"step": 12000
},
{
"epoch": 2.78,
"eval_accuracy": 0.9996238350868225,
"eval_loss": 0.001358355744741857,
"eval_runtime": 1971.0225,
"eval_samples_per_second": 17.534,
"eval_steps_per_second": 4.384,
"step": 12000
},
{
"epoch": 2.8,
"learning_rate": 4.3981481481481486e-05,
"loss": 0.0226,
"step": 12100
},
{
"epoch": 2.8,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00019500043708831072,
"eval_runtime": 1970.5019,
"eval_samples_per_second": 17.539,
"eval_steps_per_second": 4.385,
"step": 12100
},
{
"epoch": 2.82,
"learning_rate": 4.351851851851852e-05,
"loss": 0.0,
"step": 12200
},
{
"epoch": 2.82,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.0002164940524380654,
"eval_runtime": 1961.9305,
"eval_samples_per_second": 17.615,
"eval_steps_per_second": 4.404,
"step": 12200
},
{
"epoch": 2.85,
"learning_rate": 4.305555555555556e-05,
"loss": 0.0,
"step": 12300
},
{
"epoch": 2.85,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.00010657820530468598,
"eval_runtime": 1973.4403,
"eval_samples_per_second": 17.513,
"eval_steps_per_second": 4.378,
"step": 12300
},
{
"epoch": 2.87,
"learning_rate": 4.259259259259259e-05,
"loss": 0.0145,
"step": 12400
},
{
"epoch": 2.87,
"eval_accuracy": 1.0,
"eval_loss": 4.758801151183434e-05,
"eval_runtime": 1977.328,
"eval_samples_per_second": 17.478,
"eval_steps_per_second": 4.37,
"step": 12400
},
{
"epoch": 2.89,
"learning_rate": 4.212962962962963e-05,
"loss": 0.0083,
"step": 12500
},
{
"epoch": 2.89,
"eval_accuracy": 0.9995659589767456,
"eval_loss": 0.001972577767446637,
"eval_runtime": 1962.5085,
"eval_samples_per_second": 17.61,
"eval_steps_per_second": 4.403,
"step": 12500
},
{
"epoch": 2.92,
"learning_rate": 4.166666666666667e-05,
"loss": 0.02,
"step": 12600
},
{
"epoch": 2.92,
"eval_accuracy": 0.9994791746139526,
"eval_loss": 0.00198388216085732,
"eval_runtime": 1956.5161,
"eval_samples_per_second": 17.664,
"eval_steps_per_second": 4.416,
"step": 12600
},
{
"epoch": 2.94,
"learning_rate": 4.1203703703703705e-05,
"loss": 0.0293,
"step": 12700
},
{
"epoch": 2.94,
"eval_accuracy": 0.9994212985038757,
"eval_loss": 0.0031591171864420176,
"eval_runtime": 1997.7409,
"eval_samples_per_second": 17.3,
"eval_steps_per_second": 4.325,
"step": 12700
},
{
"epoch": 2.96,
"learning_rate": 4.074074074074074e-05,
"loss": 0.0164,
"step": 12800
},
{
"epoch": 2.96,
"eval_accuracy": 0.9997395873069763,
"eval_loss": 0.0012433998053893447,
"eval_runtime": 2019.1263,
"eval_samples_per_second": 17.116,
"eval_steps_per_second": 4.279,
"step": 12800
},
{
"epoch": 2.99,
"learning_rate": 4.027777777777778e-05,
"loss": 0.0147,
"step": 12900
},
{
"epoch": 2.99,
"eval_accuracy": 0.9997684955596924,
"eval_loss": 0.001224155188538134,
"eval_runtime": 2015.6572,
"eval_samples_per_second": 17.146,
"eval_steps_per_second": 4.286,
"step": 12900
},
{
"epoch": 3.01,
"learning_rate": 3.981481481481482e-05,
"loss": 0.0112,
"step": 13000
},
{
"epoch": 3.01,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.0008148940978571773,
"eval_runtime": 1991.7022,
"eval_samples_per_second": 17.352,
"eval_steps_per_second": 4.338,
"step": 13000
},
{
"epoch": 3.03,
"learning_rate": 3.935185185185186e-05,
"loss": 0.002,
"step": 13100
},
{
"epoch": 3.03,
"eval_accuracy": 0.9997395873069763,
"eval_loss": 0.0012871942017227411,
"eval_runtime": 2016.2834,
"eval_samples_per_second": 17.14,
"eval_steps_per_second": 4.285,
"step": 13100
},
{
"epoch": 3.06,
"learning_rate": 3.888888888888889e-05,
"loss": 0.017,
"step": 13200
},
{
"epoch": 3.06,
"eval_accuracy": 0.9997106194496155,
"eval_loss": 0.0010973262833431363,
"eval_runtime": 2017.4262,
"eval_samples_per_second": 17.131,
"eval_steps_per_second": 4.283,
"step": 13200
},
{
"epoch": 3.08,
"learning_rate": 3.8425925925925924e-05,
"loss": 0.0142,
"step": 13300
},
{
"epoch": 3.08,
"eval_accuracy": 0.9996528029441833,
"eval_loss": 0.0019141812808811665,
"eval_runtime": 2002.6757,
"eval_samples_per_second": 17.257,
"eval_steps_per_second": 4.314,
"step": 13300
},
{
"epoch": 3.1,
"learning_rate": 3.7962962962962964e-05,
"loss": 0.008,
"step": 13400
},
{
"epoch": 3.1,
"eval_accuracy": 0.9997395873069763,
"eval_loss": 0.00135290517937392,
"eval_runtime": 2020.5372,
"eval_samples_per_second": 17.104,
"eval_steps_per_second": 4.276,
"step": 13400
},
{
"epoch": 3.12,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.0411,
"step": 13500
},
{
"epoch": 3.12,
"eval_accuracy": 0.9997974634170532,
"eval_loss": 0.000736766669433564,
"eval_runtime": 2162.1556,
"eval_samples_per_second": 15.984,
"eval_steps_per_second": 3.996,
"step": 13500
},
{
"epoch": 3.15,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.0262,
"step": 13600
},
{
"epoch": 3.15,
"eval_accuracy": 0.9998553395271301,
"eval_loss": 0.000846204929985106,
"eval_runtime": 2119.0303,
"eval_samples_per_second": 16.309,
"eval_steps_per_second": 4.077,
"step": 13600
},
{
"epoch": 3.17,
"learning_rate": 3.6574074074074076e-05,
"loss": 0.0198,
"step": 13700
},
{
"epoch": 3.17,
"eval_accuracy": 0.9997106194496155,
"eval_loss": 0.0010991438757628202,
"eval_runtime": 2095.0628,
"eval_samples_per_second": 16.496,
"eval_steps_per_second": 4.124,
"step": 13700
},
{
"epoch": 3.19,
"learning_rate": 3.611111111111111e-05,
"loss": 0.0178,
"step": 13800
},
{
"epoch": 3.19,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.00029710811213590205,
"eval_runtime": 2130.6792,
"eval_samples_per_second": 16.22,
"eval_steps_per_second": 4.055,
"step": 13800
},
{
"epoch": 3.22,
"learning_rate": 3.564814814814815e-05,
"loss": 0.0072,
"step": 13900
},
{
"epoch": 3.22,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00018699387146625668,
"eval_runtime": 2082.0917,
"eval_samples_per_second": 16.599,
"eval_steps_per_second": 4.15,
"step": 13900
},
{
"epoch": 3.24,
"learning_rate": 3.518518518518519e-05,
"loss": 0.0004,
"step": 14000
},
{
"epoch": 3.24,
"eval_accuracy": 0.9998263716697693,
"eval_loss": 0.0013777822023257613,
"eval_runtime": 2053.664,
"eval_samples_per_second": 16.828,
"eval_steps_per_second": 4.207,
"step": 14000
},
{
"epoch": 3.26,
"learning_rate": 3.472222222222222e-05,
"loss": 0.0191,
"step": 14100
},
{
"epoch": 3.26,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.0004184871504548937,
"eval_runtime": 2048.7946,
"eval_samples_per_second": 16.868,
"eval_steps_per_second": 4.217,
"step": 14100
},
{
"epoch": 3.29,
"learning_rate": 3.425925925925926e-05,
"loss": 0.007,
"step": 14200
},
{
"epoch": 3.29,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.0004062869702465832,
"eval_runtime": 2055.664,
"eval_samples_per_second": 16.812,
"eval_steps_per_second": 4.203,
"step": 14200
},
{
"epoch": 3.31,
"learning_rate": 3.3796296296296295e-05,
"loss": 0.0108,
"step": 14300
},
{
"epoch": 3.31,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.00011388419807190076,
"eval_runtime": 2043.6545,
"eval_samples_per_second": 16.911,
"eval_steps_per_second": 4.228,
"step": 14300
},
{
"epoch": 3.33,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.0,
"step": 14400
},
{
"epoch": 3.33,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 7.532363088103011e-05,
"eval_runtime": 2040.9204,
"eval_samples_per_second": 16.934,
"eval_steps_per_second": 4.233,
"step": 14400
},
{
"epoch": 3.36,
"learning_rate": 3.2870370370370375e-05,
"loss": 0.0006,
"step": 14500
},
{
"epoch": 3.36,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.0003408396732993424,
"eval_runtime": 2057.5647,
"eval_samples_per_second": 16.797,
"eval_steps_per_second": 4.199,
"step": 14500
},
{
"epoch": 3.38,
"learning_rate": 3.240740740740741e-05,
"loss": 0.0085,
"step": 14600
},
{
"epoch": 3.38,
"eval_accuracy": 0.9992766380310059,
"eval_loss": 0.0034337618853896856,
"eval_runtime": 2027.249,
"eval_samples_per_second": 17.048,
"eval_steps_per_second": 4.262,
"step": 14600
},
{
"epoch": 3.4,
"learning_rate": 3.194444444444444e-05,
"loss": 0.0002,
"step": 14700
},
{
"epoch": 3.4,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.0006225552642717957,
"eval_runtime": 2004.1478,
"eval_samples_per_second": 17.244,
"eval_steps_per_second": 4.311,
"step": 14700
},
{
"epoch": 3.43,
"learning_rate": 3.148148148148148e-05,
"loss": 0.0181,
"step": 14800
},
{
"epoch": 3.43,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.000251033779932186,
"eval_runtime": 2016.4131,
"eval_samples_per_second": 17.139,
"eval_steps_per_second": 4.285,
"step": 14800
},
{
"epoch": 3.45,
"learning_rate": 3.101851851851852e-05,
"loss": 0.0021,
"step": 14900
},
{
"epoch": 3.45,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.00040141510544344783,
"eval_runtime": 2000.3942,
"eval_samples_per_second": 17.277,
"eval_steps_per_second": 4.319,
"step": 14900
},
{
"epoch": 3.47,
"learning_rate": 3.055555555555556e-05,
"loss": 0.0069,
"step": 15000
},
{
"epoch": 3.47,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0006463331519626081,
"eval_runtime": 2015.2783,
"eval_samples_per_second": 17.149,
"eval_steps_per_second": 4.287,
"step": 15000
},
{
"epoch": 3.5,
"learning_rate": 3.0092592592592593e-05,
"loss": 0.0156,
"step": 15100
},
{
"epoch": 3.5,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.0001428252726327628,
"eval_runtime": 1995.7618,
"eval_samples_per_second": 17.317,
"eval_steps_per_second": 4.329,
"step": 15100
},
{
"epoch": 3.52,
"learning_rate": 2.962962962962963e-05,
"loss": 0.0042,
"step": 15200
},
{
"epoch": 3.52,
"eval_accuracy": 0.9997974634170532,
"eval_loss": 0.000510143639985472,
"eval_runtime": 2000.972,
"eval_samples_per_second": 17.272,
"eval_steps_per_second": 4.318,
"step": 15200
},
{
"epoch": 3.54,
"learning_rate": 2.916666666666667e-05,
"loss": 0.0233,
"step": 15300
},
{
"epoch": 3.54,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00019888828683178872,
"eval_runtime": 2002.1598,
"eval_samples_per_second": 17.261,
"eval_steps_per_second": 4.315,
"step": 15300
},
{
"epoch": 3.56,
"learning_rate": 2.8703703703703706e-05,
"loss": 0.003,
"step": 15400
},
{
"epoch": 3.56,
"eval_accuracy": 0.9997974634170532,
"eval_loss": 0.0006905001355335116,
"eval_runtime": 2000.419,
"eval_samples_per_second": 17.276,
"eval_steps_per_second": 4.319,
"step": 15400
},
{
"epoch": 3.59,
"learning_rate": 2.824074074074074e-05,
"loss": 0.0149,
"step": 15500
},
{
"epoch": 3.59,
"eval_accuracy": 0.9998553395271301,
"eval_loss": 0.000585312838666141,
"eval_runtime": 1997.3791,
"eval_samples_per_second": 17.303,
"eval_steps_per_second": 4.326,
"step": 15500
},
{
"epoch": 3.61,
"learning_rate": 2.777777777777778e-05,
"loss": 0.0072,
"step": 15600
},
{
"epoch": 3.61,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.000229826764552854,
"eval_runtime": 2001.2597,
"eval_samples_per_second": 17.269,
"eval_steps_per_second": 4.317,
"step": 15600
},
{
"epoch": 3.63,
"learning_rate": 2.7314814814814816e-05,
"loss": 0.0004,
"step": 15700
},
{
"epoch": 3.63,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 5.024338679504581e-05,
"eval_runtime": 2013.5805,
"eval_samples_per_second": 17.163,
"eval_steps_per_second": 4.291,
"step": 15700
},
{
"epoch": 3.66,
"learning_rate": 2.6851851851851855e-05,
"loss": 0.0001,
"step": 15800
},
{
"epoch": 3.66,
"eval_accuracy": 0.999913215637207,
"eval_loss": 0.00017916383512783796,
"eval_runtime": 1994.345,
"eval_samples_per_second": 17.329,
"eval_steps_per_second": 4.332,
"step": 15800
},
{
"epoch": 3.68,
"learning_rate": 2.6388888888888892e-05,
"loss": 0.0186,
"step": 15900
},
{
"epoch": 3.68,
"eval_accuracy": 1.0,
"eval_loss": 9.207503353536595e-06,
"eval_runtime": 2056.2161,
"eval_samples_per_second": 16.808,
"eval_steps_per_second": 4.202,
"step": 15900
},
{
"epoch": 3.7,
"learning_rate": 2.5925925925925925e-05,
"loss": 0.0115,
"step": 16000
},
{
"epoch": 3.7,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00022165325935930014,
"eval_runtime": 2044.6907,
"eval_samples_per_second": 16.902,
"eval_steps_per_second": 4.226,
"step": 16000
},
{
"epoch": 3.73,
"learning_rate": 2.5462962962962965e-05,
"loss": 0.0011,
"step": 16100
},
{
"epoch": 3.73,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00027788631268776953,
"eval_runtime": 2046.6409,
"eval_samples_per_second": 16.886,
"eval_steps_per_second": 4.222,
"step": 16100
},
{
"epoch": 3.75,
"learning_rate": 2.5e-05,
"loss": 0.0048,
"step": 16200
},
{
"epoch": 3.75,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 5.909843821427785e-05,
"eval_runtime": 2008.3137,
"eval_samples_per_second": 17.208,
"eval_steps_per_second": 4.302,
"step": 16200
},
{
"epoch": 3.77,
"learning_rate": 2.4537037037037038e-05,
"loss": 0.0042,
"step": 16300
},
{
"epoch": 3.77,
"eval_accuracy": 1.0,
"eval_loss": 6.828932328062365e-06,
"eval_runtime": 2129.8226,
"eval_samples_per_second": 16.227,
"eval_steps_per_second": 4.057,
"step": 16300
},
{
"epoch": 3.8,
"learning_rate": 2.4074074074074074e-05,
"loss": 0.0024,
"step": 16400
},
{
"epoch": 3.8,
"eval_accuracy": 1.0,
"eval_loss": 8.2383139670128e-06,
"eval_runtime": 2113.6583,
"eval_samples_per_second": 16.351,
"eval_steps_per_second": 4.088,
"step": 16400
},
{
"epoch": 3.82,
"learning_rate": 2.361111111111111e-05,
"loss": 0.0,
"step": 16500
},
{
"epoch": 3.82,
"eval_accuracy": 1.0,
"eval_loss": 5.800426606583642e-06,
"eval_runtime": 2122.3997,
"eval_samples_per_second": 16.283,
"eval_steps_per_second": 4.071,
"step": 16500
},
{
"epoch": 3.84,
"learning_rate": 2.314814814814815e-05,
"loss": 0.0003,
"step": 16600
},
{
"epoch": 3.84,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00010272156214341521,
"eval_runtime": 2128.9658,
"eval_samples_per_second": 16.233,
"eval_steps_per_second": 4.058,
"step": 16600
},
{
"epoch": 3.87,
"learning_rate": 2.2685185185185187e-05,
"loss": 0.0,
"step": 16700
},
{
"epoch": 3.87,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 7.889495464041829e-05,
"eval_runtime": 2147.2327,
"eval_samples_per_second": 16.095,
"eval_steps_per_second": 4.024,
"step": 16700
},
{
"epoch": 3.89,
"learning_rate": 2.2222222222222223e-05,
"loss": 0.0,
"step": 16800
},
{
"epoch": 3.89,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 7.938377530081198e-05,
"eval_runtime": 2139.4855,
"eval_samples_per_second": 16.153,
"eval_steps_per_second": 4.038,
"step": 16800
},
{
"epoch": 3.91,
"learning_rate": 2.175925925925926e-05,
"loss": 0.0029,
"step": 16900
},
{
"epoch": 3.91,
"eval_accuracy": 0.9998842477798462,
"eval_loss": 0.0005274215945973992,
"eval_runtime": 2142.1862,
"eval_samples_per_second": 16.133,
"eval_steps_per_second": 4.033,
"step": 16900
},
{
"epoch": 3.94,
"learning_rate": 2.1296296296296296e-05,
"loss": 0.0066,
"step": 17000
},
{
"epoch": 3.94,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00019657429947983474,
"eval_runtime": 2149.2032,
"eval_samples_per_second": 16.08,
"eval_steps_per_second": 4.02,
"step": 17000
},
{
"epoch": 3.96,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.0079,
"step": 17100
},
{
"epoch": 3.96,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 5.7856173953041434e-05,
"eval_runtime": 2135.9752,
"eval_samples_per_second": 16.18,
"eval_steps_per_second": 4.045,
"step": 17100
},
{
"epoch": 3.98,
"learning_rate": 2.037037037037037e-05,
"loss": 0.0091,
"step": 17200
},
{
"epoch": 3.98,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.00015575718134641647,
"eval_runtime": 2158.5953,
"eval_samples_per_second": 16.01,
"eval_steps_per_second": 4.003,
"step": 17200
},
{
"epoch": 4.0,
"learning_rate": 1.990740740740741e-05,
"loss": 0.0951,
"step": 17300
},
{
"epoch": 4.0,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 6.823511648690328e-05,
"eval_runtime": 2106.2766,
"eval_samples_per_second": 16.408,
"eval_steps_per_second": 4.102,
"step": 17300
},
{
"epoch": 4.03,
"learning_rate": 1.9444444444444445e-05,
"loss": 0.0578,
"step": 17400
},
{
"epoch": 4.03,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.00031872568069957197,
"eval_runtime": 2091.5056,
"eval_samples_per_second": 16.524,
"eval_steps_per_second": 4.131,
"step": 17400
},
{
"epoch": 4.05,
"learning_rate": 1.8981481481481482e-05,
"loss": 0.0171,
"step": 17500
},
{
"epoch": 4.05,
"eval_accuracy": 0.9999421238899231,
"eval_loss": 0.0003504869237076491,
"eval_runtime": 2076.2302,
"eval_samples_per_second": 16.646,
"eval_steps_per_second": 4.161,
"step": 17500
},
{
"epoch": 4.07,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.0305,
"step": 17600
},
{
"epoch": 4.07,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00012279333896003664,
"eval_runtime": 2072.7643,
"eval_samples_per_second": 16.673,
"eval_steps_per_second": 4.168,
"step": 17600
},
{
"epoch": 4.1,
"learning_rate": 1.8055555555555555e-05,
"loss": 0.0449,
"step": 17700
},
{
"epoch": 4.1,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00021972648391965777,
"eval_runtime": 2090.628,
"eval_samples_per_second": 16.531,
"eval_steps_per_second": 4.133,
"step": 17700
},
{
"epoch": 4.12,
"learning_rate": 1.7592592592592595e-05,
"loss": 0.0161,
"step": 17800
},
{
"epoch": 4.12,
"eval_accuracy": 1.0,
"eval_loss": 2.7198611860512756e-05,
"eval_runtime": 2085.7289,
"eval_samples_per_second": 16.57,
"eval_steps_per_second": 4.142,
"step": 17800
},
{
"epoch": 4.14,
"learning_rate": 1.712962962962963e-05,
"loss": 0.0322,
"step": 17900
},
{
"epoch": 4.14,
"eval_accuracy": 1.0,
"eval_loss": 2.2180371161084622e-05,
"eval_runtime": 2061.6769,
"eval_samples_per_second": 16.763,
"eval_steps_per_second": 4.191,
"step": 17900
},
{
"epoch": 4.17,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.0358,
"step": 18000
},
{
"epoch": 4.17,
"eval_accuracy": 0.9999710917472839,
"eval_loss": 0.00010751090303529054,
"eval_runtime": 2107.1409,
"eval_samples_per_second": 16.401,
"eval_steps_per_second": 4.1,
"step": 18000
},
{
"epoch": 4.19,
"learning_rate": 1.6203703703703704e-05,
"loss": 0.0264,
"step": 18100
},
{
"epoch": 4.19,
"eval_accuracy": 1.0,
"eval_loss": 6.194192792463582e-06,
"eval_runtime": 2091.7086,
"eval_samples_per_second": 16.522,
"eval_steps_per_second": 4.131,
"step": 18100
},
{
"epoch": 4.21,
"learning_rate": 1.574074074074074e-05,
"loss": 0.0199,
"step": 18200
},
{
"epoch": 4.21,
"eval_accuracy": 1.0,
"eval_loss": 6.114233656262513e-06,
"eval_runtime": 2093.6259,
"eval_samples_per_second": 16.507,
"eval_steps_per_second": 4.127,
"step": 18200
},
{
"epoch": 4.24,
"learning_rate": 1.527777777777778e-05,
"loss": 0.0266,
"step": 18300
},
{
"epoch": 4.24,
"eval_accuracy": 1.0,
"eval_loss": 6.532317456731107e-06,
"eval_runtime": 2103.3039,
"eval_samples_per_second": 16.431,
"eval_steps_per_second": 4.108,
"step": 18300
},
{
"epoch": 4.26,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.0162,
"step": 18400
},
{
"epoch": 4.26,
"eval_accuracy": 1.0,
"eval_loss": 6.056379334040685e-06,
"eval_runtime": 2141.6719,
"eval_samples_per_second": 16.137,
"eval_steps_per_second": 4.034,
"step": 18400
},
{
"epoch": 4.28,
"learning_rate": 1.4351851851851853e-05,
"loss": 0.0142,
"step": 18500
},
{
"epoch": 4.28,
"eval_accuracy": 1.0,
"eval_loss": 6.732083420502022e-06,
"eval_runtime": 2137.4831,
"eval_samples_per_second": 16.169,
"eval_steps_per_second": 4.042,
"step": 18500
},
{
"epoch": 4.31,
"learning_rate": 1.388888888888889e-05,
"loss": 0.0353,
"step": 18600
},
{
"epoch": 4.31,
"eval_accuracy": 1.0,
"eval_loss": 5.884473466721829e-06,
"eval_runtime": 2111.534,
"eval_samples_per_second": 16.367,
"eval_steps_per_second": 4.092,
"step": 18600
},
{
"epoch": 4.33,
"learning_rate": 1.3425925925925928e-05,
"loss": 0.0435,
"step": 18700
},
{
"epoch": 4.33,
"eval_accuracy": 1.0,
"eval_loss": 6.438468062697211e-06,
"eval_runtime": 2127.2273,
"eval_samples_per_second": 16.247,
"eval_steps_per_second": 4.062,
"step": 18700
},
{
"epoch": 4.35,
"learning_rate": 1.2962962962962962e-05,
"loss": 0.0067,
"step": 18800
},
{
"epoch": 4.35,
"eval_accuracy": 1.0,
"eval_loss": 9.256172234017868e-06,
"eval_runtime": 2183.0463,
"eval_samples_per_second": 15.831,
"eval_steps_per_second": 3.958,
"step": 18800
},
{
"epoch": 4.38,
"learning_rate": 1.25e-05,
"loss": 0.0299,
"step": 18900
},
{
"epoch": 4.38,
"eval_accuracy": 1.0,
"eval_loss": 6.904490419401554e-06,
"eval_runtime": 2110.4592,
"eval_samples_per_second": 16.376,
"eval_steps_per_second": 4.094,
"step": 18900
},
{
"epoch": 4.4,
"learning_rate": 1.2037037037037037e-05,
"loss": 0.0063,
"step": 19000
},
{
"epoch": 4.4,
"eval_accuracy": 1.0,
"eval_loss": 6.991323061811272e-06,
"eval_runtime": 2074.4391,
"eval_samples_per_second": 16.66,
"eval_steps_per_second": 4.165,
"step": 19000
},
{
"epoch": 4.42,
"learning_rate": 1.1574074074074075e-05,
"loss": 0.0117,
"step": 19100
},
{
"epoch": 4.42,
"eval_accuracy": 1.0,
"eval_loss": 5.223146672506118e-06,
"eval_runtime": 2093.0232,
"eval_samples_per_second": 16.512,
"eval_steps_per_second": 4.128,
"step": 19100
},
{
"epoch": 4.44,
"learning_rate": 1.1111111111111112e-05,
"loss": 0.0107,
"step": 19200
},
{
"epoch": 4.44,
"eval_accuracy": 1.0,
"eval_loss": 7.764682777633425e-06,
"eval_runtime": 2092.3489,
"eval_samples_per_second": 16.517,
"eval_steps_per_second": 4.129,
"step": 19200
},
{
"epoch": 4.47,
"learning_rate": 1.0648148148148148e-05,
"loss": 0.0162,
"step": 19300
},
{
"epoch": 4.47,
"eval_accuracy": 1.0,
"eval_loss": 5.700497240468394e-06,
"eval_runtime": 2095.1985,
"eval_samples_per_second": 16.495,
"eval_steps_per_second": 4.124,
"step": 19300
},
{
"epoch": 4.49,
"learning_rate": 1.0185185185185185e-05,
"loss": 0.0138,
"step": 19400
},
{
"epoch": 4.49,
"eval_accuracy": 1.0,
"eval_loss": 5.209324172028573e-06,
"eval_runtime": 2073.7497,
"eval_samples_per_second": 16.665,
"eval_steps_per_second": 4.166,
"step": 19400
},
{
"epoch": 4.51,
"learning_rate": 9.722222222222223e-06,
"loss": 0.0124,
"step": 19500
},
{
"epoch": 4.51,
"eval_accuracy": 1.0,
"eval_loss": 5.243016858003102e-06,
"eval_runtime": 2056.3515,
"eval_samples_per_second": 16.806,
"eval_steps_per_second": 4.202,
"step": 19500
},
{
"epoch": 4.54,
"learning_rate": 9.259259259259259e-06,
"loss": 0.0083,
"step": 19600
},
{
"epoch": 4.54,
"eval_accuracy": 1.0,
"eval_loss": 5.0634776016522665e-06,
"eval_runtime": 2077.8389,
"eval_samples_per_second": 16.633,
"eval_steps_per_second": 4.158,
"step": 19600
},
{
"epoch": 4.56,
"learning_rate": 8.796296296296297e-06,
"loss": 0.0066,
"step": 19700
},
{
"epoch": 4.56,
"eval_accuracy": 1.0,
"eval_loss": 4.925776011077687e-06,
"eval_runtime": 2073.5316,
"eval_samples_per_second": 16.667,
"eval_steps_per_second": 4.167,
"step": 19700
},
{
"epoch": 4.58,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0058,
"step": 19800
},
{
"epoch": 4.58,
"eval_accuracy": 1.0,
"eval_loss": 4.750945663545281e-06,
"eval_runtime": 2057.702,
"eval_samples_per_second": 16.795,
"eval_steps_per_second": 4.199,
"step": 19800
},
{
"epoch": 4.61,
"learning_rate": 7.87037037037037e-06,
"loss": 0.0032,
"step": 19900
},
{
"epoch": 4.61,
"eval_accuracy": 1.0,
"eval_loss": 6.96109145792434e-06,
"eval_runtime": 2071.5479,
"eval_samples_per_second": 16.683,
"eval_steps_per_second": 4.171,
"step": 19900
},
{
"epoch": 4.63,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.0205,
"step": 20000
},
{
"epoch": 4.63,
"eval_accuracy": 1.0,
"eval_loss": 4.608726612786995e-06,
"eval_runtime": 2066.372,
"eval_samples_per_second": 16.725,
"eval_steps_per_second": 4.181,
"step": 20000
},
{
"epoch": 4.65,
"learning_rate": 6.944444444444445e-06,
"loss": 0.0094,
"step": 20100
},
{
"epoch": 4.65,
"eval_accuracy": 1.0,
"eval_loss": 4.8284973672707565e-06,
"eval_runtime": 2054.9166,
"eval_samples_per_second": 16.818,
"eval_steps_per_second": 4.205,
"step": 20100
},
{
"epoch": 4.68,
"learning_rate": 6.481481481481481e-06,
"loss": 0.003,
"step": 20200
},
{
"epoch": 4.68,
"eval_accuracy": 1.0,
"eval_loss": 4.495966550166486e-06,
"eval_runtime": 2072.6571,
"eval_samples_per_second": 16.674,
"eval_steps_per_second": 4.169,
"step": 20200
},
{
"epoch": 4.7,
"learning_rate": 6.0185185185185185e-06,
"loss": 0.0035,
"step": 20300
},
{
"epoch": 4.7,
"eval_accuracy": 1.0,
"eval_loss": 5.835635420226026e-06,
"eval_runtime": 2047.8141,
"eval_samples_per_second": 16.877,
"eval_steps_per_second": 4.219,
"step": 20300
},
{
"epoch": 4.72,
"learning_rate": 5.555555555555556e-06,
"loss": 0.0257,
"step": 20400
},
{
"epoch": 4.72,
"eval_accuracy": 1.0,
"eval_loss": 5.829508609167533e-06,
"eval_runtime": 2091.8646,
"eval_samples_per_second": 16.521,
"eval_steps_per_second": 4.13,
"step": 20400
},
{
"epoch": 4.75,
"learning_rate": 5.092592592592592e-06,
"loss": 0.0019,
"step": 20500
},
{
"epoch": 4.75,
"eval_accuracy": 1.0,
"eval_loss": 6.3429124566027895e-06,
"eval_runtime": 2040.9379,
"eval_samples_per_second": 16.933,
"eval_steps_per_second": 4.233,
"step": 20500
},
{
"epoch": 4.77,
"learning_rate": 4.6296296296296296e-06,
"loss": 0.0023,
"step": 20600
},
{
"epoch": 4.77,
"eval_accuracy": 1.0,
"eval_loss": 8.131992217386141e-06,
"eval_runtime": 2048.3614,
"eval_samples_per_second": 16.872,
"eval_steps_per_second": 4.218,
"step": 20600
},
{
"epoch": 4.79,
"learning_rate": 4.166666666666667e-06,
"loss": 0.0062,
"step": 20700
},
{
"epoch": 4.79,
"eval_accuracy": 1.0,
"eval_loss": 8.594151950092055e-06,
"eval_runtime": 2094.5382,
"eval_samples_per_second": 16.5,
"eval_steps_per_second": 4.125,
"step": 20700
},
{
"epoch": 4.81,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.0039,
"step": 20800
},
{
"epoch": 4.81,
"eval_accuracy": 1.0,
"eval_loss": 7.4294948717579246e-06,
"eval_runtime": 2104.354,
"eval_samples_per_second": 16.423,
"eval_steps_per_second": 4.106,
"step": 20800
},
{
"epoch": 4.84,
"learning_rate": 3.2407407407407406e-06,
"loss": 0.0144,
"step": 20900
},
{
"epoch": 4.84,
"eval_accuracy": 1.0,
"eval_loss": 6.862039299448952e-06,
"eval_runtime": 2101.0817,
"eval_samples_per_second": 16.449,
"eval_steps_per_second": 4.112,
"step": 20900
},
{
"epoch": 4.86,
"learning_rate": 2.777777777777778e-06,
"loss": 0.0109,
"step": 21000
},
{
"epoch": 4.86,
"eval_accuracy": 1.0,
"eval_loss": 6.136932825029362e-06,
"eval_runtime": 2119.5964,
"eval_samples_per_second": 16.305,
"eval_steps_per_second": 4.076,
"step": 21000
},
{
"epoch": 4.88,
"learning_rate": 2.3148148148148148e-06,
"loss": 0.0148,
"step": 21100
},
{
"epoch": 4.88,
"eval_accuracy": 1.0,
"eval_loss": 6.497817139461404e-06,
"eval_runtime": 2115.6009,
"eval_samples_per_second": 16.336,
"eval_steps_per_second": 4.084,
"step": 21100
},
{
"epoch": 4.91,
"learning_rate": 1.8518518518518519e-06,
"loss": 0.0308,
"step": 21200
},
{
"epoch": 4.91,
"eval_accuracy": 1.0,
"eval_loss": 7.753816134936642e-06,
"eval_runtime": 2118.9207,
"eval_samples_per_second": 16.31,
"eval_steps_per_second": 4.078,
"step": 21200
},
{
"epoch": 4.93,
"learning_rate": 1.388888888888889e-06,
"loss": 0.0023,
"step": 21300
},
{
"epoch": 4.93,
"eval_accuracy": 1.0,
"eval_loss": 7.5415960054669995e-06,
"eval_runtime": 2120.9953,
"eval_samples_per_second": 16.294,
"eval_steps_per_second": 4.074,
"step": 21300
},
{
"epoch": 4.95,
"learning_rate": 9.259259259259259e-07,
"loss": 0.0243,
"step": 21400
},
{
"epoch": 4.95,
"eval_accuracy": 1.0,
"eval_loss": 7.68591053201817e-06,
"eval_runtime": 2120.6941,
"eval_samples_per_second": 16.297,
"eval_steps_per_second": 4.074,
"step": 21400
},
{
"epoch": 4.98,
"learning_rate": 4.6296296296296297e-07,
"loss": 0.0031,
"step": 21500
},
{
"epoch": 4.98,
"eval_accuracy": 1.0,
"eval_loss": 7.5350230872572865e-06,
"eval_runtime": 2105.7948,
"eval_samples_per_second": 16.412,
"eval_steps_per_second": 4.103,
"step": 21500
},
{
"epoch": 5.0,
"learning_rate": 0.0,
"loss": 0.0272,
"step": 21600
},
{
"epoch": 5.0,
"eval_accuracy": 1.0,
"eval_loss": 7.493398243241245e-06,
"eval_runtime": 2100.1734,
"eval_samples_per_second": 16.456,
"eval_steps_per_second": 4.114,
"step": 21600
},
{
"epoch": 5.0,
"step": 21600,
"total_flos": 2.295560541703184e+19,
"train_loss": 0.003923701412147946,
"train_runtime": 97975.3145,
"train_samples_per_second": 1.764,
"train_steps_per_second": 0.22
}
],
"max_steps": 21600,
"num_train_epochs": 5,
"total_flos": 2.295560541703184e+19,
"trial_name": null,
"trial_params": null
}