bart-large-scientific-papers / trainer_state.json
parteeksj's picture
bart-large fine tuned on the scientific papers dataset
99c6c0f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"global_step": 32850,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.997199391171994e-05,
"loss": 2.9757,
"step": 25
},
{
"epoch": 0.02,
"learning_rate": 3.994155251141553e-05,
"loss": 2.7383,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 3.9911111111111114e-05,
"loss": 2.5865,
"step": 75
},
{
"epoch": 0.03,
"learning_rate": 3.98806697108067e-05,
"loss": 2.6042,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 3.9850228310502285e-05,
"loss": 2.7077,
"step": 125
},
{
"epoch": 0.05,
"learning_rate": 3.982100456621005e-05,
"loss": 2.8069,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 3.9790563165905637e-05,
"loss": 2.5831,
"step": 175
},
{
"epoch": 0.06,
"learning_rate": 3.976012176560122e-05,
"loss": 2.5526,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 3.972968036529681e-05,
"loss": 2.5865,
"step": 225
},
{
"epoch": 0.08,
"learning_rate": 3.9699238964992394e-05,
"loss": 2.4963,
"step": 250
},
{
"epoch": 0.08,
"learning_rate": 3.966879756468798e-05,
"loss": 2.5408,
"step": 275
},
{
"epoch": 0.09,
"learning_rate": 3.9638356164383565e-05,
"loss": 2.7108,
"step": 300
},
{
"epoch": 0.1,
"learning_rate": 3.960791476407915e-05,
"loss": 2.6353,
"step": 325
},
{
"epoch": 0.11,
"learning_rate": 3.9577473363774736e-05,
"loss": 2.538,
"step": 350
},
{
"epoch": 0.11,
"learning_rate": 3.954703196347032e-05,
"loss": 2.6099,
"step": 375
},
{
"epoch": 0.12,
"learning_rate": 3.951659056316591e-05,
"loss": 2.569,
"step": 400
},
{
"epoch": 0.13,
"learning_rate": 3.948614916286149e-05,
"loss": 2.4251,
"step": 425
},
{
"epoch": 0.14,
"learning_rate": 3.945570776255708e-05,
"loss": 2.4954,
"step": 450
},
{
"epoch": 0.14,
"learning_rate": 3.9425266362252665e-05,
"loss": 2.3763,
"step": 475
},
{
"epoch": 0.15,
"learning_rate": 3.939482496194826e-05,
"loss": 2.5156,
"step": 500
},
{
"epoch": 0.16,
"learning_rate": 3.936438356164384e-05,
"loss": 2.4705,
"step": 525
},
{
"epoch": 0.17,
"learning_rate": 3.933394216133942e-05,
"loss": 2.5698,
"step": 550
},
{
"epoch": 0.18,
"learning_rate": 3.930350076103501e-05,
"loss": 2.497,
"step": 575
},
{
"epoch": 0.18,
"learning_rate": 3.92730593607306e-05,
"loss": 2.3479,
"step": 600
},
{
"epoch": 0.19,
"learning_rate": 3.9242617960426186e-05,
"loss": 2.4028,
"step": 625
},
{
"epoch": 0.2,
"learning_rate": 3.921217656012177e-05,
"loss": 2.4164,
"step": 650
},
{
"epoch": 0.21,
"learning_rate": 3.918173515981735e-05,
"loss": 2.5796,
"step": 675
},
{
"epoch": 0.21,
"learning_rate": 3.915129375951294e-05,
"loss": 2.5134,
"step": 700
},
{
"epoch": 0.22,
"learning_rate": 3.912085235920853e-05,
"loss": 2.4413,
"step": 725
},
{
"epoch": 0.23,
"learning_rate": 3.9090410958904114e-05,
"loss": 2.4925,
"step": 750
},
{
"epoch": 0.24,
"learning_rate": 3.90599695585997e-05,
"loss": 2.4394,
"step": 775
},
{
"epoch": 0.24,
"learning_rate": 3.9029528158295285e-05,
"loss": 2.4334,
"step": 800
},
{
"epoch": 0.25,
"learning_rate": 3.899908675799087e-05,
"loss": 2.3809,
"step": 825
},
{
"epoch": 0.26,
"learning_rate": 3.896864535768646e-05,
"loss": 2.4713,
"step": 850
},
{
"epoch": 0.27,
"learning_rate": 3.893820395738204e-05,
"loss": 2.3322,
"step": 875
},
{
"epoch": 0.27,
"learning_rate": 3.890776255707763e-05,
"loss": 2.3121,
"step": 900
},
{
"epoch": 0.28,
"learning_rate": 3.8877321156773214e-05,
"loss": 2.3795,
"step": 925
},
{
"epoch": 0.29,
"learning_rate": 3.88468797564688e-05,
"loss": 2.377,
"step": 950
},
{
"epoch": 0.3,
"learning_rate": 3.8816438356164385e-05,
"loss": 2.4454,
"step": 975
},
{
"epoch": 0.3,
"learning_rate": 3.878599695585998e-05,
"loss": 2.4502,
"step": 1000
},
{
"epoch": 0.31,
"learning_rate": 3.8755555555555556e-05,
"loss": 2.423,
"step": 1025
},
{
"epoch": 0.32,
"learning_rate": 3.872511415525114e-05,
"loss": 2.3717,
"step": 1050
},
{
"epoch": 0.33,
"learning_rate": 3.869467275494673e-05,
"loss": 2.4881,
"step": 1075
},
{
"epoch": 0.33,
"learning_rate": 3.866423135464232e-05,
"loss": 2.3898,
"step": 1100
},
{
"epoch": 0.34,
"learning_rate": 3.8633789954337906e-05,
"loss": 2.4626,
"step": 1125
},
{
"epoch": 0.35,
"learning_rate": 3.860334855403349e-05,
"loss": 2.3674,
"step": 1150
},
{
"epoch": 0.36,
"learning_rate": 3.857290715372907e-05,
"loss": 2.3929,
"step": 1175
},
{
"epoch": 0.37,
"learning_rate": 3.854246575342466e-05,
"loss": 2.3222,
"step": 1200
},
{
"epoch": 0.37,
"learning_rate": 3.851202435312025e-05,
"loss": 2.453,
"step": 1225
},
{
"epoch": 0.38,
"learning_rate": 3.8481582952815834e-05,
"loss": 2.4782,
"step": 1250
},
{
"epoch": 0.39,
"learning_rate": 3.845114155251142e-05,
"loss": 2.5002,
"step": 1275
},
{
"epoch": 0.4,
"learning_rate": 3.8420700152207006e-05,
"loss": 2.3431,
"step": 1300
},
{
"epoch": 0.4,
"learning_rate": 3.839025875190259e-05,
"loss": 2.3825,
"step": 1325
},
{
"epoch": 0.41,
"learning_rate": 3.835981735159818e-05,
"loss": 2.4681,
"step": 1350
},
{
"epoch": 0.42,
"learning_rate": 3.832937595129376e-05,
"loss": 2.4481,
"step": 1375
},
{
"epoch": 0.43,
"learning_rate": 3.829893455098935e-05,
"loss": 2.3781,
"step": 1400
},
{
"epoch": 0.43,
"learning_rate": 3.8268493150684934e-05,
"loss": 2.309,
"step": 1425
},
{
"epoch": 0.44,
"learning_rate": 3.823805175038052e-05,
"loss": 2.4879,
"step": 1450
},
{
"epoch": 0.45,
"learning_rate": 3.8207610350076105e-05,
"loss": 2.3736,
"step": 1475
},
{
"epoch": 0.46,
"learning_rate": 3.81771689497717e-05,
"loss": 2.5061,
"step": 1500
},
{
"epoch": 0.46,
"learning_rate": 3.814672754946728e-05,
"loss": 2.3825,
"step": 1525
},
{
"epoch": 0.47,
"learning_rate": 3.811628614916286e-05,
"loss": 2.3851,
"step": 1550
},
{
"epoch": 0.48,
"learning_rate": 3.808584474885845e-05,
"loss": 2.3361,
"step": 1575
},
{
"epoch": 0.49,
"learning_rate": 3.805540334855404e-05,
"loss": 2.4237,
"step": 1600
},
{
"epoch": 0.49,
"learning_rate": 3.8024961948249626e-05,
"loss": 2.3025,
"step": 1625
},
{
"epoch": 0.5,
"learning_rate": 3.7994520547945205e-05,
"loss": 2.3862,
"step": 1650
},
{
"epoch": 0.51,
"learning_rate": 3.796407914764079e-05,
"loss": 2.3268,
"step": 1675
},
{
"epoch": 0.52,
"learning_rate": 3.793363774733638e-05,
"loss": 2.3134,
"step": 1700
},
{
"epoch": 0.53,
"learning_rate": 3.790319634703197e-05,
"loss": 2.3043,
"step": 1725
},
{
"epoch": 0.53,
"learning_rate": 3.7872754946727555e-05,
"loss": 2.2921,
"step": 1750
},
{
"epoch": 0.54,
"learning_rate": 3.784231354642314e-05,
"loss": 2.3658,
"step": 1775
},
{
"epoch": 0.55,
"learning_rate": 3.7811872146118726e-05,
"loss": 2.3773,
"step": 1800
},
{
"epoch": 0.56,
"learning_rate": 3.778143074581431e-05,
"loss": 2.3391,
"step": 1825
},
{
"epoch": 0.56,
"learning_rate": 3.77509893455099e-05,
"loss": 2.3664,
"step": 1850
},
{
"epoch": 0.57,
"learning_rate": 3.772054794520548e-05,
"loss": 2.3339,
"step": 1875
},
{
"epoch": 0.58,
"learning_rate": 3.769010654490107e-05,
"loss": 2.2858,
"step": 1900
},
{
"epoch": 0.59,
"learning_rate": 3.7659665144596654e-05,
"loss": 2.344,
"step": 1925
},
{
"epoch": 0.59,
"learning_rate": 3.762922374429224e-05,
"loss": 2.2802,
"step": 1950
},
{
"epoch": 0.6,
"learning_rate": 3.7598782343987826e-05,
"loss": 2.2779,
"step": 1975
},
{
"epoch": 0.61,
"learning_rate": 3.756834094368341e-05,
"loss": 2.3558,
"step": 2000
},
{
"epoch": 0.62,
"learning_rate": 3.7537899543379e-05,
"loss": 2.3082,
"step": 2025
},
{
"epoch": 0.62,
"learning_rate": 3.750745814307458e-05,
"loss": 2.3199,
"step": 2050
},
{
"epoch": 0.63,
"learning_rate": 3.747701674277017e-05,
"loss": 2.4683,
"step": 2075
},
{
"epoch": 0.64,
"learning_rate": 3.744657534246576e-05,
"loss": 2.4058,
"step": 2100
},
{
"epoch": 0.65,
"learning_rate": 3.741613394216135e-05,
"loss": 2.3419,
"step": 2125
},
{
"epoch": 0.65,
"learning_rate": 3.7385692541856926e-05,
"loss": 2.2594,
"step": 2150
},
{
"epoch": 0.66,
"learning_rate": 3.735525114155251e-05,
"loss": 2.224,
"step": 2175
},
{
"epoch": 0.67,
"learning_rate": 3.7324809741248104e-05,
"loss": 2.4244,
"step": 2200
},
{
"epoch": 0.68,
"learning_rate": 3.729436834094369e-05,
"loss": 2.2928,
"step": 2225
},
{
"epoch": 0.68,
"learning_rate": 3.7263926940639275e-05,
"loss": 2.2732,
"step": 2250
},
{
"epoch": 0.69,
"learning_rate": 3.7233485540334854e-05,
"loss": 2.4067,
"step": 2275
},
{
"epoch": 0.7,
"learning_rate": 3.7203044140030446e-05,
"loss": 2.3029,
"step": 2300
},
{
"epoch": 0.71,
"learning_rate": 3.717260273972603e-05,
"loss": 2.3482,
"step": 2325
},
{
"epoch": 0.72,
"learning_rate": 3.714216133942162e-05,
"loss": 2.2777,
"step": 2350
},
{
"epoch": 0.72,
"learning_rate": 3.7111719939117203e-05,
"loss": 2.2948,
"step": 2375
},
{
"epoch": 0.73,
"learning_rate": 3.708127853881279e-05,
"loss": 2.3709,
"step": 2400
},
{
"epoch": 0.74,
"learning_rate": 3.7050837138508375e-05,
"loss": 2.3924,
"step": 2425
},
{
"epoch": 0.75,
"learning_rate": 3.702039573820396e-05,
"loss": 2.3185,
"step": 2450
},
{
"epoch": 0.75,
"learning_rate": 3.6989954337899546e-05,
"loss": 2.3042,
"step": 2475
},
{
"epoch": 0.76,
"learning_rate": 3.695951293759513e-05,
"loss": 2.3062,
"step": 2500
},
{
"epoch": 0.77,
"learning_rate": 3.692907153729072e-05,
"loss": 2.2792,
"step": 2525
},
{
"epoch": 0.78,
"learning_rate": 3.68986301369863e-05,
"loss": 2.2511,
"step": 2550
},
{
"epoch": 0.78,
"learning_rate": 3.686818873668189e-05,
"loss": 2.2938,
"step": 2575
},
{
"epoch": 0.79,
"learning_rate": 3.683774733637748e-05,
"loss": 2.3167,
"step": 2600
},
{
"epoch": 0.8,
"learning_rate": 3.680730593607306e-05,
"loss": 2.3586,
"step": 2625
},
{
"epoch": 0.81,
"learning_rate": 3.6776864535768646e-05,
"loss": 2.2705,
"step": 2650
},
{
"epoch": 0.81,
"learning_rate": 3.674642313546423e-05,
"loss": 2.1603,
"step": 2675
},
{
"epoch": 0.82,
"learning_rate": 3.6715981735159824e-05,
"loss": 2.3594,
"step": 2700
},
{
"epoch": 0.83,
"learning_rate": 3.668554033485541e-05,
"loss": 2.308,
"step": 2725
},
{
"epoch": 0.84,
"learning_rate": 3.6655098934550995e-05,
"loss": 2.3469,
"step": 2750
},
{
"epoch": 0.84,
"learning_rate": 3.6624657534246574e-05,
"loss": 2.3153,
"step": 2775
},
{
"epoch": 0.85,
"learning_rate": 3.659421613394217e-05,
"loss": 2.3836,
"step": 2800
},
{
"epoch": 0.86,
"learning_rate": 3.656377473363775e-05,
"loss": 2.2617,
"step": 2825
},
{
"epoch": 0.87,
"learning_rate": 3.653333333333334e-05,
"loss": 2.3834,
"step": 2850
},
{
"epoch": 0.88,
"learning_rate": 3.6502891933028924e-05,
"loss": 2.2582,
"step": 2875
},
{
"epoch": 0.88,
"learning_rate": 3.647245053272451e-05,
"loss": 2.2326,
"step": 2900
},
{
"epoch": 0.89,
"learning_rate": 3.6442009132420095e-05,
"loss": 2.2157,
"step": 2925
},
{
"epoch": 0.9,
"learning_rate": 3.641156773211568e-05,
"loss": 2.2988,
"step": 2950
},
{
"epoch": 0.91,
"learning_rate": 3.6381126331811267e-05,
"loss": 2.2304,
"step": 2975
},
{
"epoch": 0.91,
"learning_rate": 3.635068493150685e-05,
"loss": 2.1554,
"step": 3000
},
{
"epoch": 0.92,
"learning_rate": 3.632024353120244e-05,
"loss": 2.2901,
"step": 3025
},
{
"epoch": 0.93,
"learning_rate": 3.6289802130898024e-05,
"loss": 2.1346,
"step": 3050
},
{
"epoch": 0.94,
"learning_rate": 3.625936073059361e-05,
"loss": 2.2658,
"step": 3075
},
{
"epoch": 0.94,
"learning_rate": 3.62289193302892e-05,
"loss": 2.2892,
"step": 3100
},
{
"epoch": 0.95,
"learning_rate": 3.619847792998478e-05,
"loss": 2.4276,
"step": 3125
},
{
"epoch": 0.96,
"learning_rate": 3.6168036529680366e-05,
"loss": 2.3805,
"step": 3150
},
{
"epoch": 0.97,
"learning_rate": 3.613759512937595e-05,
"loss": 2.3277,
"step": 3175
},
{
"epoch": 0.97,
"learning_rate": 3.6107153729071544e-05,
"loss": 2.253,
"step": 3200
},
{
"epoch": 0.98,
"learning_rate": 3.607671232876713e-05,
"loss": 2.2018,
"step": 3225
},
{
"epoch": 0.99,
"learning_rate": 3.604627092846271e-05,
"loss": 2.3574,
"step": 3250
},
{
"epoch": 1.0,
"learning_rate": 3.6015829528158295e-05,
"loss": 2.3147,
"step": 3275
},
{
"epoch": 1.0,
"learning_rate": 3.598538812785389e-05,
"loss": 2.247,
"step": 3300
},
{
"epoch": 1.01,
"learning_rate": 3.595494672754947e-05,
"loss": 1.9512,
"step": 3325
},
{
"epoch": 1.02,
"learning_rate": 3.592450532724506e-05,
"loss": 2.0597,
"step": 3350
},
{
"epoch": 1.03,
"learning_rate": 3.5894063926940644e-05,
"loss": 2.0195,
"step": 3375
},
{
"epoch": 1.04,
"learning_rate": 3.586362252663623e-05,
"loss": 1.9563,
"step": 3400
},
{
"epoch": 1.04,
"learning_rate": 3.5833181126331816e-05,
"loss": 1.9845,
"step": 3425
},
{
"epoch": 1.05,
"learning_rate": 3.58027397260274e-05,
"loss": 2.0274,
"step": 3450
},
{
"epoch": 1.06,
"learning_rate": 3.577229832572299e-05,
"loss": 2.051,
"step": 3475
},
{
"epoch": 1.07,
"learning_rate": 3.574185692541857e-05,
"loss": 1.9961,
"step": 3500
},
{
"epoch": 1.07,
"learning_rate": 3.571141552511416e-05,
"loss": 1.9761,
"step": 3525
},
{
"epoch": 1.08,
"learning_rate": 3.5680974124809744e-05,
"loss": 2.0424,
"step": 3550
},
{
"epoch": 1.09,
"learning_rate": 3.565053272450533e-05,
"loss": 1.9622,
"step": 3575
},
{
"epoch": 1.1,
"learning_rate": 3.5620091324200915e-05,
"loss": 1.992,
"step": 3600
},
{
"epoch": 1.1,
"learning_rate": 3.55896499238965e-05,
"loss": 2.0852,
"step": 3625
},
{
"epoch": 1.11,
"learning_rate": 3.555920852359209e-05,
"loss": 2.1086,
"step": 3650
},
{
"epoch": 1.12,
"learning_rate": 3.552876712328767e-05,
"loss": 2.0231,
"step": 3675
},
{
"epoch": 1.13,
"learning_rate": 3.5498325722983265e-05,
"loss": 1.9472,
"step": 3700
},
{
"epoch": 1.13,
"learning_rate": 3.546788432267885e-05,
"loss": 1.9655,
"step": 3725
},
{
"epoch": 1.14,
"learning_rate": 3.543744292237443e-05,
"loss": 2.04,
"step": 3750
},
{
"epoch": 1.15,
"learning_rate": 3.5407001522070015e-05,
"loss": 2.1005,
"step": 3775
},
{
"epoch": 1.16,
"learning_rate": 3.537656012176561e-05,
"loss": 2.1219,
"step": 3800
},
{
"epoch": 1.16,
"learning_rate": 3.534611872146119e-05,
"loss": 2.046,
"step": 3825
},
{
"epoch": 1.17,
"learning_rate": 3.531567732115678e-05,
"loss": 2.0985,
"step": 3850
},
{
"epoch": 1.18,
"learning_rate": 3.528523592085236e-05,
"loss": 2.0795,
"step": 3875
},
{
"epoch": 1.19,
"learning_rate": 3.525479452054795e-05,
"loss": 2.0962,
"step": 3900
},
{
"epoch": 1.19,
"learning_rate": 3.5224353120243536e-05,
"loss": 2.052,
"step": 3925
},
{
"epoch": 1.2,
"learning_rate": 3.519391171993912e-05,
"loss": 2.1188,
"step": 3950
},
{
"epoch": 1.21,
"learning_rate": 3.516347031963471e-05,
"loss": 2.0075,
"step": 3975
},
{
"epoch": 1.22,
"learning_rate": 3.513302891933029e-05,
"loss": 2.0194,
"step": 4000
},
{
"epoch": 1.23,
"learning_rate": 3.510258751902588e-05,
"loss": 2.0319,
"step": 4025
},
{
"epoch": 1.23,
"learning_rate": 3.5072146118721464e-05,
"loss": 1.9817,
"step": 4050
},
{
"epoch": 1.24,
"learning_rate": 3.504170471841705e-05,
"loss": 2.0719,
"step": 4075
},
{
"epoch": 1.25,
"learning_rate": 3.5011263318112636e-05,
"loss": 1.9931,
"step": 4100
},
{
"epoch": 1.26,
"learning_rate": 3.498082191780822e-05,
"loss": 1.9983,
"step": 4125
},
{
"epoch": 1.26,
"learning_rate": 3.495038051750381e-05,
"loss": 2.1237,
"step": 4150
},
{
"epoch": 1.27,
"learning_rate": 3.491993911719939e-05,
"loss": 2.011,
"step": 4175
},
{
"epoch": 1.28,
"learning_rate": 3.4889497716894985e-05,
"loss": 2.0294,
"step": 4200
},
{
"epoch": 1.29,
"learning_rate": 3.4859056316590564e-05,
"loss": 2.063,
"step": 4225
},
{
"epoch": 1.29,
"learning_rate": 3.482861491628615e-05,
"loss": 2.0753,
"step": 4250
},
{
"epoch": 1.3,
"learning_rate": 3.4798173515981735e-05,
"loss": 2.0859,
"step": 4275
},
{
"epoch": 1.31,
"learning_rate": 3.476773211567733e-05,
"loss": 1.9657,
"step": 4300
},
{
"epoch": 1.32,
"learning_rate": 3.4737290715372914e-05,
"loss": 2.0901,
"step": 4325
},
{
"epoch": 1.32,
"learning_rate": 3.47068493150685e-05,
"loss": 2.0066,
"step": 4350
},
{
"epoch": 1.33,
"learning_rate": 3.467640791476408e-05,
"loss": 2.0134,
"step": 4375
},
{
"epoch": 1.34,
"learning_rate": 3.464596651445967e-05,
"loss": 2.0645,
"step": 4400
},
{
"epoch": 1.35,
"learning_rate": 3.4615525114155256e-05,
"loss": 2.0559,
"step": 4425
},
{
"epoch": 1.35,
"learning_rate": 3.458508371385084e-05,
"loss": 2.1435,
"step": 4450
},
{
"epoch": 1.36,
"learning_rate": 3.455464231354643e-05,
"loss": 1.9392,
"step": 4475
},
{
"epoch": 1.37,
"learning_rate": 3.452420091324201e-05,
"loss": 2.0839,
"step": 4500
},
{
"epoch": 1.38,
"learning_rate": 3.44937595129376e-05,
"loss": 2.1076,
"step": 4525
},
{
"epoch": 1.39,
"learning_rate": 3.4463318112633185e-05,
"loss": 2.0119,
"step": 4550
},
{
"epoch": 1.39,
"learning_rate": 3.443287671232877e-05,
"loss": 2.1092,
"step": 4575
},
{
"epoch": 1.4,
"learning_rate": 3.4402435312024356e-05,
"loss": 2.0383,
"step": 4600
},
{
"epoch": 1.41,
"learning_rate": 3.437199391171994e-05,
"loss": 2.1265,
"step": 4625
},
{
"epoch": 1.42,
"learning_rate": 3.434155251141553e-05,
"loss": 2.07,
"step": 4650
},
{
"epoch": 1.42,
"learning_rate": 3.431111111111111e-05,
"loss": 2.0152,
"step": 4675
},
{
"epoch": 1.43,
"learning_rate": 3.42806697108067e-05,
"loss": 2.008,
"step": 4700
},
{
"epoch": 1.44,
"learning_rate": 3.4250228310502284e-05,
"loss": 1.9992,
"step": 4725
},
{
"epoch": 1.45,
"learning_rate": 3.421978691019787e-05,
"loss": 2.0191,
"step": 4750
},
{
"epoch": 1.45,
"learning_rate": 3.4189345509893456e-05,
"loss": 2.0505,
"step": 4775
},
{
"epoch": 1.46,
"learning_rate": 3.415890410958904e-05,
"loss": 2.0542,
"step": 4800
},
{
"epoch": 1.47,
"learning_rate": 3.4128462709284634e-05,
"loss": 2.1455,
"step": 4825
},
{
"epoch": 1.48,
"learning_rate": 3.409802130898021e-05,
"loss": 1.9989,
"step": 4850
},
{
"epoch": 1.48,
"learning_rate": 3.40675799086758e-05,
"loss": 2.0734,
"step": 4875
},
{
"epoch": 1.49,
"learning_rate": 3.4037138508371384e-05,
"loss": 1.9921,
"step": 4900
},
{
"epoch": 1.5,
"learning_rate": 3.400669710806698e-05,
"loss": 2.1307,
"step": 4925
},
{
"epoch": 1.51,
"learning_rate": 3.397625570776256e-05,
"loss": 2.0824,
"step": 4950
},
{
"epoch": 1.51,
"learning_rate": 3.394581430745815e-05,
"loss": 1.9928,
"step": 4975
},
{
"epoch": 1.52,
"learning_rate": 3.391537290715373e-05,
"loss": 2.0448,
"step": 5000
},
{
"epoch": 1.53,
"learning_rate": 3.388493150684932e-05,
"loss": 2.0779,
"step": 5025
},
{
"epoch": 1.54,
"learning_rate": 3.3854490106544905e-05,
"loss": 2.0343,
"step": 5050
},
{
"epoch": 1.54,
"learning_rate": 3.382404870624049e-05,
"loss": 1.9332,
"step": 5075
},
{
"epoch": 1.55,
"learning_rate": 3.3793607305936076e-05,
"loss": 2.1341,
"step": 5100
},
{
"epoch": 1.56,
"learning_rate": 3.376316590563166e-05,
"loss": 1.978,
"step": 5125
},
{
"epoch": 1.57,
"learning_rate": 3.373272450532725e-05,
"loss": 1.9498,
"step": 5150
},
{
"epoch": 1.58,
"learning_rate": 3.3702283105022834e-05,
"loss": 1.9063,
"step": 5175
},
{
"epoch": 1.58,
"learning_rate": 3.367184170471842e-05,
"loss": 2.0495,
"step": 5200
},
{
"epoch": 1.59,
"learning_rate": 3.3641400304414005e-05,
"loss": 2.02,
"step": 5225
},
{
"epoch": 1.6,
"learning_rate": 3.361095890410959e-05,
"loss": 1.973,
"step": 5250
},
{
"epoch": 1.61,
"learning_rate": 3.3580517503805176e-05,
"loss": 1.96,
"step": 5275
},
{
"epoch": 1.61,
"learning_rate": 3.355007610350076e-05,
"loss": 2.0725,
"step": 5300
},
{
"epoch": 1.62,
"learning_rate": 3.351963470319635e-05,
"loss": 2.1469,
"step": 5325
},
{
"epoch": 1.63,
"learning_rate": 3.348919330289193e-05,
"loss": 2.0721,
"step": 5350
},
{
"epoch": 1.64,
"learning_rate": 3.345875190258752e-05,
"loss": 2.1235,
"step": 5375
},
{
"epoch": 1.64,
"learning_rate": 3.3428310502283105e-05,
"loss": 1.9994,
"step": 5400
},
{
"epoch": 1.65,
"learning_rate": 3.33978691019787e-05,
"loss": 2.0654,
"step": 5425
},
{
"epoch": 1.66,
"learning_rate": 3.336742770167428e-05,
"loss": 2.0628,
"step": 5450
},
{
"epoch": 1.67,
"learning_rate": 3.333698630136986e-05,
"loss": 2.0541,
"step": 5475
},
{
"epoch": 1.67,
"learning_rate": 3.330654490106545e-05,
"loss": 2.0388,
"step": 5500
},
{
"epoch": 1.68,
"learning_rate": 3.327610350076104e-05,
"loss": 1.9874,
"step": 5525
},
{
"epoch": 1.69,
"learning_rate": 3.3245662100456625e-05,
"loss": 2.0574,
"step": 5550
},
{
"epoch": 1.7,
"learning_rate": 3.321522070015221e-05,
"loss": 1.9692,
"step": 5575
},
{
"epoch": 1.7,
"learning_rate": 3.31847792998478e-05,
"loss": 1.932,
"step": 5600
},
{
"epoch": 1.71,
"learning_rate": 3.315433789954338e-05,
"loss": 1.8747,
"step": 5625
},
{
"epoch": 1.72,
"learning_rate": 3.312389649923897e-05,
"loss": 2.0928,
"step": 5650
},
{
"epoch": 1.73,
"learning_rate": 3.3093455098934554e-05,
"loss": 2.1114,
"step": 5675
},
{
"epoch": 1.74,
"learning_rate": 3.306301369863014e-05,
"loss": 2.0353,
"step": 5700
},
{
"epoch": 1.74,
"learning_rate": 3.3032572298325725e-05,
"loss": 1.9485,
"step": 5725
},
{
"epoch": 1.75,
"learning_rate": 3.300213089802131e-05,
"loss": 2.0028,
"step": 5750
},
{
"epoch": 1.76,
"learning_rate": 3.2971689497716897e-05,
"loss": 2.026,
"step": 5775
},
{
"epoch": 1.77,
"learning_rate": 3.294124809741248e-05,
"loss": 2.1633,
"step": 5800
},
{
"epoch": 1.77,
"learning_rate": 3.291080669710807e-05,
"loss": 2.0141,
"step": 5825
},
{
"epoch": 1.78,
"learning_rate": 3.2880365296803654e-05,
"loss": 1.9247,
"step": 5850
},
{
"epoch": 1.79,
"learning_rate": 3.284992389649924e-05,
"loss": 2.005,
"step": 5875
},
{
"epoch": 1.8,
"learning_rate": 3.2819482496194825e-05,
"loss": 2.0607,
"step": 5900
},
{
"epoch": 1.8,
"learning_rate": 3.278904109589042e-05,
"loss": 2.0251,
"step": 5925
},
{
"epoch": 1.81,
"learning_rate": 3.2758599695585996e-05,
"loss": 2.1067,
"step": 5950
},
{
"epoch": 1.82,
"learning_rate": 3.272815829528158e-05,
"loss": 1.97,
"step": 5975
},
{
"epoch": 1.83,
"learning_rate": 3.269771689497717e-05,
"loss": 2.0772,
"step": 6000
},
{
"epoch": 1.83,
"learning_rate": 3.266727549467276e-05,
"loss": 2.0554,
"step": 6025
},
{
"epoch": 1.84,
"learning_rate": 3.2636834094368346e-05,
"loss": 1.9819,
"step": 6050
},
{
"epoch": 1.85,
"learning_rate": 3.260639269406393e-05,
"loss": 2.0674,
"step": 6075
},
{
"epoch": 1.86,
"learning_rate": 3.257595129375951e-05,
"loss": 2.0516,
"step": 6100
},
{
"epoch": 1.86,
"learning_rate": 3.25455098934551e-05,
"loss": 2.0866,
"step": 6125
},
{
"epoch": 1.87,
"learning_rate": 3.251506849315069e-05,
"loss": 1.9859,
"step": 6150
},
{
"epoch": 1.88,
"learning_rate": 3.2484627092846274e-05,
"loss": 1.9645,
"step": 6175
},
{
"epoch": 1.89,
"learning_rate": 3.245418569254186e-05,
"loss": 2.0103,
"step": 6200
},
{
"epoch": 1.89,
"learning_rate": 3.2423744292237446e-05,
"loss": 2.052,
"step": 6225
},
{
"epoch": 1.9,
"learning_rate": 3.239330289193303e-05,
"loss": 2.0134,
"step": 6250
},
{
"epoch": 1.91,
"learning_rate": 3.236286149162862e-05,
"loss": 2.025,
"step": 6275
},
{
"epoch": 1.92,
"learning_rate": 3.23324200913242e-05,
"loss": 2.0338,
"step": 6300
},
{
"epoch": 1.93,
"learning_rate": 3.230197869101979e-05,
"loss": 2.0614,
"step": 6325
},
{
"epoch": 1.93,
"learning_rate": 3.2271537290715374e-05,
"loss": 1.9784,
"step": 6350
},
{
"epoch": 1.94,
"learning_rate": 3.224109589041096e-05,
"loss": 1.9976,
"step": 6375
},
{
"epoch": 1.95,
"learning_rate": 3.2210654490106545e-05,
"loss": 2.0695,
"step": 6400
},
{
"epoch": 1.96,
"learning_rate": 3.218021308980214e-05,
"loss": 1.8607,
"step": 6425
},
{
"epoch": 1.96,
"learning_rate": 3.214977168949772e-05,
"loss": 2.0003,
"step": 6450
},
{
"epoch": 1.97,
"learning_rate": 3.21193302891933e-05,
"loss": 1.9529,
"step": 6475
},
{
"epoch": 1.98,
"learning_rate": 3.208888888888889e-05,
"loss": 1.9775,
"step": 6500
},
{
"epoch": 1.99,
"learning_rate": 3.205844748858448e-05,
"loss": 2.1202,
"step": 6525
},
{
"epoch": 1.99,
"learning_rate": 3.2028006088280066e-05,
"loss": 2.0416,
"step": 6550
},
{
"epoch": 2.0,
"learning_rate": 3.1997564687975645e-05,
"loss": 1.939,
"step": 6575
},
{
"epoch": 2.01,
"learning_rate": 3.196712328767123e-05,
"loss": 1.8248,
"step": 6600
},
{
"epoch": 2.02,
"learning_rate": 3.193668188736682e-05,
"loss": 1.768,
"step": 6625
},
{
"epoch": 2.02,
"learning_rate": 3.190624048706241e-05,
"loss": 1.7417,
"step": 6650
},
{
"epoch": 2.03,
"learning_rate": 3.1875799086757995e-05,
"loss": 1.787,
"step": 6675
},
{
"epoch": 2.04,
"learning_rate": 3.184657534246576e-05,
"loss": 1.8256,
"step": 6700
},
{
"epoch": 2.05,
"learning_rate": 3.181613394216134e-05,
"loss": 1.7764,
"step": 6725
},
{
"epoch": 2.05,
"learning_rate": 3.1785692541856925e-05,
"loss": 1.7467,
"step": 6750
},
{
"epoch": 2.06,
"learning_rate": 3.175525114155251e-05,
"loss": 1.7261,
"step": 6775
},
{
"epoch": 2.07,
"learning_rate": 3.17248097412481e-05,
"loss": 1.7825,
"step": 6800
},
{
"epoch": 2.08,
"learning_rate": 3.169436834094369e-05,
"loss": 1.7536,
"step": 6825
},
{
"epoch": 2.09,
"learning_rate": 3.1663926940639274e-05,
"loss": 1.7507,
"step": 6850
},
{
"epoch": 2.09,
"learning_rate": 3.1633485540334853e-05,
"loss": 1.7534,
"step": 6875
},
{
"epoch": 2.1,
"learning_rate": 3.1603044140030446e-05,
"loss": 1.6879,
"step": 6900
},
{
"epoch": 2.11,
"learning_rate": 3.157260273972603e-05,
"loss": 1.6767,
"step": 6925
},
{
"epoch": 2.12,
"learning_rate": 3.154216133942162e-05,
"loss": 1.7718,
"step": 6950
},
{
"epoch": 2.12,
"learning_rate": 3.15117199391172e-05,
"loss": 1.7115,
"step": 6975
},
{
"epoch": 2.13,
"learning_rate": 3.148127853881279e-05,
"loss": 1.7024,
"step": 7000
},
{
"epoch": 2.14,
"learning_rate": 3.1450837138508374e-05,
"loss": 1.7389,
"step": 7025
},
{
"epoch": 2.15,
"learning_rate": 3.142039573820396e-05,
"loss": 1.7123,
"step": 7050
},
{
"epoch": 2.15,
"learning_rate": 3.1389954337899546e-05,
"loss": 1.835,
"step": 7075
},
{
"epoch": 2.16,
"learning_rate": 3.135951293759513e-05,
"loss": 1.7925,
"step": 7100
},
{
"epoch": 2.17,
"learning_rate": 3.132907153729072e-05,
"loss": 1.7361,
"step": 7125
},
{
"epoch": 2.18,
"learning_rate": 3.12986301369863e-05,
"loss": 1.775,
"step": 7150
},
{
"epoch": 2.18,
"learning_rate": 3.126818873668189e-05,
"loss": 1.699,
"step": 7175
},
{
"epoch": 2.19,
"learning_rate": 3.123774733637748e-05,
"loss": 1.8377,
"step": 7200
},
{
"epoch": 2.2,
"learning_rate": 3.120730593607306e-05,
"loss": 1.6677,
"step": 7225
},
{
"epoch": 2.21,
"learning_rate": 3.1176864535768645e-05,
"loss": 1.7617,
"step": 7250
},
{
"epoch": 2.21,
"learning_rate": 3.114642313546423e-05,
"loss": 1.7213,
"step": 7275
},
{
"epoch": 2.22,
"learning_rate": 3.1115981735159824e-05,
"loss": 1.8376,
"step": 7300
},
{
"epoch": 2.23,
"learning_rate": 3.108554033485541e-05,
"loss": 1.792,
"step": 7325
},
{
"epoch": 2.24,
"learning_rate": 3.105509893455099e-05,
"loss": 1.8017,
"step": 7350
},
{
"epoch": 2.25,
"learning_rate": 3.1024657534246574e-05,
"loss": 1.7817,
"step": 7375
},
{
"epoch": 2.25,
"learning_rate": 3.0994216133942166e-05,
"loss": 1.8186,
"step": 7400
},
{
"epoch": 2.26,
"learning_rate": 3.096377473363775e-05,
"loss": 1.7081,
"step": 7425
},
{
"epoch": 2.27,
"learning_rate": 3.093333333333334e-05,
"loss": 1.7725,
"step": 7450
},
{
"epoch": 2.28,
"learning_rate": 3.090289193302892e-05,
"loss": 1.8943,
"step": 7475
},
{
"epoch": 2.28,
"learning_rate": 3.087245053272451e-05,
"loss": 1.8723,
"step": 7500
},
{
"epoch": 2.29,
"learning_rate": 3.0842009132420095e-05,
"loss": 1.8089,
"step": 7525
},
{
"epoch": 2.3,
"learning_rate": 3.081156773211568e-05,
"loss": 1.7635,
"step": 7550
},
{
"epoch": 2.31,
"learning_rate": 3.0781126331811266e-05,
"loss": 1.7497,
"step": 7575
},
{
"epoch": 2.31,
"learning_rate": 3.075068493150685e-05,
"loss": 1.738,
"step": 7600
},
{
"epoch": 2.32,
"learning_rate": 3.072024353120244e-05,
"loss": 1.7198,
"step": 7625
},
{
"epoch": 2.33,
"learning_rate": 3.068980213089802e-05,
"loss": 1.7462,
"step": 7650
},
{
"epoch": 2.34,
"learning_rate": 3.065936073059361e-05,
"loss": 1.8347,
"step": 7675
},
{
"epoch": 2.34,
"learning_rate": 3.0628919330289194e-05,
"loss": 1.8402,
"step": 7700
},
{
"epoch": 2.35,
"learning_rate": 3.059847792998478e-05,
"loss": 1.8079,
"step": 7725
},
{
"epoch": 2.36,
"learning_rate": 3.0568036529680366e-05,
"loss": 1.7424,
"step": 7750
},
{
"epoch": 2.37,
"learning_rate": 3.053759512937595e-05,
"loss": 1.7832,
"step": 7775
},
{
"epoch": 2.37,
"learning_rate": 3.050715372907154e-05,
"loss": 1.7963,
"step": 7800
},
{
"epoch": 2.38,
"learning_rate": 3.0477929984779303e-05,
"loss": 1.8091,
"step": 7825
},
{
"epoch": 2.39,
"learning_rate": 3.044748858447489e-05,
"loss": 1.6953,
"step": 7850
},
{
"epoch": 2.4,
"learning_rate": 3.0417047184170478e-05,
"loss": 1.8256,
"step": 7875
},
{
"epoch": 2.4,
"learning_rate": 3.038660578386606e-05,
"loss": 1.8461,
"step": 7900
},
{
"epoch": 2.41,
"learning_rate": 3.0356164383561646e-05,
"loss": 1.6912,
"step": 7925
},
{
"epoch": 2.42,
"learning_rate": 3.032572298325723e-05,
"loss": 1.7326,
"step": 7950
},
{
"epoch": 2.43,
"learning_rate": 3.029528158295282e-05,
"loss": 1.7778,
"step": 7975
},
{
"epoch": 2.44,
"learning_rate": 3.0264840182648406e-05,
"loss": 1.7841,
"step": 8000
},
{
"epoch": 2.44,
"learning_rate": 3.023439878234399e-05,
"loss": 1.8748,
"step": 8025
},
{
"epoch": 2.45,
"learning_rate": 3.0203957382039574e-05,
"loss": 1.7683,
"step": 8050
},
{
"epoch": 2.46,
"learning_rate": 3.0173515981735163e-05,
"loss": 1.8536,
"step": 8075
},
{
"epoch": 2.47,
"learning_rate": 3.014307458143075e-05,
"loss": 1.865,
"step": 8100
},
{
"epoch": 2.47,
"learning_rate": 3.0112633181126334e-05,
"loss": 1.7282,
"step": 8125
},
{
"epoch": 2.48,
"learning_rate": 3.008219178082192e-05,
"loss": 1.8071,
"step": 8150
},
{
"epoch": 2.49,
"learning_rate": 3.005175038051751e-05,
"loss": 1.7984,
"step": 8175
},
{
"epoch": 2.5,
"learning_rate": 3.002130898021309e-05,
"loss": 1.8904,
"step": 8200
},
{
"epoch": 2.5,
"learning_rate": 2.9990867579908677e-05,
"loss": 1.8645,
"step": 8225
},
{
"epoch": 2.51,
"learning_rate": 2.9960426179604263e-05,
"loss": 1.8624,
"step": 8250
},
{
"epoch": 2.52,
"learning_rate": 2.9929984779299852e-05,
"loss": 1.7069,
"step": 8275
},
{
"epoch": 2.53,
"learning_rate": 2.9899543378995438e-05,
"loss": 1.8513,
"step": 8300
},
{
"epoch": 2.53,
"learning_rate": 2.9869101978691023e-05,
"loss": 1.695,
"step": 8325
},
{
"epoch": 2.54,
"learning_rate": 2.9838660578386606e-05,
"loss": 1.789,
"step": 8350
},
{
"epoch": 2.55,
"learning_rate": 2.9808219178082195e-05,
"loss": 1.8332,
"step": 8375
},
{
"epoch": 2.56,
"learning_rate": 2.977777777777778e-05,
"loss": 1.781,
"step": 8400
},
{
"epoch": 2.56,
"learning_rate": 2.9747336377473366e-05,
"loss": 1.7331,
"step": 8425
},
{
"epoch": 2.57,
"learning_rate": 2.971689497716895e-05,
"loss": 1.7355,
"step": 8450
},
{
"epoch": 2.58,
"learning_rate": 2.968645357686454e-05,
"loss": 1.7736,
"step": 8475
},
{
"epoch": 2.59,
"learning_rate": 2.9656012176560126e-05,
"loss": 1.8316,
"step": 8500
},
{
"epoch": 2.6,
"learning_rate": 2.962557077625571e-05,
"loss": 1.7823,
"step": 8525
},
{
"epoch": 2.6,
"learning_rate": 2.9595129375951294e-05,
"loss": 1.8136,
"step": 8550
},
{
"epoch": 2.61,
"learning_rate": 2.9564687975646883e-05,
"loss": 1.8344,
"step": 8575
},
{
"epoch": 2.62,
"learning_rate": 2.953424657534247e-05,
"loss": 1.8676,
"step": 8600
},
{
"epoch": 2.63,
"learning_rate": 2.9503805175038055e-05,
"loss": 1.8041,
"step": 8625
},
{
"epoch": 2.63,
"learning_rate": 2.9473363774733637e-05,
"loss": 1.7538,
"step": 8650
},
{
"epoch": 2.64,
"learning_rate": 2.944292237442923e-05,
"loss": 1.7551,
"step": 8675
},
{
"epoch": 2.65,
"learning_rate": 2.9412480974124812e-05,
"loss": 1.8191,
"step": 8700
},
{
"epoch": 2.66,
"learning_rate": 2.9382039573820398e-05,
"loss": 1.6856,
"step": 8725
},
{
"epoch": 2.66,
"learning_rate": 2.9351598173515983e-05,
"loss": 1.8142,
"step": 8750
},
{
"epoch": 2.67,
"learning_rate": 2.9321156773211572e-05,
"loss": 1.7193,
"step": 8775
},
{
"epoch": 2.68,
"learning_rate": 2.9290715372907158e-05,
"loss": 1.8532,
"step": 8800
},
{
"epoch": 2.69,
"learning_rate": 2.926027397260274e-05,
"loss": 1.7633,
"step": 8825
},
{
"epoch": 2.69,
"learning_rate": 2.9229832572298326e-05,
"loss": 1.8355,
"step": 8850
},
{
"epoch": 2.7,
"learning_rate": 2.9199391171993915e-05,
"loss": 1.7773,
"step": 8875
},
{
"epoch": 2.71,
"learning_rate": 2.91689497716895e-05,
"loss": 1.8063,
"step": 8900
},
{
"epoch": 2.72,
"learning_rate": 2.9138508371385086e-05,
"loss": 1.8132,
"step": 8925
},
{
"epoch": 2.72,
"learning_rate": 2.9108066971080672e-05,
"loss": 1.8179,
"step": 8950
},
{
"epoch": 2.73,
"learning_rate": 2.907762557077626e-05,
"loss": 1.8025,
"step": 8975
},
{
"epoch": 2.74,
"learning_rate": 2.9047184170471843e-05,
"loss": 1.9039,
"step": 9000
},
{
"epoch": 2.75,
"learning_rate": 2.901674277016743e-05,
"loss": 1.8192,
"step": 9025
},
{
"epoch": 2.75,
"learning_rate": 2.8986301369863015e-05,
"loss": 1.7935,
"step": 9050
},
{
"epoch": 2.76,
"learning_rate": 2.8955859969558604e-05,
"loss": 1.8275,
"step": 9075
},
{
"epoch": 2.77,
"learning_rate": 2.892541856925419e-05,
"loss": 1.818,
"step": 9100
},
{
"epoch": 2.78,
"learning_rate": 2.8894977168949775e-05,
"loss": 1.8142,
"step": 9125
},
{
"epoch": 2.79,
"learning_rate": 2.8864535768645357e-05,
"loss": 1.8139,
"step": 9150
},
{
"epoch": 2.79,
"learning_rate": 2.8834094368340947e-05,
"loss": 1.8355,
"step": 9175
},
{
"epoch": 2.8,
"learning_rate": 2.8803652968036532e-05,
"loss": 1.7334,
"step": 9200
},
{
"epoch": 2.81,
"learning_rate": 2.8773211567732118e-05,
"loss": 1.7533,
"step": 9225
},
{
"epoch": 2.82,
"learning_rate": 2.8742770167427704e-05,
"loss": 1.7938,
"step": 9250
},
{
"epoch": 2.82,
"learning_rate": 2.8712328767123293e-05,
"loss": 1.7383,
"step": 9275
},
{
"epoch": 2.83,
"learning_rate": 2.868188736681888e-05,
"loss": 1.7569,
"step": 9300
},
{
"epoch": 2.84,
"learning_rate": 2.865144596651446e-05,
"loss": 1.891,
"step": 9325
},
{
"epoch": 2.85,
"learning_rate": 2.8621004566210046e-05,
"loss": 1.8103,
"step": 9350
},
{
"epoch": 2.85,
"learning_rate": 2.8590563165905635e-05,
"loss": 1.8334,
"step": 9375
},
{
"epoch": 2.86,
"learning_rate": 2.856012176560122e-05,
"loss": 1.7458,
"step": 9400
},
{
"epoch": 2.87,
"learning_rate": 2.8529680365296807e-05,
"loss": 1.8501,
"step": 9425
},
{
"epoch": 2.88,
"learning_rate": 2.849923896499239e-05,
"loss": 1.7494,
"step": 9450
},
{
"epoch": 2.88,
"learning_rate": 2.846879756468798e-05,
"loss": 1.7331,
"step": 9475
},
{
"epoch": 2.89,
"learning_rate": 2.8438356164383564e-05,
"loss": 1.8252,
"step": 9500
},
{
"epoch": 2.9,
"learning_rate": 2.840791476407915e-05,
"loss": 1.808,
"step": 9525
},
{
"epoch": 2.91,
"learning_rate": 2.8377473363774735e-05,
"loss": 1.7964,
"step": 9550
},
{
"epoch": 2.91,
"learning_rate": 2.8347031963470324e-05,
"loss": 1.8049,
"step": 9575
},
{
"epoch": 2.92,
"learning_rate": 2.831659056316591e-05,
"loss": 1.8175,
"step": 9600
},
{
"epoch": 2.93,
"learning_rate": 2.8286149162861492e-05,
"loss": 1.768,
"step": 9625
},
{
"epoch": 2.94,
"learning_rate": 2.8255707762557078e-05,
"loss": 1.8176,
"step": 9650
},
{
"epoch": 2.95,
"learning_rate": 2.8225266362252667e-05,
"loss": 1.7983,
"step": 9675
},
{
"epoch": 2.95,
"learning_rate": 2.8194824961948253e-05,
"loss": 1.8383,
"step": 9700
},
{
"epoch": 2.96,
"learning_rate": 2.8164383561643838e-05,
"loss": 1.77,
"step": 9725
},
{
"epoch": 2.97,
"learning_rate": 2.8133942161339424e-05,
"loss": 1.8007,
"step": 9750
},
{
"epoch": 2.98,
"learning_rate": 2.8103500761035013e-05,
"loss": 1.7941,
"step": 9775
},
{
"epoch": 2.98,
"learning_rate": 2.8073059360730595e-05,
"loss": 1.8826,
"step": 9800
},
{
"epoch": 2.99,
"learning_rate": 2.804261796042618e-05,
"loss": 1.8164,
"step": 9825
},
{
"epoch": 3.0,
"learning_rate": 2.8012176560121767e-05,
"loss": 1.8422,
"step": 9850
},
{
"epoch": 3.01,
"learning_rate": 2.7981735159817356e-05,
"loss": 1.607,
"step": 9875
},
{
"epoch": 3.01,
"learning_rate": 2.795129375951294e-05,
"loss": 1.5504,
"step": 9900
},
{
"epoch": 3.02,
"learning_rate": 2.7920852359208527e-05,
"loss": 1.5181,
"step": 9925
},
{
"epoch": 3.03,
"learning_rate": 2.789041095890411e-05,
"loss": 1.5856,
"step": 9950
},
{
"epoch": 3.04,
"learning_rate": 2.78599695585997e-05,
"loss": 1.4975,
"step": 9975
},
{
"epoch": 3.04,
"learning_rate": 2.7829528158295284e-05,
"loss": 1.5684,
"step": 10000
},
{
"epoch": 3.05,
"learning_rate": 2.779908675799087e-05,
"loss": 1.6031,
"step": 10025
},
{
"epoch": 3.06,
"learning_rate": 2.7768645357686455e-05,
"loss": 1.5398,
"step": 10050
},
{
"epoch": 3.07,
"learning_rate": 2.7738203957382045e-05,
"loss": 1.5564,
"step": 10075
},
{
"epoch": 3.07,
"learning_rate": 2.770776255707763e-05,
"loss": 1.5395,
"step": 10100
},
{
"epoch": 3.08,
"learning_rate": 2.7677321156773213e-05,
"loss": 1.5022,
"step": 10125
},
{
"epoch": 3.09,
"learning_rate": 2.7646879756468798e-05,
"loss": 1.4697,
"step": 10150
},
{
"epoch": 3.1,
"learning_rate": 2.7616438356164387e-05,
"loss": 1.5704,
"step": 10175
},
{
"epoch": 3.11,
"learning_rate": 2.7585996955859973e-05,
"loss": 1.5571,
"step": 10200
},
{
"epoch": 3.11,
"learning_rate": 2.755555555555556e-05,
"loss": 1.5742,
"step": 10225
},
{
"epoch": 3.12,
"learning_rate": 2.752511415525114e-05,
"loss": 1.5312,
"step": 10250
},
{
"epoch": 3.13,
"learning_rate": 2.7494672754946733e-05,
"loss": 1.4847,
"step": 10275
},
{
"epoch": 3.14,
"learning_rate": 2.7464231354642316e-05,
"loss": 1.5724,
"step": 10300
},
{
"epoch": 3.14,
"learning_rate": 2.74337899543379e-05,
"loss": 1.5509,
"step": 10325
},
{
"epoch": 3.15,
"learning_rate": 2.7403348554033487e-05,
"loss": 1.5007,
"step": 10350
},
{
"epoch": 3.16,
"learning_rate": 2.7372907153729076e-05,
"loss": 1.5213,
"step": 10375
},
{
"epoch": 3.17,
"learning_rate": 2.7342465753424662e-05,
"loss": 1.5834,
"step": 10400
},
{
"epoch": 3.17,
"learning_rate": 2.7312024353120244e-05,
"loss": 1.5101,
"step": 10425
},
{
"epoch": 3.18,
"learning_rate": 2.728158295281583e-05,
"loss": 1.5733,
"step": 10450
},
{
"epoch": 3.19,
"learning_rate": 2.725114155251142e-05,
"loss": 1.5903,
"step": 10475
},
{
"epoch": 3.2,
"learning_rate": 2.7220700152207005e-05,
"loss": 1.5721,
"step": 10500
},
{
"epoch": 3.2,
"learning_rate": 2.719025875190259e-05,
"loss": 1.5293,
"step": 10525
},
{
"epoch": 3.21,
"learning_rate": 2.7159817351598176e-05,
"loss": 1.6016,
"step": 10550
},
{
"epoch": 3.22,
"learning_rate": 2.7129375951293765e-05,
"loss": 1.4873,
"step": 10575
},
{
"epoch": 3.23,
"learning_rate": 2.7098934550989347e-05,
"loss": 1.6021,
"step": 10600
},
{
"epoch": 3.23,
"learning_rate": 2.7068493150684933e-05,
"loss": 1.6195,
"step": 10625
},
{
"epoch": 3.24,
"learning_rate": 2.703805175038052e-05,
"loss": 1.6156,
"step": 10650
},
{
"epoch": 3.25,
"learning_rate": 2.7007610350076108e-05,
"loss": 1.5061,
"step": 10675
},
{
"epoch": 3.26,
"learning_rate": 2.6977168949771693e-05,
"loss": 1.5679,
"step": 10700
},
{
"epoch": 3.26,
"learning_rate": 2.694672754946728e-05,
"loss": 1.6087,
"step": 10725
},
{
"epoch": 3.27,
"learning_rate": 2.691628614916286e-05,
"loss": 1.5767,
"step": 10750
},
{
"epoch": 3.28,
"learning_rate": 2.688584474885845e-05,
"loss": 1.5589,
"step": 10775
},
{
"epoch": 3.29,
"learning_rate": 2.6855403348554036e-05,
"loss": 1.5384,
"step": 10800
},
{
"epoch": 3.3,
"learning_rate": 2.6824961948249622e-05,
"loss": 1.5174,
"step": 10825
},
{
"epoch": 3.3,
"learning_rate": 2.6794520547945207e-05,
"loss": 1.585,
"step": 10850
},
{
"epoch": 3.31,
"learning_rate": 2.6764079147640796e-05,
"loss": 1.5766,
"step": 10875
},
{
"epoch": 3.32,
"learning_rate": 2.6733637747336382e-05,
"loss": 1.5724,
"step": 10900
},
{
"epoch": 3.33,
"learning_rate": 2.6703196347031964e-05,
"loss": 1.5789,
"step": 10925
},
{
"epoch": 3.33,
"learning_rate": 2.667275494672755e-05,
"loss": 1.566,
"step": 10950
},
{
"epoch": 3.34,
"learning_rate": 2.664231354642314e-05,
"loss": 1.5702,
"step": 10975
},
{
"epoch": 3.35,
"learning_rate": 2.6611872146118725e-05,
"loss": 1.5913,
"step": 11000
},
{
"epoch": 3.36,
"learning_rate": 2.658143074581431e-05,
"loss": 1.5544,
"step": 11025
},
{
"epoch": 3.36,
"learning_rate": 2.6550989345509893e-05,
"loss": 1.4748,
"step": 11050
},
{
"epoch": 3.37,
"learning_rate": 2.6520547945205485e-05,
"loss": 1.5538,
"step": 11075
},
{
"epoch": 3.38,
"learning_rate": 2.6490106544901068e-05,
"loss": 1.5493,
"step": 11100
},
{
"epoch": 3.39,
"learning_rate": 2.6459665144596653e-05,
"loss": 1.5631,
"step": 11125
},
{
"epoch": 3.39,
"learning_rate": 2.642922374429224e-05,
"loss": 1.6231,
"step": 11150
},
{
"epoch": 3.4,
"learning_rate": 2.6398782343987828e-05,
"loss": 1.5592,
"step": 11175
},
{
"epoch": 3.41,
"learning_rate": 2.6368340943683414e-05,
"loss": 1.4599,
"step": 11200
},
{
"epoch": 3.42,
"learning_rate": 2.6337899543378996e-05,
"loss": 1.5991,
"step": 11225
},
{
"epoch": 3.42,
"learning_rate": 2.630745814307458e-05,
"loss": 1.5998,
"step": 11250
},
{
"epoch": 3.43,
"learning_rate": 2.627701674277017e-05,
"loss": 1.5227,
"step": 11275
},
{
"epoch": 3.44,
"learning_rate": 2.6246575342465756e-05,
"loss": 1.5894,
"step": 11300
},
{
"epoch": 3.45,
"learning_rate": 2.6216133942161342e-05,
"loss": 1.5064,
"step": 11325
},
{
"epoch": 3.46,
"learning_rate": 2.6185692541856928e-05,
"loss": 1.5878,
"step": 11350
},
{
"epoch": 3.46,
"learning_rate": 2.6155251141552517e-05,
"loss": 1.608,
"step": 11375
},
{
"epoch": 3.47,
"learning_rate": 2.61248097412481e-05,
"loss": 1.6658,
"step": 11400
},
{
"epoch": 3.48,
"learning_rate": 2.6094368340943685e-05,
"loss": 1.5905,
"step": 11425
},
{
"epoch": 3.49,
"learning_rate": 2.606392694063927e-05,
"loss": 1.6711,
"step": 11450
},
{
"epoch": 3.49,
"learning_rate": 2.603348554033486e-05,
"loss": 1.5699,
"step": 11475
},
{
"epoch": 3.5,
"learning_rate": 2.6003044140030445e-05,
"loss": 1.6112,
"step": 11500
},
{
"epoch": 3.51,
"learning_rate": 2.597260273972603e-05,
"loss": 1.5901,
"step": 11525
},
{
"epoch": 3.52,
"learning_rate": 2.5942161339421613e-05,
"loss": 1.5447,
"step": 11550
},
{
"epoch": 3.52,
"learning_rate": 2.5911719939117202e-05,
"loss": 1.5636,
"step": 11575
},
{
"epoch": 3.53,
"learning_rate": 2.5881278538812788e-05,
"loss": 1.6046,
"step": 11600
},
{
"epoch": 3.54,
"learning_rate": 2.5850837138508374e-05,
"loss": 1.5229,
"step": 11625
},
{
"epoch": 3.55,
"learning_rate": 2.582039573820396e-05,
"loss": 1.6017,
"step": 11650
},
{
"epoch": 3.55,
"learning_rate": 2.578995433789955e-05,
"loss": 1.5262,
"step": 11675
},
{
"epoch": 3.56,
"learning_rate": 2.5759512937595134e-05,
"loss": 1.519,
"step": 11700
},
{
"epoch": 3.57,
"learning_rate": 2.5729071537290716e-05,
"loss": 1.5743,
"step": 11725
},
{
"epoch": 3.58,
"learning_rate": 2.5698630136986302e-05,
"loss": 1.566,
"step": 11750
},
{
"epoch": 3.58,
"learning_rate": 2.566818873668189e-05,
"loss": 1.5854,
"step": 11775
},
{
"epoch": 3.59,
"learning_rate": 2.5637747336377477e-05,
"loss": 1.6292,
"step": 11800
},
{
"epoch": 3.6,
"learning_rate": 2.5607305936073062e-05,
"loss": 1.6367,
"step": 11825
},
{
"epoch": 3.61,
"learning_rate": 2.5576864535768645e-05,
"loss": 1.6502,
"step": 11850
},
{
"epoch": 3.61,
"learning_rate": 2.5546423135464237e-05,
"loss": 1.6127,
"step": 11875
},
{
"epoch": 3.62,
"learning_rate": 2.551598173515982e-05,
"loss": 1.5351,
"step": 11900
},
{
"epoch": 3.63,
"learning_rate": 2.5485540334855405e-05,
"loss": 1.5574,
"step": 11925
},
{
"epoch": 3.64,
"learning_rate": 2.545509893455099e-05,
"loss": 1.6821,
"step": 11950
},
{
"epoch": 3.65,
"learning_rate": 2.542465753424658e-05,
"loss": 1.4367,
"step": 11975
},
{
"epoch": 3.65,
"learning_rate": 2.5394216133942166e-05,
"loss": 1.6209,
"step": 12000
},
{
"epoch": 3.66,
"learning_rate": 2.5363774733637748e-05,
"loss": 1.5987,
"step": 12025
},
{
"epoch": 3.67,
"learning_rate": 2.5333333333333334e-05,
"loss": 1.5591,
"step": 12050
},
{
"epoch": 3.68,
"learning_rate": 2.5302891933028923e-05,
"loss": 1.6609,
"step": 12075
},
{
"epoch": 3.68,
"learning_rate": 2.527245053272451e-05,
"loss": 1.5506,
"step": 12100
},
{
"epoch": 3.69,
"learning_rate": 2.5242009132420094e-05,
"loss": 1.6036,
"step": 12125
},
{
"epoch": 3.7,
"learning_rate": 2.521156773211568e-05,
"loss": 1.6555,
"step": 12150
},
{
"epoch": 3.71,
"learning_rate": 2.518112633181127e-05,
"loss": 1.6392,
"step": 12175
},
{
"epoch": 3.71,
"learning_rate": 2.515068493150685e-05,
"loss": 1.6013,
"step": 12200
},
{
"epoch": 3.72,
"learning_rate": 2.5120243531202437e-05,
"loss": 1.6,
"step": 12225
},
{
"epoch": 3.73,
"learning_rate": 2.5089802130898022e-05,
"loss": 1.5195,
"step": 12250
},
{
"epoch": 3.74,
"learning_rate": 2.505936073059361e-05,
"loss": 1.5349,
"step": 12275
},
{
"epoch": 3.74,
"learning_rate": 2.5028919330289197e-05,
"loss": 1.6064,
"step": 12300
},
{
"epoch": 3.75,
"learning_rate": 2.4998477929984783e-05,
"loss": 1.5887,
"step": 12325
},
{
"epoch": 3.76,
"learning_rate": 2.4969254185692542e-05,
"loss": 1.6195,
"step": 12350
},
{
"epoch": 3.77,
"learning_rate": 2.493881278538813e-05,
"loss": 1.614,
"step": 12375
},
{
"epoch": 3.77,
"learning_rate": 2.4908371385083717e-05,
"loss": 1.6406,
"step": 12400
},
{
"epoch": 3.78,
"learning_rate": 2.4877929984779302e-05,
"loss": 1.5978,
"step": 12425
},
{
"epoch": 3.79,
"learning_rate": 2.4847488584474885e-05,
"loss": 1.662,
"step": 12450
},
{
"epoch": 3.8,
"learning_rate": 2.4817047184170474e-05,
"loss": 1.5427,
"step": 12475
},
{
"epoch": 3.81,
"learning_rate": 2.478660578386606e-05,
"loss": 1.6268,
"step": 12500
},
{
"epoch": 3.81,
"learning_rate": 2.4756164383561645e-05,
"loss": 1.6355,
"step": 12525
},
{
"epoch": 3.82,
"learning_rate": 2.472572298325723e-05,
"loss": 1.5728,
"step": 12550
},
{
"epoch": 3.83,
"learning_rate": 2.469528158295282e-05,
"loss": 1.6309,
"step": 12575
},
{
"epoch": 3.84,
"learning_rate": 2.4664840182648405e-05,
"loss": 1.5316,
"step": 12600
},
{
"epoch": 3.84,
"learning_rate": 2.4634398782343988e-05,
"loss": 1.55,
"step": 12625
},
{
"epoch": 3.85,
"learning_rate": 2.4603957382039573e-05,
"loss": 1.6023,
"step": 12650
},
{
"epoch": 3.86,
"learning_rate": 2.4573515981735162e-05,
"loss": 1.5844,
"step": 12675
},
{
"epoch": 3.87,
"learning_rate": 2.4543074581430748e-05,
"loss": 1.6453,
"step": 12700
},
{
"epoch": 3.87,
"learning_rate": 2.4512633181126334e-05,
"loss": 1.509,
"step": 12725
},
{
"epoch": 3.88,
"learning_rate": 2.448219178082192e-05,
"loss": 1.5422,
"step": 12750
},
{
"epoch": 3.89,
"learning_rate": 2.445175038051751e-05,
"loss": 1.6115,
"step": 12775
},
{
"epoch": 3.9,
"learning_rate": 2.442130898021309e-05,
"loss": 1.5477,
"step": 12800
},
{
"epoch": 3.9,
"learning_rate": 2.4390867579908677e-05,
"loss": 1.6476,
"step": 12825
},
{
"epoch": 3.91,
"learning_rate": 2.4360426179604262e-05,
"loss": 1.6505,
"step": 12850
},
{
"epoch": 3.92,
"learning_rate": 2.432998477929985e-05,
"loss": 1.5546,
"step": 12875
},
{
"epoch": 3.93,
"learning_rate": 2.4299543378995437e-05,
"loss": 1.5796,
"step": 12900
},
{
"epoch": 3.93,
"learning_rate": 2.426910197869102e-05,
"loss": 1.5756,
"step": 12925
},
{
"epoch": 3.94,
"learning_rate": 2.4238660578386605e-05,
"loss": 1.5754,
"step": 12950
},
{
"epoch": 3.95,
"learning_rate": 2.4208219178082194e-05,
"loss": 1.5819,
"step": 12975
},
{
"epoch": 3.96,
"learning_rate": 2.417777777777778e-05,
"loss": 1.5826,
"step": 13000
},
{
"epoch": 3.96,
"learning_rate": 2.4147336377473365e-05,
"loss": 1.6289,
"step": 13025
},
{
"epoch": 3.97,
"learning_rate": 2.411689497716895e-05,
"loss": 1.5559,
"step": 13050
},
{
"epoch": 3.98,
"learning_rate": 2.408645357686454e-05,
"loss": 1.6191,
"step": 13075
},
{
"epoch": 3.99,
"learning_rate": 2.4056012176560122e-05,
"loss": 1.5491,
"step": 13100
},
{
"epoch": 4.0,
"learning_rate": 2.4025570776255708e-05,
"loss": 1.6345,
"step": 13125
},
{
"epoch": 4.0,
"learning_rate": 2.3995129375951294e-05,
"loss": 1.4829,
"step": 13150
},
{
"epoch": 4.01,
"learning_rate": 2.3964687975646883e-05,
"loss": 1.4067,
"step": 13175
},
{
"epoch": 4.02,
"learning_rate": 2.393424657534247e-05,
"loss": 1.3637,
"step": 13200
},
{
"epoch": 4.03,
"learning_rate": 2.3903805175038054e-05,
"loss": 1.2731,
"step": 13225
},
{
"epoch": 4.03,
"learning_rate": 2.3873363774733636e-05,
"loss": 1.3727,
"step": 13250
},
{
"epoch": 4.04,
"learning_rate": 2.3842922374429226e-05,
"loss": 1.2957,
"step": 13275
},
{
"epoch": 4.05,
"learning_rate": 2.381248097412481e-05,
"loss": 1.3658,
"step": 13300
},
{
"epoch": 4.06,
"learning_rate": 2.3782039573820397e-05,
"loss": 1.4263,
"step": 13325
},
{
"epoch": 4.06,
"learning_rate": 2.3751598173515983e-05,
"loss": 1.36,
"step": 13350
},
{
"epoch": 4.07,
"learning_rate": 2.372115677321157e-05,
"loss": 1.2446,
"step": 13375
},
{
"epoch": 4.08,
"learning_rate": 2.3690715372907157e-05,
"loss": 1.3264,
"step": 13400
},
{
"epoch": 4.09,
"learning_rate": 2.366027397260274e-05,
"loss": 1.3395,
"step": 13425
},
{
"epoch": 4.09,
"learning_rate": 2.3629832572298325e-05,
"loss": 1.3421,
"step": 13450
},
{
"epoch": 4.1,
"learning_rate": 2.3599391171993914e-05,
"loss": 1.4379,
"step": 13475
},
{
"epoch": 4.11,
"learning_rate": 2.35689497716895e-05,
"loss": 1.3355,
"step": 13500
},
{
"epoch": 4.12,
"learning_rate": 2.3538508371385086e-05,
"loss": 1.4226,
"step": 13525
},
{
"epoch": 4.12,
"learning_rate": 2.3508066971080668e-05,
"loss": 1.4034,
"step": 13550
},
{
"epoch": 4.13,
"learning_rate": 2.347762557077626e-05,
"loss": 1.3015,
"step": 13575
},
{
"epoch": 4.14,
"learning_rate": 2.3447184170471843e-05,
"loss": 1.3953,
"step": 13600
},
{
"epoch": 4.15,
"learning_rate": 2.341674277016743e-05,
"loss": 1.3534,
"step": 13625
},
{
"epoch": 4.16,
"learning_rate": 2.3386301369863014e-05,
"loss": 1.3378,
"step": 13650
},
{
"epoch": 4.16,
"learning_rate": 2.3355859969558603e-05,
"loss": 1.345,
"step": 13675
},
{
"epoch": 4.17,
"learning_rate": 2.332541856925419e-05,
"loss": 1.4098,
"step": 13700
},
{
"epoch": 4.18,
"learning_rate": 2.329497716894977e-05,
"loss": 1.3938,
"step": 13725
},
{
"epoch": 4.19,
"learning_rate": 2.3264535768645357e-05,
"loss": 1.4573,
"step": 13750
},
{
"epoch": 4.19,
"learning_rate": 2.3234094368340946e-05,
"loss": 1.396,
"step": 13775
},
{
"epoch": 4.2,
"learning_rate": 2.320365296803653e-05,
"loss": 1.3915,
"step": 13800
},
{
"epoch": 4.21,
"learning_rate": 2.3173211567732117e-05,
"loss": 1.4149,
"step": 13825
},
{
"epoch": 4.22,
"learning_rate": 2.3142770167427703e-05,
"loss": 1.4299,
"step": 13850
},
{
"epoch": 4.22,
"learning_rate": 2.3112328767123292e-05,
"loss": 1.388,
"step": 13875
},
{
"epoch": 4.23,
"learning_rate": 2.3081887366818874e-05,
"loss": 1.4334,
"step": 13900
},
{
"epoch": 4.24,
"learning_rate": 2.305144596651446e-05,
"loss": 1.358,
"step": 13925
},
{
"epoch": 4.25,
"learning_rate": 2.3021004566210046e-05,
"loss": 1.3849,
"step": 13950
},
{
"epoch": 4.25,
"learning_rate": 2.2990563165905635e-05,
"loss": 1.2529,
"step": 13975
},
{
"epoch": 4.26,
"learning_rate": 2.296012176560122e-05,
"loss": 1.406,
"step": 14000
},
{
"epoch": 4.27,
"learning_rate": 2.2929680365296806e-05,
"loss": 1.3589,
"step": 14025
},
{
"epoch": 4.28,
"learning_rate": 2.289923896499239e-05,
"loss": 1.3382,
"step": 14050
},
{
"epoch": 4.28,
"learning_rate": 2.2868797564687977e-05,
"loss": 1.4394,
"step": 14075
},
{
"epoch": 4.29,
"learning_rate": 2.2838356164383563e-05,
"loss": 1.4327,
"step": 14100
},
{
"epoch": 4.3,
"learning_rate": 2.280791476407915e-05,
"loss": 1.3955,
"step": 14125
},
{
"epoch": 4.31,
"learning_rate": 2.2777473363774735e-05,
"loss": 1.38,
"step": 14150
},
{
"epoch": 4.32,
"learning_rate": 2.2747031963470324e-05,
"loss": 1.3289,
"step": 14175
},
{
"epoch": 4.32,
"learning_rate": 2.271659056316591e-05,
"loss": 1.3282,
"step": 14200
},
{
"epoch": 4.33,
"learning_rate": 2.268614916286149e-05,
"loss": 1.4225,
"step": 14225
},
{
"epoch": 4.34,
"learning_rate": 2.2655707762557077e-05,
"loss": 1.358,
"step": 14250
},
{
"epoch": 4.35,
"learning_rate": 2.2625266362252666e-05,
"loss": 1.3493,
"step": 14275
},
{
"epoch": 4.35,
"learning_rate": 2.2594824961948252e-05,
"loss": 1.4606,
"step": 14300
},
{
"epoch": 4.36,
"learning_rate": 2.2564383561643838e-05,
"loss": 1.3846,
"step": 14325
},
{
"epoch": 4.37,
"learning_rate": 2.253394216133942e-05,
"loss": 1.3729,
"step": 14350
},
{
"epoch": 4.38,
"learning_rate": 2.2503500761035012e-05,
"loss": 1.3396,
"step": 14375
},
{
"epoch": 4.38,
"learning_rate": 2.2473059360730595e-05,
"loss": 1.3955,
"step": 14400
},
{
"epoch": 4.39,
"learning_rate": 2.244261796042618e-05,
"loss": 1.3225,
"step": 14425
},
{
"epoch": 4.4,
"learning_rate": 2.2412176560121766e-05,
"loss": 1.3583,
"step": 14450
},
{
"epoch": 4.41,
"learning_rate": 2.2381735159817355e-05,
"loss": 1.3146,
"step": 14475
},
{
"epoch": 4.41,
"learning_rate": 2.235129375951294e-05,
"loss": 1.4239,
"step": 14500
},
{
"epoch": 4.42,
"learning_rate": 2.2320852359208523e-05,
"loss": 1.3779,
"step": 14525
},
{
"epoch": 4.43,
"learning_rate": 2.229041095890411e-05,
"loss": 1.4361,
"step": 14550
},
{
"epoch": 4.44,
"learning_rate": 2.2259969558599698e-05,
"loss": 1.3766,
"step": 14575
},
{
"epoch": 4.44,
"learning_rate": 2.2229528158295284e-05,
"loss": 1.3609,
"step": 14600
},
{
"epoch": 4.45,
"learning_rate": 2.219908675799087e-05,
"loss": 1.4332,
"step": 14625
},
{
"epoch": 4.46,
"learning_rate": 2.2168645357686455e-05,
"loss": 1.4219,
"step": 14650
},
{
"epoch": 4.47,
"learning_rate": 2.2138203957382044e-05,
"loss": 1.4023,
"step": 14675
},
{
"epoch": 4.47,
"learning_rate": 2.2107762557077626e-05,
"loss": 1.4445,
"step": 14700
},
{
"epoch": 4.48,
"learning_rate": 2.2077321156773212e-05,
"loss": 1.3996,
"step": 14725
},
{
"epoch": 4.49,
"learning_rate": 2.2048097412480978e-05,
"loss": 1.3752,
"step": 14750
},
{
"epoch": 4.5,
"learning_rate": 2.2017656012176563e-05,
"loss": 1.3732,
"step": 14775
},
{
"epoch": 4.51,
"learning_rate": 2.198721461187215e-05,
"loss": 1.4234,
"step": 14800
},
{
"epoch": 4.51,
"learning_rate": 2.195677321156773e-05,
"loss": 1.3971,
"step": 14825
},
{
"epoch": 4.52,
"learning_rate": 2.192633181126332e-05,
"loss": 1.3729,
"step": 14850
},
{
"epoch": 4.53,
"learning_rate": 2.1895890410958906e-05,
"loss": 1.3405,
"step": 14875
},
{
"epoch": 4.54,
"learning_rate": 2.1865449010654492e-05,
"loss": 1.3572,
"step": 14900
},
{
"epoch": 4.54,
"learning_rate": 2.1835007610350077e-05,
"loss": 1.3331,
"step": 14925
},
{
"epoch": 4.55,
"learning_rate": 2.1804566210045667e-05,
"loss": 1.368,
"step": 14950
},
{
"epoch": 4.56,
"learning_rate": 2.1774124809741252e-05,
"loss": 1.3649,
"step": 14975
},
{
"epoch": 4.57,
"learning_rate": 2.1743683409436835e-05,
"loss": 1.389,
"step": 15000
},
{
"epoch": 4.57,
"learning_rate": 2.171324200913242e-05,
"loss": 1.4163,
"step": 15025
},
{
"epoch": 4.58,
"learning_rate": 2.168280060882801e-05,
"loss": 1.3442,
"step": 15050
},
{
"epoch": 4.59,
"learning_rate": 2.1652359208523595e-05,
"loss": 1.3446,
"step": 15075
},
{
"epoch": 4.6,
"learning_rate": 2.162191780821918e-05,
"loss": 1.3772,
"step": 15100
},
{
"epoch": 4.6,
"learning_rate": 2.1591476407914763e-05,
"loss": 1.4261,
"step": 15125
},
{
"epoch": 4.61,
"learning_rate": 2.1561035007610352e-05,
"loss": 1.4058,
"step": 15150
},
{
"epoch": 4.62,
"learning_rate": 2.1530593607305938e-05,
"loss": 1.3876,
"step": 15175
},
{
"epoch": 4.63,
"learning_rate": 2.1500152207001523e-05,
"loss": 1.3427,
"step": 15200
},
{
"epoch": 4.63,
"learning_rate": 2.146971080669711e-05,
"loss": 1.3778,
"step": 15225
},
{
"epoch": 4.64,
"learning_rate": 2.1439269406392698e-05,
"loss": 1.3358,
"step": 15250
},
{
"epoch": 4.65,
"learning_rate": 2.1408828006088284e-05,
"loss": 1.4508,
"step": 15275
},
{
"epoch": 4.66,
"learning_rate": 2.1378386605783866e-05,
"loss": 1.3823,
"step": 15300
},
{
"epoch": 4.67,
"learning_rate": 2.1347945205479452e-05,
"loss": 1.4207,
"step": 15325
},
{
"epoch": 4.67,
"learning_rate": 2.131750380517504e-05,
"loss": 1.3759,
"step": 15350
},
{
"epoch": 4.68,
"learning_rate": 2.1287062404870626e-05,
"loss": 1.4415,
"step": 15375
},
{
"epoch": 4.69,
"learning_rate": 2.1256621004566212e-05,
"loss": 1.3673,
"step": 15400
},
{
"epoch": 4.7,
"learning_rate": 2.1226179604261798e-05,
"loss": 1.4189,
"step": 15425
},
{
"epoch": 4.7,
"learning_rate": 2.1195738203957387e-05,
"loss": 1.3827,
"step": 15450
},
{
"epoch": 4.71,
"learning_rate": 2.116529680365297e-05,
"loss": 1.3898,
"step": 15475
},
{
"epoch": 4.72,
"learning_rate": 2.1134855403348555e-05,
"loss": 1.444,
"step": 15500
},
{
"epoch": 4.73,
"learning_rate": 2.110441400304414e-05,
"loss": 1.3941,
"step": 15525
},
{
"epoch": 4.73,
"learning_rate": 2.107397260273973e-05,
"loss": 1.4336,
"step": 15550
},
{
"epoch": 4.74,
"learning_rate": 2.1043531202435315e-05,
"loss": 1.4159,
"step": 15575
},
{
"epoch": 4.75,
"learning_rate": 2.10130898021309e-05,
"loss": 1.351,
"step": 15600
},
{
"epoch": 4.76,
"learning_rate": 2.0982648401826483e-05,
"loss": 1.336,
"step": 15625
},
{
"epoch": 4.76,
"learning_rate": 2.0952207001522072e-05,
"loss": 1.4279,
"step": 15650
},
{
"epoch": 4.77,
"learning_rate": 2.0921765601217658e-05,
"loss": 1.3384,
"step": 15675
},
{
"epoch": 4.78,
"learning_rate": 2.0891324200913244e-05,
"loss": 1.3338,
"step": 15700
},
{
"epoch": 4.79,
"learning_rate": 2.086088280060883e-05,
"loss": 1.3422,
"step": 15725
},
{
"epoch": 4.79,
"learning_rate": 2.083044140030442e-05,
"loss": 1.4765,
"step": 15750
},
{
"epoch": 4.8,
"learning_rate": 2.08e-05,
"loss": 1.4493,
"step": 15775
},
{
"epoch": 4.81,
"learning_rate": 2.0769558599695586e-05,
"loss": 1.4215,
"step": 15800
},
{
"epoch": 4.82,
"learning_rate": 2.0739117199391172e-05,
"loss": 1.4274,
"step": 15825
},
{
"epoch": 4.82,
"learning_rate": 2.070867579908676e-05,
"loss": 1.4245,
"step": 15850
},
{
"epoch": 4.83,
"learning_rate": 2.0678234398782347e-05,
"loss": 1.4564,
"step": 15875
},
{
"epoch": 4.84,
"learning_rate": 2.0647792998477933e-05,
"loss": 1.3672,
"step": 15900
},
{
"epoch": 4.85,
"learning_rate": 2.0617351598173515e-05,
"loss": 1.4063,
"step": 15925
},
{
"epoch": 4.86,
"learning_rate": 2.0586910197869104e-05,
"loss": 1.4454,
"step": 15950
},
{
"epoch": 4.86,
"learning_rate": 2.055646879756469e-05,
"loss": 1.4042,
"step": 15975
},
{
"epoch": 4.87,
"learning_rate": 2.0526027397260275e-05,
"loss": 1.3176,
"step": 16000
},
{
"epoch": 4.88,
"learning_rate": 2.049558599695586e-05,
"loss": 1.3937,
"step": 16025
},
{
"epoch": 4.89,
"learning_rate": 2.046514459665145e-05,
"loss": 1.4184,
"step": 16050
},
{
"epoch": 4.89,
"learning_rate": 2.0434703196347036e-05,
"loss": 1.3941,
"step": 16075
},
{
"epoch": 4.9,
"learning_rate": 2.0404261796042618e-05,
"loss": 1.3824,
"step": 16100
},
{
"epoch": 4.91,
"learning_rate": 2.0373820395738204e-05,
"loss": 1.3031,
"step": 16125
},
{
"epoch": 4.92,
"learning_rate": 2.0343378995433793e-05,
"loss": 1.3937,
"step": 16150
},
{
"epoch": 4.92,
"learning_rate": 2.031293759512938e-05,
"loss": 1.3045,
"step": 16175
},
{
"epoch": 4.93,
"learning_rate": 2.0282496194824964e-05,
"loss": 1.4681,
"step": 16200
},
{
"epoch": 4.94,
"learning_rate": 2.025205479452055e-05,
"loss": 1.4129,
"step": 16225
},
{
"epoch": 4.95,
"learning_rate": 2.022161339421614e-05,
"loss": 1.4236,
"step": 16250
},
{
"epoch": 4.95,
"learning_rate": 2.019117199391172e-05,
"loss": 1.4638,
"step": 16275
},
{
"epoch": 4.96,
"learning_rate": 2.0160730593607307e-05,
"loss": 1.4065,
"step": 16300
},
{
"epoch": 4.97,
"learning_rate": 2.0130289193302892e-05,
"loss": 1.395,
"step": 16325
},
{
"epoch": 4.98,
"learning_rate": 2.009984779299848e-05,
"loss": 1.441,
"step": 16350
},
{
"epoch": 4.98,
"learning_rate": 2.0069406392694067e-05,
"loss": 1.4944,
"step": 16375
},
{
"epoch": 4.99,
"learning_rate": 2.003896499238965e-05,
"loss": 1.4018,
"step": 16400
},
{
"epoch": 5.0,
"learning_rate": 2.0008523592085235e-05,
"loss": 1.4352,
"step": 16425
},
{
"epoch": 5.01,
"learning_rate": 1.9978082191780824e-05,
"loss": 1.1718,
"step": 16450
},
{
"epoch": 5.02,
"learning_rate": 1.994764079147641e-05,
"loss": 1.2079,
"step": 16475
},
{
"epoch": 5.02,
"learning_rate": 1.9917199391171996e-05,
"loss": 1.1671,
"step": 16500
},
{
"epoch": 5.03,
"learning_rate": 1.988675799086758e-05,
"loss": 1.1194,
"step": 16525
},
{
"epoch": 5.04,
"learning_rate": 1.9856316590563167e-05,
"loss": 1.2303,
"step": 16550
},
{
"epoch": 5.05,
"learning_rate": 1.9825875190258753e-05,
"loss": 1.1932,
"step": 16575
},
{
"epoch": 5.05,
"learning_rate": 1.979543378995434e-05,
"loss": 1.2365,
"step": 16600
},
{
"epoch": 5.06,
"learning_rate": 1.9764992389649927e-05,
"loss": 1.1352,
"step": 16625
},
{
"epoch": 5.07,
"learning_rate": 1.973455098934551e-05,
"loss": 1.205,
"step": 16650
},
{
"epoch": 5.08,
"learning_rate": 1.97041095890411e-05,
"loss": 1.2005,
"step": 16675
},
{
"epoch": 5.08,
"learning_rate": 1.9673668188736684e-05,
"loss": 1.2812,
"step": 16700
},
{
"epoch": 5.09,
"learning_rate": 1.964322678843227e-05,
"loss": 1.1773,
"step": 16725
},
{
"epoch": 5.1,
"learning_rate": 1.9612785388127856e-05,
"loss": 1.1904,
"step": 16750
},
{
"epoch": 5.11,
"learning_rate": 1.958234398782344e-05,
"loss": 1.2193,
"step": 16775
},
{
"epoch": 5.11,
"learning_rate": 1.9551902587519027e-05,
"loss": 1.1983,
"step": 16800
},
{
"epoch": 5.12,
"learning_rate": 1.9521461187214613e-05,
"loss": 1.1261,
"step": 16825
},
{
"epoch": 5.13,
"learning_rate": 1.94910197869102e-05,
"loss": 1.2459,
"step": 16850
},
{
"epoch": 5.14,
"learning_rate": 1.9460578386605788e-05,
"loss": 1.2696,
"step": 16875
},
{
"epoch": 5.14,
"learning_rate": 1.943013698630137e-05,
"loss": 1.1993,
"step": 16900
},
{
"epoch": 5.15,
"learning_rate": 1.939969558599696e-05,
"loss": 1.1595,
"step": 16925
},
{
"epoch": 5.16,
"learning_rate": 1.9369254185692545e-05,
"loss": 1.1438,
"step": 16950
},
{
"epoch": 5.17,
"learning_rate": 1.933881278538813e-05,
"loss": 1.2023,
"step": 16975
},
{
"epoch": 5.18,
"learning_rate": 1.9308371385083716e-05,
"loss": 1.2327,
"step": 17000
},
{
"epoch": 5.18,
"learning_rate": 1.92779299847793e-05,
"loss": 1.2655,
"step": 17025
},
{
"epoch": 5.19,
"learning_rate": 1.9247488584474887e-05,
"loss": 1.1495,
"step": 17050
},
{
"epoch": 5.2,
"learning_rate": 1.9217047184170473e-05,
"loss": 1.2121,
"step": 17075
},
{
"epoch": 5.21,
"learning_rate": 1.918660578386606e-05,
"loss": 1.2187,
"step": 17100
},
{
"epoch": 5.21,
"learning_rate": 1.9156164383561648e-05,
"loss": 1.2401,
"step": 17125
},
{
"epoch": 5.22,
"learning_rate": 1.912572298325723e-05,
"loss": 1.1772,
"step": 17150
},
{
"epoch": 5.23,
"learning_rate": 1.909528158295282e-05,
"loss": 1.2103,
"step": 17175
},
{
"epoch": 5.24,
"learning_rate": 1.90648401826484e-05,
"loss": 1.2021,
"step": 17200
},
{
"epoch": 5.24,
"learning_rate": 1.9035616438356167e-05,
"loss": 1.1839,
"step": 17225
},
{
"epoch": 5.25,
"learning_rate": 1.9005175038051753e-05,
"loss": 1.2107,
"step": 17250
},
{
"epoch": 5.26,
"learning_rate": 1.897473363774734e-05,
"loss": 1.2437,
"step": 17275
},
{
"epoch": 5.27,
"learning_rate": 1.8944292237442924e-05,
"loss": 1.2521,
"step": 17300
},
{
"epoch": 5.27,
"learning_rate": 1.8915068493150687e-05,
"loss": 1.1999,
"step": 17325
},
{
"epoch": 5.28,
"learning_rate": 1.8884627092846272e-05,
"loss": 1.1996,
"step": 17350
},
{
"epoch": 5.29,
"learning_rate": 1.8854185692541858e-05,
"loss": 1.245,
"step": 17375
},
{
"epoch": 5.3,
"learning_rate": 1.8823744292237444e-05,
"loss": 1.2139,
"step": 17400
},
{
"epoch": 5.3,
"learning_rate": 1.879330289193303e-05,
"loss": 1.196,
"step": 17425
},
{
"epoch": 5.31,
"learning_rate": 1.8762861491628615e-05,
"loss": 1.267,
"step": 17450
},
{
"epoch": 5.32,
"learning_rate": 1.87324200913242e-05,
"loss": 1.2152,
"step": 17475
},
{
"epoch": 5.33,
"learning_rate": 1.870197869101979e-05,
"loss": 1.2098,
"step": 17500
},
{
"epoch": 5.33,
"learning_rate": 1.8671537290715372e-05,
"loss": 1.2526,
"step": 17525
},
{
"epoch": 5.34,
"learning_rate": 1.864109589041096e-05,
"loss": 1.1557,
"step": 17550
},
{
"epoch": 5.35,
"learning_rate": 1.8610654490106547e-05,
"loss": 1.2998,
"step": 17575
},
{
"epoch": 5.36,
"learning_rate": 1.8580213089802133e-05,
"loss": 1.192,
"step": 17600
},
{
"epoch": 5.37,
"learning_rate": 1.8549771689497718e-05,
"loss": 1.1579,
"step": 17625
},
{
"epoch": 5.37,
"learning_rate": 1.8519330289193304e-05,
"loss": 1.2424,
"step": 17650
},
{
"epoch": 5.38,
"learning_rate": 1.848888888888889e-05,
"loss": 1.2178,
"step": 17675
},
{
"epoch": 5.39,
"learning_rate": 1.8458447488584475e-05,
"loss": 1.2272,
"step": 17700
},
{
"epoch": 5.4,
"learning_rate": 1.842800608828006e-05,
"loss": 1.2794,
"step": 17725
},
{
"epoch": 5.4,
"learning_rate": 1.839756468797565e-05,
"loss": 1.1844,
"step": 17750
},
{
"epoch": 5.41,
"learning_rate": 1.8367123287671232e-05,
"loss": 1.2341,
"step": 17775
},
{
"epoch": 5.42,
"learning_rate": 1.833668188736682e-05,
"loss": 1.1533,
"step": 17800
},
{
"epoch": 5.43,
"learning_rate": 1.8306240487062407e-05,
"loss": 1.2264,
"step": 17825
},
{
"epoch": 5.43,
"learning_rate": 1.8275799086757993e-05,
"loss": 1.222,
"step": 17850
},
{
"epoch": 5.44,
"learning_rate": 1.824535768645358e-05,
"loss": 1.1864,
"step": 17875
},
{
"epoch": 5.45,
"learning_rate": 1.8214916286149164e-05,
"loss": 1.1615,
"step": 17900
},
{
"epoch": 5.46,
"learning_rate": 1.818447488584475e-05,
"loss": 1.2101,
"step": 17925
},
{
"epoch": 5.46,
"learning_rate": 1.8154033485540335e-05,
"loss": 1.2291,
"step": 17950
},
{
"epoch": 5.47,
"learning_rate": 1.812359208523592e-05,
"loss": 1.1934,
"step": 17975
},
{
"epoch": 5.48,
"learning_rate": 1.809315068493151e-05,
"loss": 1.1713,
"step": 18000
},
{
"epoch": 5.49,
"learning_rate": 1.8062709284627092e-05,
"loss": 1.1374,
"step": 18025
},
{
"epoch": 5.49,
"learning_rate": 1.803226788432268e-05,
"loss": 1.1841,
"step": 18050
},
{
"epoch": 5.5,
"learning_rate": 1.8001826484018264e-05,
"loss": 1.2757,
"step": 18075
},
{
"epoch": 5.51,
"learning_rate": 1.7971385083713853e-05,
"loss": 1.225,
"step": 18100
},
{
"epoch": 5.52,
"learning_rate": 1.794094368340944e-05,
"loss": 1.1889,
"step": 18125
},
{
"epoch": 5.53,
"learning_rate": 1.7910502283105024e-05,
"loss": 1.2067,
"step": 18150
},
{
"epoch": 5.53,
"learning_rate": 1.788006088280061e-05,
"loss": 1.2683,
"step": 18175
},
{
"epoch": 5.54,
"learning_rate": 1.7849619482496196e-05,
"loss": 1.194,
"step": 18200
},
{
"epoch": 5.55,
"learning_rate": 1.781917808219178e-05,
"loss": 1.2623,
"step": 18225
},
{
"epoch": 5.56,
"learning_rate": 1.7788736681887367e-05,
"loss": 1.2462,
"step": 18250
},
{
"epoch": 5.56,
"learning_rate": 1.7758295281582953e-05,
"loss": 1.2488,
"step": 18275
},
{
"epoch": 5.57,
"learning_rate": 1.7727853881278542e-05,
"loss": 1.1575,
"step": 18300
},
{
"epoch": 5.58,
"learning_rate": 1.7697412480974124e-05,
"loss": 1.3261,
"step": 18325
},
{
"epoch": 5.59,
"learning_rate": 1.7666971080669713e-05,
"loss": 1.1545,
"step": 18350
},
{
"epoch": 5.59,
"learning_rate": 1.76365296803653e-05,
"loss": 1.2144,
"step": 18375
},
{
"epoch": 5.6,
"learning_rate": 1.7606088280060884e-05,
"loss": 1.2061,
"step": 18400
},
{
"epoch": 5.61,
"learning_rate": 1.757564687975647e-05,
"loss": 1.2538,
"step": 18425
},
{
"epoch": 5.62,
"learning_rate": 1.7545205479452056e-05,
"loss": 1.238,
"step": 18450
},
{
"epoch": 5.62,
"learning_rate": 1.751476407914764e-05,
"loss": 1.1711,
"step": 18475
},
{
"epoch": 5.63,
"learning_rate": 1.7484322678843227e-05,
"loss": 1.2815,
"step": 18500
},
{
"epoch": 5.64,
"learning_rate": 1.7453881278538813e-05,
"loss": 1.2561,
"step": 18525
},
{
"epoch": 5.65,
"learning_rate": 1.7423439878234402e-05,
"loss": 1.2703,
"step": 18550
},
{
"epoch": 5.65,
"learning_rate": 1.7392998477929984e-05,
"loss": 1.2548,
"step": 18575
},
{
"epoch": 5.66,
"learning_rate": 1.7362557077625573e-05,
"loss": 1.213,
"step": 18600
},
{
"epoch": 5.67,
"learning_rate": 1.733211567732116e-05,
"loss": 1.2203,
"step": 18625
},
{
"epoch": 5.68,
"learning_rate": 1.7301674277016745e-05,
"loss": 1.2282,
"step": 18650
},
{
"epoch": 5.68,
"learning_rate": 1.727123287671233e-05,
"loss": 1.1951,
"step": 18675
},
{
"epoch": 5.69,
"learning_rate": 1.7240791476407916e-05,
"loss": 1.2652,
"step": 18700
},
{
"epoch": 5.7,
"learning_rate": 1.72103500761035e-05,
"loss": 1.1692,
"step": 18725
},
{
"epoch": 5.71,
"learning_rate": 1.7179908675799087e-05,
"loss": 1.2708,
"step": 18750
},
{
"epoch": 5.72,
"learning_rate": 1.7149467275494673e-05,
"loss": 1.1725,
"step": 18775
},
{
"epoch": 5.72,
"learning_rate": 1.7119025875190262e-05,
"loss": 1.1644,
"step": 18800
},
{
"epoch": 5.73,
"learning_rate": 1.7088584474885844e-05,
"loss": 1.2263,
"step": 18825
},
{
"epoch": 5.74,
"learning_rate": 1.7058143074581433e-05,
"loss": 1.2848,
"step": 18850
},
{
"epoch": 5.75,
"learning_rate": 1.7027701674277016e-05,
"loss": 1.2118,
"step": 18875
},
{
"epoch": 5.75,
"learning_rate": 1.6997260273972605e-05,
"loss": 1.2332,
"step": 18900
},
{
"epoch": 5.76,
"learning_rate": 1.696681887366819e-05,
"loss": 1.1837,
"step": 18925
},
{
"epoch": 5.77,
"learning_rate": 1.6936377473363776e-05,
"loss": 1.2719,
"step": 18950
},
{
"epoch": 5.78,
"learning_rate": 1.6905936073059362e-05,
"loss": 1.1825,
"step": 18975
},
{
"epoch": 5.78,
"learning_rate": 1.6875494672754948e-05,
"loss": 1.2161,
"step": 19000
},
{
"epoch": 5.79,
"learning_rate": 1.6845053272450533e-05,
"loss": 1.2578,
"step": 19025
},
{
"epoch": 5.8,
"learning_rate": 1.681461187214612e-05,
"loss": 1.2,
"step": 19050
},
{
"epoch": 5.81,
"learning_rate": 1.6784170471841705e-05,
"loss": 1.2705,
"step": 19075
},
{
"epoch": 5.81,
"learning_rate": 1.6753729071537294e-05,
"loss": 1.2378,
"step": 19100
},
{
"epoch": 5.82,
"learning_rate": 1.6723287671232876e-05,
"loss": 1.2475,
"step": 19125
},
{
"epoch": 5.83,
"learning_rate": 1.6692846270928465e-05,
"loss": 1.2566,
"step": 19150
},
{
"epoch": 5.84,
"learning_rate": 1.666240487062405e-05,
"loss": 1.3268,
"step": 19175
},
{
"epoch": 5.84,
"learning_rate": 1.6631963470319636e-05,
"loss": 1.244,
"step": 19200
},
{
"epoch": 5.85,
"learning_rate": 1.6601522070015222e-05,
"loss": 1.2251,
"step": 19225
},
{
"epoch": 5.86,
"learning_rate": 1.6571080669710808e-05,
"loss": 1.1907,
"step": 19250
},
{
"epoch": 5.87,
"learning_rate": 1.6540639269406393e-05,
"loss": 1.1661,
"step": 19275
},
{
"epoch": 5.88,
"learning_rate": 1.651019786910198e-05,
"loss": 1.1985,
"step": 19300
},
{
"epoch": 5.88,
"learning_rate": 1.6479756468797565e-05,
"loss": 1.1985,
"step": 19325
},
{
"epoch": 5.89,
"learning_rate": 1.6449315068493154e-05,
"loss": 1.2457,
"step": 19350
},
{
"epoch": 5.9,
"learning_rate": 1.6418873668188736e-05,
"loss": 1.1869,
"step": 19375
},
{
"epoch": 5.91,
"learning_rate": 1.6388432267884325e-05,
"loss": 1.1581,
"step": 19400
},
{
"epoch": 5.91,
"learning_rate": 1.635799086757991e-05,
"loss": 1.2958,
"step": 19425
},
{
"epoch": 5.92,
"learning_rate": 1.6327549467275497e-05,
"loss": 1.1531,
"step": 19450
},
{
"epoch": 5.93,
"learning_rate": 1.6297108066971082e-05,
"loss": 1.2319,
"step": 19475
},
{
"epoch": 5.94,
"learning_rate": 1.6266666666666668e-05,
"loss": 1.2317,
"step": 19500
},
{
"epoch": 5.94,
"learning_rate": 1.6236225266362254e-05,
"loss": 1.1822,
"step": 19525
},
{
"epoch": 5.95,
"learning_rate": 1.620578386605784e-05,
"loss": 1.2323,
"step": 19550
},
{
"epoch": 5.96,
"learning_rate": 1.6175342465753425e-05,
"loss": 1.2421,
"step": 19575
},
{
"epoch": 5.97,
"learning_rate": 1.6144901065449014e-05,
"loss": 1.1909,
"step": 19600
},
{
"epoch": 5.97,
"learning_rate": 1.6114459665144596e-05,
"loss": 1.1956,
"step": 19625
},
{
"epoch": 5.98,
"learning_rate": 1.6084018264840185e-05,
"loss": 1.2423,
"step": 19650
},
{
"epoch": 5.99,
"learning_rate": 1.6053576864535768e-05,
"loss": 1.1934,
"step": 19675
},
{
"epoch": 6.0,
"learning_rate": 1.6023135464231357e-05,
"loss": 1.2855,
"step": 19700
},
{
"epoch": 6.0,
"learning_rate": 1.5992694063926942e-05,
"loss": 1.1549,
"step": 19725
},
{
"epoch": 6.01,
"learning_rate": 1.5962252663622528e-05,
"loss": 1.0158,
"step": 19750
},
{
"epoch": 6.02,
"learning_rate": 1.5931811263318114e-05,
"loss": 1.1329,
"step": 19775
},
{
"epoch": 6.03,
"learning_rate": 1.59013698630137e-05,
"loss": 1.0144,
"step": 19800
},
{
"epoch": 6.04,
"learning_rate": 1.5870928462709285e-05,
"loss": 1.0526,
"step": 19825
},
{
"epoch": 6.04,
"learning_rate": 1.584048706240487e-05,
"loss": 1.0858,
"step": 19850
},
{
"epoch": 6.05,
"learning_rate": 1.5810045662100456e-05,
"loss": 1.0668,
"step": 19875
},
{
"epoch": 6.06,
"learning_rate": 1.5779604261796046e-05,
"loss": 1.0202,
"step": 19900
},
{
"epoch": 6.07,
"learning_rate": 1.5749162861491628e-05,
"loss": 1.0321,
"step": 19925
},
{
"epoch": 6.07,
"learning_rate": 1.5718721461187217e-05,
"loss": 1.0682,
"step": 19950
},
{
"epoch": 6.08,
"learning_rate": 1.5688280060882803e-05,
"loss": 1.0644,
"step": 19975
},
{
"epoch": 6.09,
"learning_rate": 1.5657838660578388e-05,
"loss": 1.1266,
"step": 20000
},
{
"epoch": 6.1,
"learning_rate": 1.5627397260273974e-05,
"loss": 1.0861,
"step": 20025
},
{
"epoch": 6.1,
"learning_rate": 1.559695585996956e-05,
"loss": 1.0446,
"step": 20050
},
{
"epoch": 6.11,
"learning_rate": 1.5566514459665145e-05,
"loss": 1.0408,
"step": 20075
},
{
"epoch": 6.12,
"learning_rate": 1.553607305936073e-05,
"loss": 1.0181,
"step": 20100
},
{
"epoch": 6.13,
"learning_rate": 1.5505631659056317e-05,
"loss": 1.0721,
"step": 20125
},
{
"epoch": 6.13,
"learning_rate": 1.5475190258751906e-05,
"loss": 1.08,
"step": 20150
},
{
"epoch": 6.14,
"learning_rate": 1.5444748858447488e-05,
"loss": 1.1263,
"step": 20175
},
{
"epoch": 6.15,
"learning_rate": 1.5414307458143077e-05,
"loss": 1.0331,
"step": 20200
},
{
"epoch": 6.16,
"learning_rate": 1.5383866057838663e-05,
"loss": 1.0047,
"step": 20225
},
{
"epoch": 6.16,
"learning_rate": 1.535342465753425e-05,
"loss": 1.0146,
"step": 20250
},
{
"epoch": 6.17,
"learning_rate": 1.5322983257229834e-05,
"loss": 1.0828,
"step": 20275
},
{
"epoch": 6.18,
"learning_rate": 1.529254185692542e-05,
"loss": 1.0656,
"step": 20300
},
{
"epoch": 6.19,
"learning_rate": 1.5262100456621006e-05,
"loss": 1.0661,
"step": 20325
},
{
"epoch": 6.19,
"learning_rate": 1.5231659056316593e-05,
"loss": 1.0858,
"step": 20350
},
{
"epoch": 6.2,
"learning_rate": 1.5201217656012177e-05,
"loss": 1.0975,
"step": 20375
},
{
"epoch": 6.21,
"learning_rate": 1.5170776255707764e-05,
"loss": 1.0874,
"step": 20400
},
{
"epoch": 6.22,
"learning_rate": 1.514033485540335e-05,
"loss": 1.061,
"step": 20425
},
{
"epoch": 6.23,
"learning_rate": 1.5109893455098936e-05,
"loss": 1.0563,
"step": 20450
},
{
"epoch": 6.23,
"learning_rate": 1.5079452054794521e-05,
"loss": 1.0779,
"step": 20475
},
{
"epoch": 6.24,
"learning_rate": 1.5049010654490109e-05,
"loss": 1.0346,
"step": 20500
},
{
"epoch": 6.25,
"learning_rate": 1.5018569254185693e-05,
"loss": 1.0741,
"step": 20525
},
{
"epoch": 6.26,
"learning_rate": 1.498812785388128e-05,
"loss": 1.0446,
"step": 20550
},
{
"epoch": 6.26,
"learning_rate": 1.4957686453576866e-05,
"loss": 1.0447,
"step": 20575
},
{
"epoch": 6.27,
"learning_rate": 1.4927245053272451e-05,
"loss": 1.0887,
"step": 20600
},
{
"epoch": 6.28,
"learning_rate": 1.4896803652968037e-05,
"loss": 1.0644,
"step": 20625
},
{
"epoch": 6.29,
"learning_rate": 1.4866362252663624e-05,
"loss": 1.0262,
"step": 20650
},
{
"epoch": 6.29,
"learning_rate": 1.4835920852359208e-05,
"loss": 1.0814,
"step": 20675
},
{
"epoch": 6.3,
"learning_rate": 1.4805479452054796e-05,
"loss": 1.0796,
"step": 20700
},
{
"epoch": 6.31,
"learning_rate": 1.4775038051750381e-05,
"loss": 1.0907,
"step": 20725
},
{
"epoch": 6.32,
"learning_rate": 1.4744596651445969e-05,
"loss": 1.0584,
"step": 20750
},
{
"epoch": 6.32,
"learning_rate": 1.4714155251141553e-05,
"loss": 1.0504,
"step": 20775
},
{
"epoch": 6.33,
"learning_rate": 1.468371385083714e-05,
"loss": 1.0383,
"step": 20800
},
{
"epoch": 6.34,
"learning_rate": 1.4653272450532726e-05,
"loss": 1.0387,
"step": 20825
},
{
"epoch": 6.35,
"learning_rate": 1.4622831050228312e-05,
"loss": 1.0644,
"step": 20850
},
{
"epoch": 6.35,
"learning_rate": 1.4592389649923897e-05,
"loss": 1.008,
"step": 20875
},
{
"epoch": 6.36,
"learning_rate": 1.4561948249619485e-05,
"loss": 1.086,
"step": 20900
},
{
"epoch": 6.37,
"learning_rate": 1.4531506849315069e-05,
"loss": 1.0496,
"step": 20925
},
{
"epoch": 6.38,
"learning_rate": 1.4501065449010656e-05,
"loss": 0.9918,
"step": 20950
},
{
"epoch": 6.39,
"learning_rate": 1.4470624048706242e-05,
"loss": 1.0559,
"step": 20975
},
{
"epoch": 6.39,
"learning_rate": 1.4440182648401827e-05,
"loss": 1.0467,
"step": 21000
},
{
"epoch": 6.4,
"learning_rate": 1.4409741248097413e-05,
"loss": 1.0557,
"step": 21025
},
{
"epoch": 6.41,
"learning_rate": 1.4379299847793e-05,
"loss": 1.0846,
"step": 21050
},
{
"epoch": 6.42,
"learning_rate": 1.4348858447488584e-05,
"loss": 1.0836,
"step": 21075
},
{
"epoch": 6.42,
"learning_rate": 1.4318417047184172e-05,
"loss": 1.082,
"step": 21100
},
{
"epoch": 6.43,
"learning_rate": 1.4287975646879757e-05,
"loss": 1.0596,
"step": 21125
},
{
"epoch": 6.44,
"learning_rate": 1.4257534246575345e-05,
"loss": 1.0321,
"step": 21150
},
{
"epoch": 6.45,
"learning_rate": 1.4227092846270929e-05,
"loss": 1.0805,
"step": 21175
},
{
"epoch": 6.45,
"learning_rate": 1.4196651445966516e-05,
"loss": 1.1013,
"step": 21200
},
{
"epoch": 6.46,
"learning_rate": 1.4166210045662102e-05,
"loss": 1.0287,
"step": 21225
},
{
"epoch": 6.47,
"learning_rate": 1.4135768645357688e-05,
"loss": 1.0995,
"step": 21250
},
{
"epoch": 6.48,
"learning_rate": 1.4105327245053273e-05,
"loss": 1.0834,
"step": 21275
},
{
"epoch": 6.48,
"learning_rate": 1.407488584474886e-05,
"loss": 1.0702,
"step": 21300
},
{
"epoch": 6.49,
"learning_rate": 1.4045662100456623e-05,
"loss": 1.0709,
"step": 21325
},
{
"epoch": 6.5,
"learning_rate": 1.4015220700152209e-05,
"loss": 1.126,
"step": 21350
},
{
"epoch": 6.51,
"learning_rate": 1.3984779299847794e-05,
"loss": 1.0661,
"step": 21375
},
{
"epoch": 6.51,
"learning_rate": 1.395433789954338e-05,
"loss": 1.0154,
"step": 21400
},
{
"epoch": 6.52,
"learning_rate": 1.3923896499238967e-05,
"loss": 1.0203,
"step": 21425
},
{
"epoch": 6.53,
"learning_rate": 1.3893455098934551e-05,
"loss": 1.0997,
"step": 21450
},
{
"epoch": 6.54,
"learning_rate": 1.3863013698630139e-05,
"loss": 1.1286,
"step": 21475
},
{
"epoch": 6.54,
"learning_rate": 1.3832572298325724e-05,
"loss": 1.0367,
"step": 21500
},
{
"epoch": 6.55,
"learning_rate": 1.380213089802131e-05,
"loss": 1.0712,
"step": 21525
},
{
"epoch": 6.56,
"learning_rate": 1.3771689497716896e-05,
"loss": 1.0636,
"step": 21550
},
{
"epoch": 6.57,
"learning_rate": 1.3741248097412483e-05,
"loss": 1.056,
"step": 21575
},
{
"epoch": 6.58,
"learning_rate": 1.3710806697108067e-05,
"loss": 1.107,
"step": 21600
},
{
"epoch": 6.58,
"learning_rate": 1.3680365296803655e-05,
"loss": 1.1084,
"step": 21625
},
{
"epoch": 6.59,
"learning_rate": 1.364992389649924e-05,
"loss": 1.0469,
"step": 21650
},
{
"epoch": 6.6,
"learning_rate": 1.3619482496194828e-05,
"loss": 1.0499,
"step": 21675
},
{
"epoch": 6.61,
"learning_rate": 1.3589041095890412e-05,
"loss": 1.0193,
"step": 21700
},
{
"epoch": 6.61,
"learning_rate": 1.3558599695585999e-05,
"loss": 1.0213,
"step": 21725
},
{
"epoch": 6.62,
"learning_rate": 1.3528158295281583e-05,
"loss": 1.0763,
"step": 21750
},
{
"epoch": 6.63,
"learning_rate": 1.349771689497717e-05,
"loss": 1.0602,
"step": 21775
},
{
"epoch": 6.64,
"learning_rate": 1.3467275494672756e-05,
"loss": 1.0075,
"step": 21800
},
{
"epoch": 6.64,
"learning_rate": 1.3436834094368343e-05,
"loss": 1.0394,
"step": 21825
},
{
"epoch": 6.65,
"learning_rate": 1.3406392694063927e-05,
"loss": 1.1018,
"step": 21850
},
{
"epoch": 6.66,
"learning_rate": 1.3375951293759515e-05,
"loss": 1.1407,
"step": 21875
},
{
"epoch": 6.67,
"learning_rate": 1.33455098934551e-05,
"loss": 1.0651,
"step": 21900
},
{
"epoch": 6.67,
"learning_rate": 1.3316286149162863e-05,
"loss": 1.1539,
"step": 21925
},
{
"epoch": 6.68,
"learning_rate": 1.328584474885845e-05,
"loss": 1.1164,
"step": 21950
},
{
"epoch": 6.69,
"learning_rate": 1.3255403348554034e-05,
"loss": 1.0859,
"step": 21975
},
{
"epoch": 6.7,
"learning_rate": 1.3224961948249621e-05,
"loss": 1.0339,
"step": 22000
},
{
"epoch": 6.7,
"learning_rate": 1.3194520547945207e-05,
"loss": 1.0881,
"step": 22025
},
{
"epoch": 6.71,
"learning_rate": 1.3164079147640793e-05,
"loss": 1.0617,
"step": 22050
},
{
"epoch": 6.72,
"learning_rate": 1.3133637747336379e-05,
"loss": 1.0946,
"step": 22075
},
{
"epoch": 6.73,
"learning_rate": 1.3103196347031966e-05,
"loss": 1.0516,
"step": 22100
},
{
"epoch": 6.74,
"learning_rate": 1.307275494672755e-05,
"loss": 1.0097,
"step": 22125
},
{
"epoch": 6.74,
"learning_rate": 1.3042313546423137e-05,
"loss": 0.9982,
"step": 22150
},
{
"epoch": 6.75,
"learning_rate": 1.3011872146118723e-05,
"loss": 1.0149,
"step": 22175
},
{
"epoch": 6.76,
"learning_rate": 1.298143074581431e-05,
"loss": 1.0674,
"step": 22200
},
{
"epoch": 6.77,
"learning_rate": 1.2950989345509894e-05,
"loss": 1.0485,
"step": 22225
},
{
"epoch": 6.77,
"learning_rate": 1.2920547945205482e-05,
"loss": 1.0641,
"step": 22250
},
{
"epoch": 6.78,
"learning_rate": 1.2890106544901066e-05,
"loss": 1.0306,
"step": 22275
},
{
"epoch": 6.79,
"learning_rate": 1.2859665144596653e-05,
"loss": 1.0526,
"step": 22300
},
{
"epoch": 6.8,
"learning_rate": 1.2829223744292239e-05,
"loss": 1.1053,
"step": 22325
},
{
"epoch": 6.8,
"learning_rate": 1.2798782343987823e-05,
"loss": 1.069,
"step": 22350
},
{
"epoch": 6.81,
"learning_rate": 1.276834094368341e-05,
"loss": 1.0654,
"step": 22375
},
{
"epoch": 6.82,
"learning_rate": 1.2737899543378996e-05,
"loss": 1.0341,
"step": 22400
},
{
"epoch": 6.83,
"learning_rate": 1.2707458143074583e-05,
"loss": 1.0856,
"step": 22425
},
{
"epoch": 6.83,
"learning_rate": 1.2677016742770167e-05,
"loss": 1.0456,
"step": 22450
},
{
"epoch": 6.84,
"learning_rate": 1.2646575342465755e-05,
"loss": 1.0782,
"step": 22475
},
{
"epoch": 6.85,
"learning_rate": 1.261613394216134e-05,
"loss": 1.0911,
"step": 22500
},
{
"epoch": 6.86,
"learning_rate": 1.2585692541856926e-05,
"loss": 1.0151,
"step": 22525
},
{
"epoch": 6.86,
"learning_rate": 1.2555251141552512e-05,
"loss": 1.1133,
"step": 22550
},
{
"epoch": 6.87,
"learning_rate": 1.2524809741248099e-05,
"loss": 1.098,
"step": 22575
},
{
"epoch": 6.88,
"learning_rate": 1.2494368340943683e-05,
"loss": 1.0943,
"step": 22600
},
{
"epoch": 6.89,
"learning_rate": 1.246392694063927e-05,
"loss": 1.0622,
"step": 22625
},
{
"epoch": 6.89,
"learning_rate": 1.2433485540334856e-05,
"loss": 1.0801,
"step": 22650
},
{
"epoch": 6.9,
"learning_rate": 1.2403044140030442e-05,
"loss": 1.0496,
"step": 22675
},
{
"epoch": 6.91,
"learning_rate": 1.2372602739726027e-05,
"loss": 1.1017,
"step": 22700
},
{
"epoch": 6.92,
"learning_rate": 1.2342161339421615e-05,
"loss": 1.045,
"step": 22725
},
{
"epoch": 6.93,
"learning_rate": 1.2311719939117199e-05,
"loss": 1.1071,
"step": 22750
},
{
"epoch": 6.93,
"learning_rate": 1.2281278538812786e-05,
"loss": 1.0852,
"step": 22775
},
{
"epoch": 6.94,
"learning_rate": 1.2250837138508372e-05,
"loss": 1.1576,
"step": 22800
},
{
"epoch": 6.95,
"learning_rate": 1.2220395738203959e-05,
"loss": 1.079,
"step": 22825
},
{
"epoch": 6.96,
"learning_rate": 1.2189954337899543e-05,
"loss": 1.1521,
"step": 22850
},
{
"epoch": 6.96,
"learning_rate": 1.215951293759513e-05,
"loss": 1.0449,
"step": 22875
},
{
"epoch": 6.97,
"learning_rate": 1.2129071537290714e-05,
"loss": 1.1417,
"step": 22900
},
{
"epoch": 6.98,
"learning_rate": 1.2098630136986302e-05,
"loss": 1.0472,
"step": 22925
},
{
"epoch": 6.99,
"learning_rate": 1.2068188736681888e-05,
"loss": 1.0093,
"step": 22950
},
{
"epoch": 6.99,
"learning_rate": 1.2037747336377475e-05,
"loss": 1.0782,
"step": 22975
},
{
"epoch": 7.0,
"learning_rate": 1.2007305936073059e-05,
"loss": 1.0471,
"step": 23000
},
{
"epoch": 7.01,
"learning_rate": 1.1976864535768646e-05,
"loss": 0.9486,
"step": 23025
},
{
"epoch": 7.02,
"learning_rate": 1.1946423135464232e-05,
"loss": 0.974,
"step": 23050
},
{
"epoch": 7.02,
"learning_rate": 1.1915981735159818e-05,
"loss": 0.9259,
"step": 23075
},
{
"epoch": 7.03,
"learning_rate": 1.1885540334855403e-05,
"loss": 0.928,
"step": 23100
},
{
"epoch": 7.04,
"learning_rate": 1.185509893455099e-05,
"loss": 0.9293,
"step": 23125
},
{
"epoch": 7.05,
"learning_rate": 1.1824657534246575e-05,
"loss": 0.9387,
"step": 23150
},
{
"epoch": 7.05,
"learning_rate": 1.1794216133942162e-05,
"loss": 0.8687,
"step": 23175
},
{
"epoch": 7.06,
"learning_rate": 1.1763774733637748e-05,
"loss": 0.9467,
"step": 23200
},
{
"epoch": 7.07,
"learning_rate": 1.1733333333333335e-05,
"loss": 0.8817,
"step": 23225
},
{
"epoch": 7.08,
"learning_rate": 1.1702891933028919e-05,
"loss": 0.9304,
"step": 23250
},
{
"epoch": 7.09,
"learning_rate": 1.1672450532724506e-05,
"loss": 0.9319,
"step": 23275
},
{
"epoch": 7.09,
"learning_rate": 1.164200913242009e-05,
"loss": 0.9488,
"step": 23300
},
{
"epoch": 7.1,
"learning_rate": 1.1611567732115678e-05,
"loss": 0.9054,
"step": 23325
},
{
"epoch": 7.11,
"learning_rate": 1.1581126331811263e-05,
"loss": 0.9246,
"step": 23350
},
{
"epoch": 7.12,
"learning_rate": 1.155068493150685e-05,
"loss": 0.898,
"step": 23375
},
{
"epoch": 7.12,
"learning_rate": 1.1520243531202435e-05,
"loss": 0.947,
"step": 23400
},
{
"epoch": 7.13,
"learning_rate": 1.1489802130898022e-05,
"loss": 0.9288,
"step": 23425
},
{
"epoch": 7.14,
"learning_rate": 1.1459360730593608e-05,
"loss": 0.9544,
"step": 23450
},
{
"epoch": 7.15,
"learning_rate": 1.1428919330289194e-05,
"loss": 0.9711,
"step": 23475
},
{
"epoch": 7.15,
"learning_rate": 1.139847792998478e-05,
"loss": 0.9133,
"step": 23500
},
{
"epoch": 7.16,
"learning_rate": 1.1368036529680367e-05,
"loss": 0.9182,
"step": 23525
},
{
"epoch": 7.17,
"learning_rate": 1.133759512937595e-05,
"loss": 0.8882,
"step": 23550
},
{
"epoch": 7.18,
"learning_rate": 1.1307153729071538e-05,
"loss": 0.9431,
"step": 23575
},
{
"epoch": 7.18,
"learning_rate": 1.1276712328767124e-05,
"loss": 0.9347,
"step": 23600
},
{
"epoch": 7.19,
"learning_rate": 1.1246270928462711e-05,
"loss": 0.9397,
"step": 23625
},
{
"epoch": 7.2,
"learning_rate": 1.1215829528158295e-05,
"loss": 0.8936,
"step": 23650
},
{
"epoch": 7.21,
"learning_rate": 1.1185388127853882e-05,
"loss": 0.9422,
"step": 23675
},
{
"epoch": 7.21,
"learning_rate": 1.1154946727549466e-05,
"loss": 0.994,
"step": 23700
},
{
"epoch": 7.22,
"learning_rate": 1.1124505327245054e-05,
"loss": 0.9458,
"step": 23725
},
{
"epoch": 7.23,
"learning_rate": 1.109406392694064e-05,
"loss": 0.9833,
"step": 23750
},
{
"epoch": 7.24,
"learning_rate": 1.1063622526636227e-05,
"loss": 0.9156,
"step": 23775
},
{
"epoch": 7.25,
"learning_rate": 1.103318112633181e-05,
"loss": 0.952,
"step": 23800
},
{
"epoch": 7.25,
"learning_rate": 1.1002739726027398e-05,
"loss": 0.9329,
"step": 23825
},
{
"epoch": 7.26,
"learning_rate": 1.0972298325722984e-05,
"loss": 0.969,
"step": 23850
},
{
"epoch": 7.27,
"learning_rate": 1.094185692541857e-05,
"loss": 0.9036,
"step": 23875
},
{
"epoch": 7.28,
"learning_rate": 1.0911415525114155e-05,
"loss": 0.893,
"step": 23900
},
{
"epoch": 7.28,
"learning_rate": 1.0880974124809743e-05,
"loss": 1.0085,
"step": 23925
},
{
"epoch": 7.29,
"learning_rate": 1.0850532724505327e-05,
"loss": 0.92,
"step": 23950
},
{
"epoch": 7.3,
"learning_rate": 1.0820091324200914e-05,
"loss": 0.928,
"step": 23975
},
{
"epoch": 7.31,
"learning_rate": 1.07896499238965e-05,
"loss": 0.9017,
"step": 24000
},
{
"epoch": 7.31,
"learning_rate": 1.0759208523592087e-05,
"loss": 0.9032,
"step": 24025
},
{
"epoch": 7.32,
"learning_rate": 1.0728767123287671e-05,
"loss": 0.9073,
"step": 24050
},
{
"epoch": 7.33,
"learning_rate": 1.0698325722983258e-05,
"loss": 0.9447,
"step": 24075
},
{
"epoch": 7.34,
"learning_rate": 1.0667884322678842e-05,
"loss": 0.9341,
"step": 24100
},
{
"epoch": 7.34,
"learning_rate": 1.0638660578386606e-05,
"loss": 0.9369,
"step": 24125
},
{
"epoch": 7.35,
"learning_rate": 1.0608219178082194e-05,
"loss": 0.9721,
"step": 24150
},
{
"epoch": 7.36,
"learning_rate": 1.0577777777777778e-05,
"loss": 0.9367,
"step": 24175
},
{
"epoch": 7.37,
"learning_rate": 1.0547336377473365e-05,
"loss": 0.9746,
"step": 24200
},
{
"epoch": 7.37,
"learning_rate": 1.0516894977168949e-05,
"loss": 0.914,
"step": 24225
},
{
"epoch": 7.38,
"learning_rate": 1.0486453576864537e-05,
"loss": 0.8861,
"step": 24250
},
{
"epoch": 7.39,
"learning_rate": 1.0456012176560122e-05,
"loss": 0.9416,
"step": 24275
},
{
"epoch": 7.4,
"learning_rate": 1.042557077625571e-05,
"loss": 0.9378,
"step": 24300
},
{
"epoch": 7.4,
"learning_rate": 1.0395129375951294e-05,
"loss": 0.9149,
"step": 24325
},
{
"epoch": 7.41,
"learning_rate": 1.0364687975646881e-05,
"loss": 0.9661,
"step": 24350
},
{
"epoch": 7.42,
"learning_rate": 1.0334246575342467e-05,
"loss": 0.9946,
"step": 24375
},
{
"epoch": 7.43,
"learning_rate": 1.0303805175038052e-05,
"loss": 0.9153,
"step": 24400
},
{
"epoch": 7.44,
"learning_rate": 1.0273363774733638e-05,
"loss": 1.0122,
"step": 24425
},
{
"epoch": 7.44,
"learning_rate": 1.0242922374429225e-05,
"loss": 0.9563,
"step": 24450
},
{
"epoch": 7.45,
"learning_rate": 1.021248097412481e-05,
"loss": 0.9451,
"step": 24475
},
{
"epoch": 7.46,
"learning_rate": 1.0182039573820397e-05,
"loss": 0.9543,
"step": 24500
},
{
"epoch": 7.47,
"learning_rate": 1.0151598173515982e-05,
"loss": 0.9336,
"step": 24525
},
{
"epoch": 7.47,
"learning_rate": 1.012115677321157e-05,
"loss": 0.9474,
"step": 24550
},
{
"epoch": 7.48,
"learning_rate": 1.0090715372907154e-05,
"loss": 0.9503,
"step": 24575
},
{
"epoch": 7.49,
"learning_rate": 1.0060273972602741e-05,
"loss": 0.9395,
"step": 24600
},
{
"epoch": 7.5,
"learning_rate": 1.0029832572298325e-05,
"loss": 0.9076,
"step": 24625
},
{
"epoch": 7.5,
"learning_rate": 9.999391171993912e-06,
"loss": 0.9026,
"step": 24650
},
{
"epoch": 7.51,
"learning_rate": 9.968949771689498e-06,
"loss": 0.9039,
"step": 24675
},
{
"epoch": 7.52,
"learning_rate": 9.938508371385086e-06,
"loss": 0.9744,
"step": 24700
},
{
"epoch": 7.53,
"learning_rate": 9.908066971080671e-06,
"loss": 0.9818,
"step": 24725
},
{
"epoch": 7.53,
"learning_rate": 9.877625570776257e-06,
"loss": 0.8859,
"step": 24750
},
{
"epoch": 7.54,
"learning_rate": 9.847184170471843e-06,
"loss": 0.9602,
"step": 24775
},
{
"epoch": 7.55,
"learning_rate": 9.816742770167428e-06,
"loss": 0.9405,
"step": 24800
},
{
"epoch": 7.56,
"learning_rate": 9.786301369863016e-06,
"loss": 0.9669,
"step": 24825
},
{
"epoch": 7.56,
"learning_rate": 9.755859969558601e-06,
"loss": 0.9106,
"step": 24850
},
{
"epoch": 7.57,
"learning_rate": 9.725418569254187e-06,
"loss": 0.9118,
"step": 24875
},
{
"epoch": 7.58,
"learning_rate": 9.694977168949773e-06,
"loss": 0.981,
"step": 24900
},
{
"epoch": 7.59,
"learning_rate": 9.664535768645358e-06,
"loss": 0.974,
"step": 24925
},
{
"epoch": 7.6,
"learning_rate": 9.634094368340946e-06,
"loss": 0.8843,
"step": 24950
},
{
"epoch": 7.6,
"learning_rate": 9.603652968036531e-06,
"loss": 0.9714,
"step": 24975
},
{
"epoch": 7.61,
"learning_rate": 9.573211567732117e-06,
"loss": 0.9101,
"step": 25000
},
{
"epoch": 7.62,
"learning_rate": 9.542770167427703e-06,
"loss": 0.9264,
"step": 25025
},
{
"epoch": 7.63,
"learning_rate": 9.512328767123288e-06,
"loss": 0.9459,
"step": 25050
},
{
"epoch": 7.63,
"learning_rate": 9.481887366818874e-06,
"loss": 0.9391,
"step": 25075
},
{
"epoch": 7.64,
"learning_rate": 9.451445966514461e-06,
"loss": 0.9473,
"step": 25100
},
{
"epoch": 7.65,
"learning_rate": 9.421004566210047e-06,
"loss": 0.897,
"step": 25125
},
{
"epoch": 7.66,
"learning_rate": 9.390563165905633e-06,
"loss": 0.9351,
"step": 25150
},
{
"epoch": 7.66,
"learning_rate": 9.360121765601219e-06,
"loss": 0.9249,
"step": 25175
},
{
"epoch": 7.67,
"learning_rate": 9.329680365296804e-06,
"loss": 0.9407,
"step": 25200
},
{
"epoch": 7.68,
"learning_rate": 9.299238964992392e-06,
"loss": 0.9331,
"step": 25225
},
{
"epoch": 7.69,
"learning_rate": 9.268797564687977e-06,
"loss": 0.9749,
"step": 25250
},
{
"epoch": 7.69,
"learning_rate": 9.238356164383563e-06,
"loss": 0.981,
"step": 25275
},
{
"epoch": 7.7,
"learning_rate": 9.207914764079149e-06,
"loss": 0.9334,
"step": 25300
},
{
"epoch": 7.71,
"learning_rate": 9.177473363774734e-06,
"loss": 0.928,
"step": 25325
},
{
"epoch": 7.72,
"learning_rate": 9.147031963470322e-06,
"loss": 0.8981,
"step": 25350
},
{
"epoch": 7.72,
"learning_rate": 9.116590563165907e-06,
"loss": 0.9735,
"step": 25375
},
{
"epoch": 7.73,
"learning_rate": 9.086149162861493e-06,
"loss": 0.9042,
"step": 25400
},
{
"epoch": 7.74,
"learning_rate": 9.055707762557079e-06,
"loss": 0.9443,
"step": 25425
},
{
"epoch": 7.75,
"learning_rate": 9.025266362252664e-06,
"loss": 0.9571,
"step": 25450
},
{
"epoch": 7.75,
"learning_rate": 8.99482496194825e-06,
"loss": 0.9154,
"step": 25475
},
{
"epoch": 7.76,
"learning_rate": 8.964383561643837e-06,
"loss": 0.9131,
"step": 25500
},
{
"epoch": 7.77,
"learning_rate": 8.933942161339423e-06,
"loss": 0.9096,
"step": 25525
},
{
"epoch": 7.78,
"learning_rate": 8.903500761035009e-06,
"loss": 1.0131,
"step": 25550
},
{
"epoch": 7.79,
"learning_rate": 8.873059360730594e-06,
"loss": 0.9257,
"step": 25575
},
{
"epoch": 7.79,
"learning_rate": 8.84261796042618e-06,
"loss": 0.9627,
"step": 25600
},
{
"epoch": 7.8,
"learning_rate": 8.812176560121768e-06,
"loss": 0.9599,
"step": 25625
},
{
"epoch": 7.81,
"learning_rate": 8.781735159817353e-06,
"loss": 1.0309,
"step": 25650
},
{
"epoch": 7.82,
"learning_rate": 8.751293759512939e-06,
"loss": 0.9223,
"step": 25675
},
{
"epoch": 7.82,
"learning_rate": 8.720852359208525e-06,
"loss": 0.9609,
"step": 25700
},
{
"epoch": 7.83,
"learning_rate": 8.69041095890411e-06,
"loss": 0.9848,
"step": 25725
},
{
"epoch": 7.84,
"learning_rate": 8.659969558599698e-06,
"loss": 0.9744,
"step": 25750
},
{
"epoch": 7.85,
"learning_rate": 8.629528158295283e-06,
"loss": 0.9079,
"step": 25775
},
{
"epoch": 7.85,
"learning_rate": 8.599086757990869e-06,
"loss": 0.9524,
"step": 25800
},
{
"epoch": 7.86,
"learning_rate": 8.568645357686455e-06,
"loss": 0.8825,
"step": 25825
},
{
"epoch": 7.87,
"learning_rate": 8.53820395738204e-06,
"loss": 0.9648,
"step": 25850
},
{
"epoch": 7.88,
"learning_rate": 8.507762557077626e-06,
"loss": 0.9268,
"step": 25875
},
{
"epoch": 7.88,
"learning_rate": 8.477321156773213e-06,
"loss": 1.0055,
"step": 25900
},
{
"epoch": 7.89,
"learning_rate": 8.446879756468799e-06,
"loss": 0.9859,
"step": 25925
},
{
"epoch": 7.9,
"learning_rate": 8.416438356164385e-06,
"loss": 0.876,
"step": 25950
},
{
"epoch": 7.91,
"learning_rate": 8.38599695585997e-06,
"loss": 0.9929,
"step": 25975
},
{
"epoch": 7.91,
"learning_rate": 8.355555555555556e-06,
"loss": 0.937,
"step": 26000
},
{
"epoch": 7.92,
"learning_rate": 8.325114155251143e-06,
"loss": 0.9504,
"step": 26025
},
{
"epoch": 7.93,
"learning_rate": 8.29467275494673e-06,
"loss": 0.9475,
"step": 26050
},
{
"epoch": 7.94,
"learning_rate": 8.264231354642315e-06,
"loss": 0.8728,
"step": 26075
},
{
"epoch": 7.95,
"learning_rate": 8.2337899543379e-06,
"loss": 0.9089,
"step": 26100
},
{
"epoch": 7.95,
"learning_rate": 8.203348554033486e-06,
"loss": 0.963,
"step": 26125
},
{
"epoch": 7.96,
"learning_rate": 8.172907153729072e-06,
"loss": 0.9398,
"step": 26150
},
{
"epoch": 7.97,
"learning_rate": 8.14246575342466e-06,
"loss": 0.9664,
"step": 26175
},
{
"epoch": 7.98,
"learning_rate": 8.112024353120245e-06,
"loss": 0.9756,
"step": 26200
},
{
"epoch": 7.98,
"learning_rate": 8.08158295281583e-06,
"loss": 0.9686,
"step": 26225
},
{
"epoch": 7.99,
"learning_rate": 8.051141552511416e-06,
"loss": 0.9906,
"step": 26250
},
{
"epoch": 8.0,
"learning_rate": 8.020700152207002e-06,
"loss": 0.9644,
"step": 26275
},
{
"epoch": 8.01,
"learning_rate": 7.99025875190259e-06,
"loss": 0.8352,
"step": 26300
},
{
"epoch": 8.01,
"learning_rate": 7.959817351598175e-06,
"loss": 0.7675,
"step": 26325
},
{
"epoch": 8.02,
"learning_rate": 7.92937595129376e-06,
"loss": 0.8487,
"step": 26350
},
{
"epoch": 8.03,
"learning_rate": 7.898934550989346e-06,
"loss": 0.8241,
"step": 26375
},
{
"epoch": 8.04,
"learning_rate": 7.868493150684932e-06,
"loss": 0.8417,
"step": 26400
},
{
"epoch": 8.04,
"learning_rate": 7.83805175038052e-06,
"loss": 0.8277,
"step": 26425
},
{
"epoch": 8.05,
"learning_rate": 7.807610350076105e-06,
"loss": 0.8032,
"step": 26450
},
{
"epoch": 8.06,
"learning_rate": 7.77716894977169e-06,
"loss": 0.8111,
"step": 26475
},
{
"epoch": 8.07,
"learning_rate": 7.746727549467276e-06,
"loss": 0.8319,
"step": 26500
},
{
"epoch": 8.07,
"learning_rate": 7.716286149162862e-06,
"loss": 0.8437,
"step": 26525
},
{
"epoch": 8.08,
"learning_rate": 7.685844748858448e-06,
"loss": 0.8167,
"step": 26550
},
{
"epoch": 8.09,
"learning_rate": 7.655403348554035e-06,
"loss": 0.798,
"step": 26575
},
{
"epoch": 8.1,
"learning_rate": 7.624961948249621e-06,
"loss": 0.8465,
"step": 26600
},
{
"epoch": 8.11,
"learning_rate": 7.594520547945206e-06,
"loss": 0.8215,
"step": 26625
},
{
"epoch": 8.11,
"learning_rate": 7.564079147640791e-06,
"loss": 0.8385,
"step": 26650
},
{
"epoch": 8.12,
"learning_rate": 7.533637747336378e-06,
"loss": 0.8552,
"step": 26675
},
{
"epoch": 8.13,
"learning_rate": 7.503196347031964e-06,
"loss": 0.8127,
"step": 26700
},
{
"epoch": 8.14,
"learning_rate": 7.472754946727549e-06,
"loss": 0.8545,
"step": 26725
},
{
"epoch": 8.14,
"learning_rate": 7.442313546423136e-06,
"loss": 0.8589,
"step": 26750
},
{
"epoch": 8.15,
"learning_rate": 7.4118721461187215e-06,
"loss": 0.8231,
"step": 26775
},
{
"epoch": 8.16,
"learning_rate": 7.381430745814307e-06,
"loss": 0.8292,
"step": 26800
},
{
"epoch": 8.17,
"learning_rate": 7.350989345509894e-06,
"loss": 0.866,
"step": 26825
},
{
"epoch": 8.17,
"learning_rate": 7.320547945205479e-06,
"loss": 0.8413,
"step": 26850
},
{
"epoch": 8.18,
"learning_rate": 7.290106544901066e-06,
"loss": 0.8609,
"step": 26875
},
{
"epoch": 8.19,
"learning_rate": 7.259665144596652e-06,
"loss": 0.8566,
"step": 26900
},
{
"epoch": 8.2,
"learning_rate": 7.229223744292237e-06,
"loss": 0.9092,
"step": 26925
},
{
"epoch": 8.2,
"learning_rate": 7.198782343987824e-06,
"loss": 0.8238,
"step": 26950
},
{
"epoch": 8.21,
"learning_rate": 7.1683409436834095e-06,
"loss": 0.8372,
"step": 26975
},
{
"epoch": 8.22,
"learning_rate": 7.137899543378995e-06,
"loss": 0.8547,
"step": 27000
},
{
"epoch": 8.23,
"learning_rate": 7.107458143074582e-06,
"loss": 0.8121,
"step": 27025
},
{
"epoch": 8.23,
"learning_rate": 7.077016742770167e-06,
"loss": 0.8415,
"step": 27050
},
{
"epoch": 8.24,
"learning_rate": 7.046575342465753e-06,
"loss": 0.8953,
"step": 27075
},
{
"epoch": 8.25,
"learning_rate": 7.01613394216134e-06,
"loss": 0.8622,
"step": 27100
},
{
"epoch": 8.26,
"learning_rate": 6.985692541856925e-06,
"loss": 0.874,
"step": 27125
},
{
"epoch": 8.26,
"learning_rate": 6.955251141552512e-06,
"loss": 0.8017,
"step": 27150
},
{
"epoch": 8.27,
"learning_rate": 6.9248097412480975e-06,
"loss": 0.82,
"step": 27175
},
{
"epoch": 8.28,
"learning_rate": 6.894368340943683e-06,
"loss": 0.827,
"step": 27200
},
{
"epoch": 8.29,
"learning_rate": 6.86392694063927e-06,
"loss": 0.8299,
"step": 27225
},
{
"epoch": 8.3,
"learning_rate": 6.833485540334855e-06,
"loss": 0.8529,
"step": 27250
},
{
"epoch": 8.3,
"learning_rate": 6.803044140030441e-06,
"loss": 0.8119,
"step": 27275
},
{
"epoch": 8.31,
"learning_rate": 6.7726027397260276e-06,
"loss": 0.8448,
"step": 27300
},
{
"epoch": 8.32,
"learning_rate": 6.742161339421613e-06,
"loss": 0.824,
"step": 27325
},
{
"epoch": 8.33,
"learning_rate": 6.7117199391172e-06,
"loss": 0.8442,
"step": 27350
},
{
"epoch": 8.33,
"learning_rate": 6.6812785388127855e-06,
"loss": 0.8194,
"step": 27375
},
{
"epoch": 8.34,
"learning_rate": 6.650837138508371e-06,
"loss": 0.8177,
"step": 27400
},
{
"epoch": 8.35,
"learning_rate": 6.620395738203958e-06,
"loss": 0.873,
"step": 27425
},
{
"epoch": 8.36,
"learning_rate": 6.589954337899543e-06,
"loss": 0.7966,
"step": 27450
},
{
"epoch": 8.36,
"learning_rate": 6.559512937595129e-06,
"loss": 0.8656,
"step": 27475
},
{
"epoch": 8.37,
"learning_rate": 6.5290715372907155e-06,
"loss": 0.8272,
"step": 27500
},
{
"epoch": 8.38,
"learning_rate": 6.498630136986301e-06,
"loss": 0.7805,
"step": 27525
},
{
"epoch": 8.39,
"learning_rate": 6.468188736681888e-06,
"loss": 0.8527,
"step": 27550
},
{
"epoch": 8.39,
"learning_rate": 6.4377473363774734e-06,
"loss": 0.8507,
"step": 27575
},
{
"epoch": 8.4,
"learning_rate": 6.407305936073059e-06,
"loss": 0.7855,
"step": 27600
},
{
"epoch": 8.41,
"learning_rate": 6.376864535768646e-06,
"loss": 0.8705,
"step": 27625
},
{
"epoch": 8.42,
"learning_rate": 6.346423135464231e-06,
"loss": 0.8078,
"step": 27650
},
{
"epoch": 8.42,
"learning_rate": 6.315981735159817e-06,
"loss": 0.7914,
"step": 27675
},
{
"epoch": 8.43,
"learning_rate": 6.2855403348554035e-06,
"loss": 0.8536,
"step": 27700
},
{
"epoch": 8.44,
"learning_rate": 6.255098934550989e-06,
"loss": 0.8765,
"step": 27725
},
{
"epoch": 8.45,
"learning_rate": 6.224657534246576e-06,
"loss": 0.8406,
"step": 27750
},
{
"epoch": 8.46,
"learning_rate": 6.194216133942161e-06,
"loss": 0.8427,
"step": 27775
},
{
"epoch": 8.46,
"learning_rate": 6.163774733637747e-06,
"loss": 0.8689,
"step": 27800
},
{
"epoch": 8.47,
"learning_rate": 6.133333333333334e-06,
"loss": 0.8204,
"step": 27825
},
{
"epoch": 8.48,
"learning_rate": 6.102891933028919e-06,
"loss": 0.8447,
"step": 27850
},
{
"epoch": 8.49,
"learning_rate": 6.072450532724505e-06,
"loss": 0.8356,
"step": 27875
},
{
"epoch": 8.49,
"learning_rate": 6.0420091324200915e-06,
"loss": 0.8776,
"step": 27900
},
{
"epoch": 8.5,
"learning_rate": 6.011567732115677e-06,
"loss": 0.8543,
"step": 27925
},
{
"epoch": 8.51,
"learning_rate": 5.981126331811264e-06,
"loss": 0.8341,
"step": 27950
},
{
"epoch": 8.52,
"learning_rate": 5.950684931506849e-06,
"loss": 0.832,
"step": 27975
},
{
"epoch": 8.52,
"learning_rate": 5.920243531202435e-06,
"loss": 0.7922,
"step": 28000
},
{
"epoch": 8.53,
"learning_rate": 5.889802130898022e-06,
"loss": 0.8491,
"step": 28025
},
{
"epoch": 8.54,
"learning_rate": 5.859360730593607e-06,
"loss": 0.8911,
"step": 28050
},
{
"epoch": 8.55,
"learning_rate": 5.828919330289193e-06,
"loss": 0.8432,
"step": 28075
},
{
"epoch": 8.55,
"learning_rate": 5.7984779299847795e-06,
"loss": 0.7997,
"step": 28100
},
{
"epoch": 8.56,
"learning_rate": 5.768036529680365e-06,
"loss": 0.836,
"step": 28125
},
{
"epoch": 8.57,
"learning_rate": 5.737595129375952e-06,
"loss": 0.8332,
"step": 28150
},
{
"epoch": 8.58,
"learning_rate": 5.707153729071537e-06,
"loss": 0.8591,
"step": 28175
},
{
"epoch": 8.58,
"learning_rate": 5.676712328767123e-06,
"loss": 0.8475,
"step": 28200
},
{
"epoch": 8.59,
"learning_rate": 5.64627092846271e-06,
"loss": 0.8842,
"step": 28225
},
{
"epoch": 8.6,
"learning_rate": 5.615829528158295e-06,
"loss": 0.7811,
"step": 28250
},
{
"epoch": 8.61,
"learning_rate": 5.585388127853881e-06,
"loss": 0.8826,
"step": 28275
},
{
"epoch": 8.61,
"learning_rate": 5.556164383561644e-06,
"loss": 0.8344,
"step": 28300
},
{
"epoch": 8.62,
"learning_rate": 5.52572298325723e-06,
"loss": 0.8106,
"step": 28325
},
{
"epoch": 8.63,
"learning_rate": 5.495281582952816e-06,
"loss": 0.7875,
"step": 28350
},
{
"epoch": 8.64,
"learning_rate": 5.464840182648402e-06,
"loss": 0.83,
"step": 28375
},
{
"epoch": 8.65,
"learning_rate": 5.434398782343988e-06,
"loss": 0.8188,
"step": 28400
},
{
"epoch": 8.65,
"learning_rate": 5.403957382039574e-06,
"loss": 0.8448,
"step": 28425
},
{
"epoch": 8.66,
"learning_rate": 5.37351598173516e-06,
"loss": 0.7736,
"step": 28450
},
{
"epoch": 8.67,
"learning_rate": 5.3430745814307465e-06,
"loss": 0.8335,
"step": 28475
},
{
"epoch": 8.68,
"learning_rate": 5.312633181126332e-06,
"loss": 0.8536,
"step": 28500
},
{
"epoch": 8.68,
"learning_rate": 5.282191780821918e-06,
"loss": 0.8378,
"step": 28525
},
{
"epoch": 8.69,
"learning_rate": 5.251750380517504e-06,
"loss": 0.8067,
"step": 28550
},
{
"epoch": 8.7,
"learning_rate": 5.222526636225267e-06,
"loss": 0.8447,
"step": 28575
},
{
"epoch": 8.71,
"learning_rate": 5.192085235920853e-06,
"loss": 0.8222,
"step": 28600
},
{
"epoch": 8.71,
"learning_rate": 5.161643835616439e-06,
"loss": 0.813,
"step": 28625
},
{
"epoch": 8.72,
"learning_rate": 5.131202435312025e-06,
"loss": 0.8439,
"step": 28650
},
{
"epoch": 8.73,
"learning_rate": 5.100761035007611e-06,
"loss": 0.8635,
"step": 28675
},
{
"epoch": 8.74,
"learning_rate": 5.070319634703197e-06,
"loss": 0.8653,
"step": 28700
},
{
"epoch": 8.74,
"learning_rate": 5.0398782343987825e-06,
"loss": 0.827,
"step": 28725
},
{
"epoch": 8.75,
"learning_rate": 5.009436834094369e-06,
"loss": 0.8366,
"step": 28750
},
{
"epoch": 8.76,
"learning_rate": 4.978995433789955e-06,
"loss": 0.8199,
"step": 28775
},
{
"epoch": 8.77,
"learning_rate": 4.949771689497717e-06,
"loss": 0.9215,
"step": 28800
},
{
"epoch": 8.77,
"learning_rate": 4.919330289193303e-06,
"loss": 0.8776,
"step": 28825
},
{
"epoch": 8.78,
"learning_rate": 4.888888888888889e-06,
"loss": 0.8023,
"step": 28850
},
{
"epoch": 8.79,
"learning_rate": 4.858447488584475e-06,
"loss": 0.8622,
"step": 28875
},
{
"epoch": 8.8,
"learning_rate": 4.8280060882800615e-06,
"loss": 0.801,
"step": 28900
},
{
"epoch": 8.81,
"learning_rate": 4.797564687975647e-06,
"loss": 0.837,
"step": 28925
},
{
"epoch": 8.81,
"learning_rate": 4.767123287671233e-06,
"loss": 0.8399,
"step": 28950
},
{
"epoch": 8.82,
"learning_rate": 4.736681887366819e-06,
"loss": 0.804,
"step": 28975
},
{
"epoch": 8.83,
"learning_rate": 4.706240487062405e-06,
"loss": 0.8425,
"step": 29000
},
{
"epoch": 8.84,
"learning_rate": 4.675799086757991e-06,
"loss": 0.8786,
"step": 29025
},
{
"epoch": 8.84,
"learning_rate": 4.645357686453577e-06,
"loss": 0.9328,
"step": 29050
},
{
"epoch": 8.85,
"learning_rate": 4.614916286149163e-06,
"loss": 0.8182,
"step": 29075
},
{
"epoch": 8.86,
"learning_rate": 4.5844748858447495e-06,
"loss": 0.8446,
"step": 29100
},
{
"epoch": 8.87,
"learning_rate": 4.554033485540335e-06,
"loss": 0.8503,
"step": 29125
},
{
"epoch": 8.87,
"learning_rate": 4.523592085235921e-06,
"loss": 0.8258,
"step": 29150
},
{
"epoch": 8.88,
"learning_rate": 4.493150684931507e-06,
"loss": 0.8692,
"step": 29175
},
{
"epoch": 8.89,
"learning_rate": 4.462709284627093e-06,
"loss": 0.8569,
"step": 29200
},
{
"epoch": 8.9,
"learning_rate": 4.432267884322679e-06,
"loss": 0.8694,
"step": 29225
},
{
"epoch": 8.9,
"learning_rate": 4.401826484018265e-06,
"loss": 0.836,
"step": 29250
},
{
"epoch": 8.91,
"learning_rate": 4.371385083713851e-06,
"loss": 0.804,
"step": 29275
},
{
"epoch": 8.92,
"learning_rate": 4.340943683409437e-06,
"loss": 0.8593,
"step": 29300
},
{
"epoch": 8.93,
"learning_rate": 4.310502283105023e-06,
"loss": 0.8547,
"step": 29325
},
{
"epoch": 8.93,
"learning_rate": 4.280060882800609e-06,
"loss": 0.8705,
"step": 29350
},
{
"epoch": 8.94,
"learning_rate": 4.249619482496195e-06,
"loss": 0.867,
"step": 29375
},
{
"epoch": 8.95,
"learning_rate": 4.219178082191781e-06,
"loss": 0.796,
"step": 29400
},
{
"epoch": 8.96,
"learning_rate": 4.188736681887367e-06,
"loss": 0.8249,
"step": 29425
},
{
"epoch": 8.96,
"learning_rate": 4.158295281582953e-06,
"loss": 0.8244,
"step": 29450
},
{
"epoch": 8.97,
"learning_rate": 4.127853881278539e-06,
"loss": 0.8224,
"step": 29475
},
{
"epoch": 8.98,
"learning_rate": 4.097412480974125e-06,
"loss": 0.8259,
"step": 29500
},
{
"epoch": 8.99,
"learning_rate": 4.066971080669711e-06,
"loss": 0.8255,
"step": 29525
},
{
"epoch": 9.0,
"learning_rate": 4.036529680365297e-06,
"loss": 0.8501,
"step": 29550
},
{
"epoch": 9.0,
"learning_rate": 4.006088280060883e-06,
"loss": 0.7706,
"step": 29575
},
{
"epoch": 9.01,
"learning_rate": 3.975646879756469e-06,
"loss": 0.7943,
"step": 29600
},
{
"epoch": 9.02,
"learning_rate": 3.945205479452055e-06,
"loss": 0.7389,
"step": 29625
},
{
"epoch": 9.03,
"learning_rate": 3.914764079147641e-06,
"loss": 0.7088,
"step": 29650
},
{
"epoch": 9.03,
"learning_rate": 3.884322678843227e-06,
"loss": 0.772,
"step": 29675
},
{
"epoch": 9.04,
"learning_rate": 3.853881278538813e-06,
"loss": 0.7772,
"step": 29700
},
{
"epoch": 9.05,
"learning_rate": 3.823439878234399e-06,
"loss": 0.7484,
"step": 29725
},
{
"epoch": 9.06,
"learning_rate": 3.792998477929985e-06,
"loss": 0.7653,
"step": 29750
},
{
"epoch": 9.06,
"learning_rate": 3.762557077625571e-06,
"loss": 0.7537,
"step": 29775
},
{
"epoch": 9.07,
"learning_rate": 3.732115677321157e-06,
"loss": 0.7248,
"step": 29800
},
{
"epoch": 9.08,
"learning_rate": 3.701674277016743e-06,
"loss": 0.8037,
"step": 29825
},
{
"epoch": 9.09,
"learning_rate": 3.671232876712329e-06,
"loss": 0.7425,
"step": 29850
},
{
"epoch": 9.09,
"learning_rate": 3.640791476407915e-06,
"loss": 0.7592,
"step": 29875
},
{
"epoch": 9.1,
"learning_rate": 3.610350076103501e-06,
"loss": 0.7541,
"step": 29900
},
{
"epoch": 9.11,
"learning_rate": 3.579908675799087e-06,
"loss": 0.739,
"step": 29925
},
{
"epoch": 9.12,
"learning_rate": 3.549467275494673e-06,
"loss": 0.7581,
"step": 29950
},
{
"epoch": 9.12,
"learning_rate": 3.519025875190259e-06,
"loss": 0.8068,
"step": 29975
},
{
"epoch": 9.13,
"learning_rate": 3.488584474885845e-06,
"loss": 0.7699,
"step": 30000
},
{
"epoch": 9.14,
"learning_rate": 3.458143074581431e-06,
"loss": 0.725,
"step": 30025
},
{
"epoch": 9.15,
"learning_rate": 3.4277016742770168e-06,
"loss": 0.7598,
"step": 30050
},
{
"epoch": 9.16,
"learning_rate": 3.397260273972603e-06,
"loss": 0.726,
"step": 30075
},
{
"epoch": 9.16,
"learning_rate": 3.366818873668189e-06,
"loss": 0.7622,
"step": 30100
},
{
"epoch": 9.17,
"learning_rate": 3.336377473363775e-06,
"loss": 0.7633,
"step": 30125
},
{
"epoch": 9.18,
"learning_rate": 3.3059360730593608e-06,
"loss": 0.7839,
"step": 30150
},
{
"epoch": 9.19,
"learning_rate": 3.275494672754947e-06,
"loss": 0.758,
"step": 30175
},
{
"epoch": 9.19,
"learning_rate": 3.245053272450533e-06,
"loss": 0.7451,
"step": 30200
},
{
"epoch": 9.2,
"learning_rate": 3.214611872146119e-06,
"loss": 0.7402,
"step": 30225
},
{
"epoch": 9.21,
"learning_rate": 3.1841704718417048e-06,
"loss": 0.733,
"step": 30250
},
{
"epoch": 9.22,
"learning_rate": 3.153729071537291e-06,
"loss": 0.7695,
"step": 30275
},
{
"epoch": 9.22,
"learning_rate": 3.123287671232877e-06,
"loss": 0.7838,
"step": 30300
},
{
"epoch": 9.23,
"learning_rate": 3.092846270928463e-06,
"loss": 0.7946,
"step": 30325
},
{
"epoch": 9.24,
"learning_rate": 3.0624048706240488e-06,
"loss": 0.7699,
"step": 30350
},
{
"epoch": 9.25,
"learning_rate": 3.031963470319635e-06,
"loss": 0.7173,
"step": 30375
},
{
"epoch": 9.25,
"learning_rate": 3.001522070015221e-06,
"loss": 0.7182,
"step": 30400
},
{
"epoch": 9.26,
"learning_rate": 2.971080669710807e-06,
"loss": 0.7544,
"step": 30425
},
{
"epoch": 9.27,
"learning_rate": 2.9406392694063927e-06,
"loss": 0.7678,
"step": 30450
},
{
"epoch": 9.28,
"learning_rate": 2.910197869101979e-06,
"loss": 0.7222,
"step": 30475
},
{
"epoch": 9.28,
"learning_rate": 2.879756468797565e-06,
"loss": 0.7949,
"step": 30500
},
{
"epoch": 9.29,
"learning_rate": 2.849315068493151e-06,
"loss": 0.7523,
"step": 30525
},
{
"epoch": 9.3,
"learning_rate": 2.8188736681887367e-06,
"loss": 0.7809,
"step": 30550
},
{
"epoch": 9.31,
"learning_rate": 2.788432267884323e-06,
"loss": 0.7836,
"step": 30575
},
{
"epoch": 9.32,
"learning_rate": 2.757990867579909e-06,
"loss": 0.7481,
"step": 30600
},
{
"epoch": 9.32,
"learning_rate": 2.727549467275495e-06,
"loss": 0.756,
"step": 30625
},
{
"epoch": 9.33,
"learning_rate": 2.6971080669710807e-06,
"loss": 0.8018,
"step": 30650
},
{
"epoch": 9.34,
"learning_rate": 2.666666666666667e-06,
"loss": 0.7264,
"step": 30675
},
{
"epoch": 9.35,
"learning_rate": 2.636225266362253e-06,
"loss": 0.7553,
"step": 30700
},
{
"epoch": 9.35,
"learning_rate": 2.605783866057839e-06,
"loss": 0.8181,
"step": 30725
},
{
"epoch": 9.36,
"learning_rate": 2.5753424657534247e-06,
"loss": 0.8361,
"step": 30750
},
{
"epoch": 9.37,
"learning_rate": 2.544901065449011e-06,
"loss": 0.7377,
"step": 30775
},
{
"epoch": 9.38,
"learning_rate": 2.514459665144597e-06,
"loss": 0.7311,
"step": 30800
},
{
"epoch": 9.38,
"learning_rate": 2.484018264840183e-06,
"loss": 0.7555,
"step": 30825
},
{
"epoch": 9.39,
"learning_rate": 2.4535768645357687e-06,
"loss": 0.7609,
"step": 30850
},
{
"epoch": 9.4,
"learning_rate": 2.423135464231355e-06,
"loss": 0.7774,
"step": 30875
},
{
"epoch": 9.41,
"learning_rate": 2.392694063926941e-06,
"loss": 0.7941,
"step": 30900
},
{
"epoch": 9.41,
"learning_rate": 2.362252663622527e-06,
"loss": 0.782,
"step": 30925
},
{
"epoch": 9.42,
"learning_rate": 2.3318112633181127e-06,
"loss": 0.7627,
"step": 30950
},
{
"epoch": 9.43,
"learning_rate": 2.301369863013699e-06,
"loss": 0.7271,
"step": 30975
},
{
"epoch": 9.44,
"learning_rate": 2.270928462709285e-06,
"loss": 0.7764,
"step": 31000
},
{
"epoch": 9.44,
"learning_rate": 2.240487062404871e-06,
"loss": 0.8404,
"step": 31025
},
{
"epoch": 9.45,
"learning_rate": 2.2100456621004567e-06,
"loss": 0.7565,
"step": 31050
},
{
"epoch": 9.46,
"learning_rate": 2.179604261796043e-06,
"loss": 0.7917,
"step": 31075
},
{
"epoch": 9.47,
"learning_rate": 2.149162861491629e-06,
"loss": 0.7568,
"step": 31100
},
{
"epoch": 9.47,
"learning_rate": 2.1187214611872146e-06,
"loss": 0.7278,
"step": 31125
},
{
"epoch": 9.48,
"learning_rate": 2.0882800608828007e-06,
"loss": 0.8142,
"step": 31150
},
{
"epoch": 9.49,
"learning_rate": 2.0578386605783868e-06,
"loss": 0.791,
"step": 31175
},
{
"epoch": 9.5,
"learning_rate": 2.027397260273973e-06,
"loss": 0.7884,
"step": 31200
},
{
"epoch": 9.51,
"learning_rate": 1.9969558599695586e-06,
"loss": 0.8056,
"step": 31225
},
{
"epoch": 9.51,
"learning_rate": 1.9665144596651447e-06,
"loss": 0.7537,
"step": 31250
},
{
"epoch": 9.52,
"learning_rate": 1.9360730593607308e-06,
"loss": 0.7794,
"step": 31275
},
{
"epoch": 9.53,
"learning_rate": 1.9056316590563167e-06,
"loss": 0.8168,
"step": 31300
},
{
"epoch": 9.54,
"learning_rate": 1.8751902587519028e-06,
"loss": 0.756,
"step": 31325
},
{
"epoch": 9.54,
"learning_rate": 1.8447488584474887e-06,
"loss": 0.7625,
"step": 31350
},
{
"epoch": 9.55,
"learning_rate": 1.8143074581430748e-06,
"loss": 0.7638,
"step": 31375
},
{
"epoch": 9.56,
"learning_rate": 1.7838660578386607e-06,
"loss": 0.7532,
"step": 31400
},
{
"epoch": 9.57,
"learning_rate": 1.7534246575342468e-06,
"loss": 0.7272,
"step": 31425
},
{
"epoch": 9.57,
"learning_rate": 1.7229832572298326e-06,
"loss": 0.7503,
"step": 31450
},
{
"epoch": 9.58,
"learning_rate": 1.6925418569254187e-06,
"loss": 0.7559,
"step": 31475
},
{
"epoch": 9.59,
"learning_rate": 1.6621004566210046e-06,
"loss": 0.7825,
"step": 31500
},
{
"epoch": 9.6,
"learning_rate": 1.6316590563165907e-06,
"loss": 0.7557,
"step": 31525
},
{
"epoch": 9.6,
"learning_rate": 1.6012176560121766e-06,
"loss": 0.7957,
"step": 31550
},
{
"epoch": 9.61,
"learning_rate": 1.5707762557077627e-06,
"loss": 0.7323,
"step": 31575
},
{
"epoch": 9.62,
"learning_rate": 1.5403348554033486e-06,
"loss": 0.7987,
"step": 31600
},
{
"epoch": 9.63,
"learning_rate": 1.5098934550989347e-06,
"loss": 0.8013,
"step": 31625
},
{
"epoch": 9.63,
"learning_rate": 1.4794520547945206e-06,
"loss": 0.7903,
"step": 31650
},
{
"epoch": 9.64,
"learning_rate": 1.4490106544901067e-06,
"loss": 0.7038,
"step": 31675
},
{
"epoch": 9.65,
"learning_rate": 1.4185692541856926e-06,
"loss": 0.7153,
"step": 31700
},
{
"epoch": 9.66,
"learning_rate": 1.3881278538812787e-06,
"loss": 0.7497,
"step": 31725
},
{
"epoch": 9.67,
"learning_rate": 1.3576864535768646e-06,
"loss": 0.7115,
"step": 31750
},
{
"epoch": 9.67,
"learning_rate": 1.3272450532724507e-06,
"loss": 0.7573,
"step": 31775
},
{
"epoch": 9.68,
"learning_rate": 1.2968036529680366e-06,
"loss": 0.756,
"step": 31800
},
{
"epoch": 9.69,
"learning_rate": 1.2663622526636227e-06,
"loss": 0.797,
"step": 31825
},
{
"epoch": 9.7,
"learning_rate": 1.2359208523592086e-06,
"loss": 0.7785,
"step": 31850
},
{
"epoch": 9.7,
"learning_rate": 1.2054794520547947e-06,
"loss": 0.75,
"step": 31875
},
{
"epoch": 9.71,
"learning_rate": 1.1750380517503806e-06,
"loss": 0.7955,
"step": 31900
},
{
"epoch": 9.72,
"learning_rate": 1.1445966514459667e-06,
"loss": 0.8163,
"step": 31925
},
{
"epoch": 9.73,
"learning_rate": 1.1141552511415526e-06,
"loss": 0.7569,
"step": 31950
},
{
"epoch": 9.73,
"learning_rate": 1.0837138508371387e-06,
"loss": 0.7812,
"step": 31975
},
{
"epoch": 9.74,
"learning_rate": 1.0532724505327246e-06,
"loss": 0.7108,
"step": 32000
},
{
"epoch": 9.75,
"learning_rate": 1.0228310502283107e-06,
"loss": 0.754,
"step": 32025
},
{
"epoch": 9.76,
"learning_rate": 9.923896499238966e-07,
"loss": 0.805,
"step": 32050
},
{
"epoch": 9.76,
"learning_rate": 9.619482496194827e-07,
"loss": 0.7494,
"step": 32075
},
{
"epoch": 9.77,
"learning_rate": 9.315068493150686e-07,
"loss": 0.7821,
"step": 32100
},
{
"epoch": 9.78,
"learning_rate": 9.010654490106546e-07,
"loss": 0.8192,
"step": 32125
},
{
"epoch": 9.79,
"learning_rate": 8.706240487062406e-07,
"loss": 0.7508,
"step": 32150
},
{
"epoch": 9.79,
"learning_rate": 8.401826484018266e-07,
"loss": 0.7274,
"step": 32175
},
{
"epoch": 9.8,
"learning_rate": 8.097412480974126e-07,
"loss": 0.786,
"step": 32200
},
{
"epoch": 9.81,
"learning_rate": 7.792998477929986e-07,
"loss": 0.7535,
"step": 32225
},
{
"epoch": 9.82,
"learning_rate": 7.488584474885845e-07,
"loss": 0.7448,
"step": 32250
},
{
"epoch": 9.82,
"learning_rate": 7.184170471841705e-07,
"loss": 0.7506,
"step": 32275
},
{
"epoch": 9.83,
"learning_rate": 6.879756468797565e-07,
"loss": 0.7662,
"step": 32300
},
{
"epoch": 9.84,
"learning_rate": 6.575342465753425e-07,
"loss": 0.7408,
"step": 32325
},
{
"epoch": 9.85,
"learning_rate": 6.270928462709285e-07,
"loss": 0.7333,
"step": 32350
},
{
"epoch": 9.86,
"learning_rate": 5.966514459665146e-07,
"loss": 0.7941,
"step": 32375
},
{
"epoch": 9.86,
"learning_rate": 5.662100456621006e-07,
"loss": 0.7735,
"step": 32400
},
{
"epoch": 9.87,
"learning_rate": 5.357686453576865e-07,
"loss": 0.7741,
"step": 32425
},
{
"epoch": 9.88,
"learning_rate": 5.053272450532725e-07,
"loss": 0.7857,
"step": 32450
},
{
"epoch": 9.89,
"learning_rate": 4.748858447488585e-07,
"loss": 0.8137,
"step": 32475
},
{
"epoch": 9.89,
"learning_rate": 4.444444444444445e-07,
"loss": 0.7557,
"step": 32500
},
{
"epoch": 9.9,
"learning_rate": 4.140030441400305e-07,
"loss": 0.7187,
"step": 32525
},
{
"epoch": 9.91,
"learning_rate": 3.835616438356165e-07,
"loss": 0.7536,
"step": 32550
},
{
"epoch": 9.92,
"learning_rate": 3.531202435312025e-07,
"loss": 0.7462,
"step": 32575
},
{
"epoch": 9.92,
"learning_rate": 3.226788432267885e-07,
"loss": 0.7967,
"step": 32600
},
{
"epoch": 9.93,
"learning_rate": 2.922374429223744e-07,
"loss": 0.7531,
"step": 32625
},
{
"epoch": 9.94,
"learning_rate": 2.617960426179604e-07,
"loss": 0.7584,
"step": 32650
},
{
"epoch": 9.95,
"learning_rate": 2.3135464231354645e-07,
"loss": 0.7664,
"step": 32675
},
{
"epoch": 9.95,
"learning_rate": 2.0091324200913244e-07,
"loss": 0.8058,
"step": 32700
},
{
"epoch": 9.96,
"learning_rate": 1.7047184170471844e-07,
"loss": 0.795,
"step": 32725
},
{
"epoch": 9.97,
"learning_rate": 1.4003044140030444e-07,
"loss": 0.7861,
"step": 32750
},
{
"epoch": 9.98,
"learning_rate": 1.0958904109589042e-07,
"loss": 0.7567,
"step": 32775
},
{
"epoch": 9.98,
"learning_rate": 7.914764079147642e-08,
"loss": 0.7481,
"step": 32800
},
{
"epoch": 9.99,
"learning_rate": 4.870624048706241e-08,
"loss": 0.7556,
"step": 32825
},
{
"epoch": 10.0,
"learning_rate": 1.9482496194824964e-08,
"loss": 0.7565,
"step": 32850
},
{
"epoch": 10.0,
"step": 32850,
"total_flos": 4.270496328921907e+17,
"train_loss": 1.3997784228157961,
"train_runtime": 9169.8515,
"train_samples_per_second": 21.49,
"train_steps_per_second": 3.582
}
],
"max_steps": 32850,
"num_train_epochs": 10,
"total_flos": 4.270496328921907e+17,
"trial_name": null,
"trial_params": null
}