argpt2-goodreads / trainer_state.json
mofawzy's picture
init commit
1bd775d
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 20.0,
"global_step": 16900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06,
"learning_rate": 4.985207100591716e-05,
"loss": 2.2222,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 4.970414201183432e-05,
"loss": 2.043,
"step": 100
},
{
"epoch": 0.18,
"learning_rate": 4.955621301775148e-05,
"loss": 1.9649,
"step": 150
},
{
"epoch": 0.24,
"learning_rate": 4.9408284023668644e-05,
"loss": 1.9144,
"step": 200
},
{
"epoch": 0.3,
"learning_rate": 4.92603550295858e-05,
"loss": 1.8759,
"step": 250
},
{
"epoch": 0.36,
"learning_rate": 4.9112426035502965e-05,
"loss": 1.8501,
"step": 300
},
{
"epoch": 0.41,
"learning_rate": 4.896449704142012e-05,
"loss": 1.8282,
"step": 350
},
{
"epoch": 0.47,
"learning_rate": 4.881656804733728e-05,
"loss": 1.8055,
"step": 400
},
{
"epoch": 0.53,
"learning_rate": 4.866863905325444e-05,
"loss": 1.7912,
"step": 450
},
{
"epoch": 0.59,
"learning_rate": 4.85207100591716e-05,
"loss": 1.7801,
"step": 500
},
{
"epoch": 0.65,
"learning_rate": 4.8372781065088756e-05,
"loss": 1.7691,
"step": 550
},
{
"epoch": 0.71,
"learning_rate": 4.822485207100592e-05,
"loss": 1.753,
"step": 600
},
{
"epoch": 0.77,
"learning_rate": 4.8076923076923084e-05,
"loss": 1.7455,
"step": 650
},
{
"epoch": 0.83,
"learning_rate": 4.792899408284024e-05,
"loss": 1.7326,
"step": 700
},
{
"epoch": 0.89,
"learning_rate": 4.77810650887574e-05,
"loss": 1.7271,
"step": 750
},
{
"epoch": 0.95,
"learning_rate": 4.7633136094674555e-05,
"loss": 1.7172,
"step": 800
},
{
"epoch": 1.01,
"learning_rate": 4.748520710059172e-05,
"loss": 1.7097,
"step": 850
},
{
"epoch": 1.07,
"learning_rate": 4.7337278106508875e-05,
"loss": 1.698,
"step": 900
},
{
"epoch": 1.12,
"learning_rate": 4.718934911242604e-05,
"loss": 1.6881,
"step": 950
},
{
"epoch": 1.18,
"learning_rate": 4.7041420118343196e-05,
"loss": 1.6812,
"step": 1000
},
{
"epoch": 1.24,
"learning_rate": 4.689349112426036e-05,
"loss": 1.6711,
"step": 1050
},
{
"epoch": 1.3,
"learning_rate": 4.674556213017752e-05,
"loss": 1.6749,
"step": 1100
},
{
"epoch": 1.36,
"learning_rate": 4.6597633136094674e-05,
"loss": 1.6684,
"step": 1150
},
{
"epoch": 1.42,
"learning_rate": 4.644970414201184e-05,
"loss": 1.6656,
"step": 1200
},
{
"epoch": 1.48,
"learning_rate": 4.6301775147928994e-05,
"loss": 1.6604,
"step": 1250
},
{
"epoch": 1.54,
"learning_rate": 4.615384615384616e-05,
"loss": 1.6578,
"step": 1300
},
{
"epoch": 1.6,
"learning_rate": 4.6005917159763315e-05,
"loss": 1.6527,
"step": 1350
},
{
"epoch": 1.66,
"learning_rate": 4.585798816568048e-05,
"loss": 1.6502,
"step": 1400
},
{
"epoch": 1.72,
"learning_rate": 4.5710059171597636e-05,
"loss": 1.6485,
"step": 1450
},
{
"epoch": 1.78,
"learning_rate": 4.556213017751479e-05,
"loss": 1.6418,
"step": 1500
},
{
"epoch": 1.83,
"learning_rate": 4.5414201183431957e-05,
"loss": 1.6397,
"step": 1550
},
{
"epoch": 1.89,
"learning_rate": 4.5266272189349114e-05,
"loss": 1.6301,
"step": 1600
},
{
"epoch": 1.95,
"learning_rate": 4.511834319526627e-05,
"loss": 1.6273,
"step": 1650
},
{
"epoch": 2.01,
"learning_rate": 4.4970414201183434e-05,
"loss": 1.6227,
"step": 1700
},
{
"epoch": 2.07,
"learning_rate": 4.48224852071006e-05,
"loss": 1.6148,
"step": 1750
},
{
"epoch": 2.13,
"learning_rate": 4.4674556213017755e-05,
"loss": 1.6132,
"step": 1800
},
{
"epoch": 2.19,
"learning_rate": 4.452662721893491e-05,
"loss": 1.6094,
"step": 1850
},
{
"epoch": 2.25,
"learning_rate": 4.437869822485207e-05,
"loss": 1.605,
"step": 1900
},
{
"epoch": 2.31,
"learning_rate": 4.423076923076923e-05,
"loss": 1.6058,
"step": 1950
},
{
"epoch": 2.37,
"learning_rate": 4.408284023668639e-05,
"loss": 1.6,
"step": 2000
},
{
"epoch": 2.43,
"learning_rate": 4.393491124260355e-05,
"loss": 1.597,
"step": 2050
},
{
"epoch": 2.49,
"learning_rate": 4.378698224852072e-05,
"loss": 1.5985,
"step": 2100
},
{
"epoch": 2.54,
"learning_rate": 4.3639053254437874e-05,
"loss": 1.5941,
"step": 2150
},
{
"epoch": 2.6,
"learning_rate": 4.349112426035503e-05,
"loss": 1.5953,
"step": 2200
},
{
"epoch": 2.66,
"learning_rate": 4.334319526627219e-05,
"loss": 1.5922,
"step": 2250
},
{
"epoch": 2.72,
"learning_rate": 4.319526627218935e-05,
"loss": 1.5908,
"step": 2300
},
{
"epoch": 2.78,
"learning_rate": 4.304733727810651e-05,
"loss": 1.5827,
"step": 2350
},
{
"epoch": 2.84,
"learning_rate": 4.289940828402367e-05,
"loss": 1.585,
"step": 2400
},
{
"epoch": 2.9,
"learning_rate": 4.275147928994083e-05,
"loss": 1.5853,
"step": 2450
},
{
"epoch": 2.96,
"learning_rate": 4.260355029585799e-05,
"loss": 1.5837,
"step": 2500
},
{
"epoch": 3.02,
"learning_rate": 4.245562130177515e-05,
"loss": 1.5704,
"step": 2550
},
{
"epoch": 3.08,
"learning_rate": 4.230769230769231e-05,
"loss": 1.5664,
"step": 2600
},
{
"epoch": 3.14,
"learning_rate": 4.215976331360947e-05,
"loss": 1.5649,
"step": 2650
},
{
"epoch": 3.2,
"learning_rate": 4.201183431952663e-05,
"loss": 1.5639,
"step": 2700
},
{
"epoch": 3.25,
"learning_rate": 4.1863905325443785e-05,
"loss": 1.5623,
"step": 2750
},
{
"epoch": 3.31,
"learning_rate": 4.171597633136095e-05,
"loss": 1.5616,
"step": 2800
},
{
"epoch": 3.37,
"learning_rate": 4.156804733727811e-05,
"loss": 1.5603,
"step": 2850
},
{
"epoch": 3.43,
"learning_rate": 4.142011834319527e-05,
"loss": 1.56,
"step": 2900
},
{
"epoch": 3.49,
"learning_rate": 4.1272189349112426e-05,
"loss": 1.5552,
"step": 2950
},
{
"epoch": 3.55,
"learning_rate": 4.112426035502959e-05,
"loss": 1.5563,
"step": 3000
},
{
"epoch": 3.61,
"learning_rate": 4.097633136094675e-05,
"loss": 1.5531,
"step": 3050
},
{
"epoch": 3.67,
"learning_rate": 4.0828402366863904e-05,
"loss": 1.5525,
"step": 3100
},
{
"epoch": 3.73,
"learning_rate": 4.068047337278107e-05,
"loss": 1.5487,
"step": 3150
},
{
"epoch": 3.79,
"learning_rate": 4.053254437869823e-05,
"loss": 1.557,
"step": 3200
},
{
"epoch": 3.85,
"learning_rate": 4.038461538461539e-05,
"loss": 1.5508,
"step": 3250
},
{
"epoch": 3.91,
"learning_rate": 4.0236686390532545e-05,
"loss": 1.5467,
"step": 3300
},
{
"epoch": 3.96,
"learning_rate": 4.00887573964497e-05,
"loss": 1.546,
"step": 3350
},
{
"epoch": 4.02,
"learning_rate": 3.9940828402366866e-05,
"loss": 1.5368,
"step": 3400
},
{
"epoch": 4.08,
"learning_rate": 3.979289940828402e-05,
"loss": 1.5326,
"step": 3450
},
{
"epoch": 4.14,
"learning_rate": 3.964497041420119e-05,
"loss": 1.5352,
"step": 3500
},
{
"epoch": 4.2,
"learning_rate": 3.9497041420118344e-05,
"loss": 1.5311,
"step": 3550
},
{
"epoch": 4.26,
"learning_rate": 3.934911242603551e-05,
"loss": 1.5299,
"step": 3600
},
{
"epoch": 4.32,
"learning_rate": 3.9201183431952664e-05,
"loss": 1.5301,
"step": 3650
},
{
"epoch": 4.38,
"learning_rate": 3.905325443786982e-05,
"loss": 1.5334,
"step": 3700
},
{
"epoch": 4.44,
"learning_rate": 3.8905325443786985e-05,
"loss": 1.5256,
"step": 3750
},
{
"epoch": 4.5,
"learning_rate": 3.875739644970414e-05,
"loss": 1.5246,
"step": 3800
},
{
"epoch": 4.56,
"learning_rate": 3.86094674556213e-05,
"loss": 1.5305,
"step": 3850
},
{
"epoch": 4.62,
"learning_rate": 3.846153846153846e-05,
"loss": 1.5234,
"step": 3900
},
{
"epoch": 4.67,
"learning_rate": 3.8313609467455627e-05,
"loss": 1.5262,
"step": 3950
},
{
"epoch": 4.73,
"learning_rate": 3.8165680473372784e-05,
"loss": 1.5267,
"step": 4000
},
{
"epoch": 4.79,
"learning_rate": 3.801775147928994e-05,
"loss": 1.5209,
"step": 4050
},
{
"epoch": 4.85,
"learning_rate": 3.7869822485207104e-05,
"loss": 1.5196,
"step": 4100
},
{
"epoch": 4.91,
"learning_rate": 3.772189349112426e-05,
"loss": 1.5222,
"step": 4150
},
{
"epoch": 4.97,
"learning_rate": 3.757396449704142e-05,
"loss": 1.5196,
"step": 4200
},
{
"epoch": 5.03,
"learning_rate": 3.742603550295858e-05,
"loss": 1.5127,
"step": 4250
},
{
"epoch": 5.09,
"learning_rate": 3.7278106508875746e-05,
"loss": 1.5052,
"step": 4300
},
{
"epoch": 5.15,
"learning_rate": 3.71301775147929e-05,
"loss": 1.5066,
"step": 4350
},
{
"epoch": 5.21,
"learning_rate": 3.698224852071006e-05,
"loss": 1.5059,
"step": 4400
},
{
"epoch": 5.27,
"learning_rate": 3.6834319526627223e-05,
"loss": 1.5123,
"step": 4450
},
{
"epoch": 5.33,
"learning_rate": 3.668639053254438e-05,
"loss": 1.5058,
"step": 4500
},
{
"epoch": 5.38,
"learning_rate": 3.653846153846154e-05,
"loss": 1.5025,
"step": 4550
},
{
"epoch": 5.44,
"learning_rate": 3.63905325443787e-05,
"loss": 1.5006,
"step": 4600
},
{
"epoch": 5.5,
"learning_rate": 3.6242603550295865e-05,
"loss": 1.5022,
"step": 4650
},
{
"epoch": 5.56,
"learning_rate": 3.609467455621302e-05,
"loss": 1.5061,
"step": 4700
},
{
"epoch": 5.62,
"learning_rate": 3.594674556213018e-05,
"loss": 1.5057,
"step": 4750
},
{
"epoch": 5.68,
"learning_rate": 3.5798816568047336e-05,
"loss": 1.5022,
"step": 4800
},
{
"epoch": 5.74,
"learning_rate": 3.56508875739645e-05,
"loss": 1.5022,
"step": 4850
},
{
"epoch": 5.8,
"learning_rate": 3.5502958579881656e-05,
"loss": 1.5007,
"step": 4900
},
{
"epoch": 5.86,
"learning_rate": 3.5355029585798813e-05,
"loss": 1.4965,
"step": 4950
},
{
"epoch": 5.92,
"learning_rate": 3.520710059171598e-05,
"loss": 1.5002,
"step": 5000
},
{
"epoch": 5.98,
"learning_rate": 3.505917159763314e-05,
"loss": 1.4991,
"step": 5050
},
{
"epoch": 6.04,
"learning_rate": 3.49112426035503e-05,
"loss": 1.4943,
"step": 5100
},
{
"epoch": 6.09,
"learning_rate": 3.4763313609467455e-05,
"loss": 1.486,
"step": 5150
},
{
"epoch": 6.15,
"learning_rate": 3.461538461538462e-05,
"loss": 1.486,
"step": 5200
},
{
"epoch": 6.21,
"learning_rate": 3.4467455621301776e-05,
"loss": 1.4871,
"step": 5250
},
{
"epoch": 6.27,
"learning_rate": 3.431952662721893e-05,
"loss": 1.4844,
"step": 5300
},
{
"epoch": 6.33,
"learning_rate": 3.4171597633136096e-05,
"loss": 1.4845,
"step": 5350
},
{
"epoch": 6.39,
"learning_rate": 3.402366863905326e-05,
"loss": 1.4859,
"step": 5400
},
{
"epoch": 6.45,
"learning_rate": 3.387573964497042e-05,
"loss": 1.4872,
"step": 5450
},
{
"epoch": 6.51,
"learning_rate": 3.3727810650887574e-05,
"loss": 1.4816,
"step": 5500
},
{
"epoch": 6.57,
"learning_rate": 3.357988165680474e-05,
"loss": 1.4868,
"step": 5550
},
{
"epoch": 6.63,
"learning_rate": 3.3431952662721895e-05,
"loss": 1.4816,
"step": 5600
},
{
"epoch": 6.69,
"learning_rate": 3.328402366863905e-05,
"loss": 1.4838,
"step": 5650
},
{
"epoch": 6.75,
"learning_rate": 3.3136094674556215e-05,
"loss": 1.4808,
"step": 5700
},
{
"epoch": 6.8,
"learning_rate": 3.298816568047338e-05,
"loss": 1.4821,
"step": 5750
},
{
"epoch": 6.86,
"learning_rate": 3.2840236686390536e-05,
"loss": 1.4828,
"step": 5800
},
{
"epoch": 6.92,
"learning_rate": 3.269230769230769e-05,
"loss": 1.4768,
"step": 5850
},
{
"epoch": 6.98,
"learning_rate": 3.254437869822485e-05,
"loss": 1.4822,
"step": 5900
},
{
"epoch": 7.04,
"learning_rate": 3.2396449704142014e-05,
"loss": 1.4681,
"step": 5950
},
{
"epoch": 7.1,
"learning_rate": 3.224852071005917e-05,
"loss": 1.4655,
"step": 6000
},
{
"epoch": 7.16,
"learning_rate": 3.210059171597633e-05,
"loss": 1.4679,
"step": 6050
},
{
"epoch": 7.22,
"learning_rate": 3.195266272189349e-05,
"loss": 1.4687,
"step": 6100
},
{
"epoch": 7.28,
"learning_rate": 3.1804733727810655e-05,
"loss": 1.4695,
"step": 6150
},
{
"epoch": 7.34,
"learning_rate": 3.165680473372781e-05,
"loss": 1.4684,
"step": 6200
},
{
"epoch": 7.4,
"learning_rate": 3.150887573964497e-05,
"loss": 1.4689,
"step": 6250
},
{
"epoch": 7.46,
"learning_rate": 3.136094674556213e-05,
"loss": 1.4684,
"step": 6300
},
{
"epoch": 7.51,
"learning_rate": 3.121301775147929e-05,
"loss": 1.4662,
"step": 6350
},
{
"epoch": 7.57,
"learning_rate": 3.106508875739645e-05,
"loss": 1.4654,
"step": 6400
},
{
"epoch": 7.63,
"learning_rate": 3.091715976331361e-05,
"loss": 1.4662,
"step": 6450
},
{
"epoch": 7.69,
"learning_rate": 3.0769230769230774e-05,
"loss": 1.4669,
"step": 6500
},
{
"epoch": 7.75,
"learning_rate": 3.062130177514793e-05,
"loss": 1.4671,
"step": 6550
},
{
"epoch": 7.81,
"learning_rate": 3.047337278106509e-05,
"loss": 1.4647,
"step": 6600
},
{
"epoch": 7.87,
"learning_rate": 3.032544378698225e-05,
"loss": 1.4669,
"step": 6650
},
{
"epoch": 7.93,
"learning_rate": 3.017751479289941e-05,
"loss": 1.4637,
"step": 6700
},
{
"epoch": 7.99,
"learning_rate": 3.0029585798816566e-05,
"loss": 1.4706,
"step": 6750
},
{
"epoch": 8.05,
"learning_rate": 2.9881656804733733e-05,
"loss": 1.4526,
"step": 6800
},
{
"epoch": 8.11,
"learning_rate": 2.973372781065089e-05,
"loss": 1.4544,
"step": 6850
},
{
"epoch": 8.17,
"learning_rate": 2.958579881656805e-05,
"loss": 1.4539,
"step": 6900
},
{
"epoch": 8.22,
"learning_rate": 2.9437869822485207e-05,
"loss": 1.4521,
"step": 6950
},
{
"epoch": 8.28,
"learning_rate": 2.9289940828402368e-05,
"loss": 1.4562,
"step": 7000
},
{
"epoch": 8.34,
"learning_rate": 2.9142011834319528e-05,
"loss": 1.4511,
"step": 7050
},
{
"epoch": 8.4,
"learning_rate": 2.8994082840236685e-05,
"loss": 1.4522,
"step": 7100
},
{
"epoch": 8.46,
"learning_rate": 2.8846153846153845e-05,
"loss": 1.4518,
"step": 7150
},
{
"epoch": 8.52,
"learning_rate": 2.869822485207101e-05,
"loss": 1.4501,
"step": 7200
},
{
"epoch": 8.58,
"learning_rate": 2.855029585798817e-05,
"loss": 1.4577,
"step": 7250
},
{
"epoch": 8.64,
"learning_rate": 2.8402366863905327e-05,
"loss": 1.4475,
"step": 7300
},
{
"epoch": 8.7,
"learning_rate": 2.8254437869822487e-05,
"loss": 1.4528,
"step": 7350
},
{
"epoch": 8.76,
"learning_rate": 2.8106508875739644e-05,
"loss": 1.4495,
"step": 7400
},
{
"epoch": 8.82,
"learning_rate": 2.7958579881656804e-05,
"loss": 1.4552,
"step": 7450
},
{
"epoch": 8.88,
"learning_rate": 2.7810650887573965e-05,
"loss": 1.4544,
"step": 7500
},
{
"epoch": 8.93,
"learning_rate": 2.766272189349113e-05,
"loss": 1.4512,
"step": 7550
},
{
"epoch": 8.99,
"learning_rate": 2.751479289940829e-05,
"loss": 1.4531,
"step": 7600
},
{
"epoch": 9.05,
"learning_rate": 2.7366863905325446e-05,
"loss": 1.4462,
"step": 7650
},
{
"epoch": 9.11,
"learning_rate": 2.7218934911242606e-05,
"loss": 1.4399,
"step": 7700
},
{
"epoch": 9.17,
"learning_rate": 2.7071005917159763e-05,
"loss": 1.4394,
"step": 7750
},
{
"epoch": 9.23,
"learning_rate": 2.6923076923076923e-05,
"loss": 1.4366,
"step": 7800
},
{
"epoch": 9.29,
"learning_rate": 2.6775147928994084e-05,
"loss": 1.4416,
"step": 7850
},
{
"epoch": 9.35,
"learning_rate": 2.6627218934911247e-05,
"loss": 1.4405,
"step": 7900
},
{
"epoch": 9.41,
"learning_rate": 2.6479289940828404e-05,
"loss": 1.4387,
"step": 7950
},
{
"epoch": 9.47,
"learning_rate": 2.6331360946745565e-05,
"loss": 1.4418,
"step": 8000
},
{
"epoch": 9.53,
"learning_rate": 2.6183431952662725e-05,
"loss": 1.4351,
"step": 8050
},
{
"epoch": 9.59,
"learning_rate": 2.6035502958579882e-05,
"loss": 1.4399,
"step": 8100
},
{
"epoch": 9.64,
"learning_rate": 2.5887573964497042e-05,
"loss": 1.4389,
"step": 8150
},
{
"epoch": 9.7,
"learning_rate": 2.57396449704142e-05,
"loss": 1.4382,
"step": 8200
},
{
"epoch": 9.76,
"learning_rate": 2.559171597633136e-05,
"loss": 1.4391,
"step": 8250
},
{
"epoch": 9.82,
"learning_rate": 2.5443786982248524e-05,
"loss": 1.4429,
"step": 8300
},
{
"epoch": 9.88,
"learning_rate": 2.5295857988165684e-05,
"loss": 1.4393,
"step": 8350
},
{
"epoch": 9.94,
"learning_rate": 2.514792899408284e-05,
"loss": 1.4407,
"step": 8400
},
{
"epoch": 10.0,
"learning_rate": 2.5e-05,
"loss": 1.4418,
"step": 8450
},
{
"epoch": 10.06,
"learning_rate": 2.485207100591716e-05,
"loss": 1.4246,
"step": 8500
},
{
"epoch": 10.12,
"learning_rate": 2.4704142011834322e-05,
"loss": 1.4319,
"step": 8550
},
{
"epoch": 10.18,
"learning_rate": 2.4556213017751482e-05,
"loss": 1.4263,
"step": 8600
},
{
"epoch": 10.24,
"learning_rate": 2.440828402366864e-05,
"loss": 1.4334,
"step": 8650
},
{
"epoch": 10.3,
"learning_rate": 2.42603550295858e-05,
"loss": 1.4287,
"step": 8700
},
{
"epoch": 10.36,
"learning_rate": 2.411242603550296e-05,
"loss": 1.4289,
"step": 8750
},
{
"epoch": 10.41,
"learning_rate": 2.396449704142012e-05,
"loss": 1.4285,
"step": 8800
},
{
"epoch": 10.47,
"learning_rate": 2.3816568047337277e-05,
"loss": 1.4286,
"step": 8850
},
{
"epoch": 10.53,
"learning_rate": 2.3668639053254438e-05,
"loss": 1.4273,
"step": 8900
},
{
"epoch": 10.59,
"learning_rate": 2.3520710059171598e-05,
"loss": 1.4291,
"step": 8950
},
{
"epoch": 10.65,
"learning_rate": 2.337278106508876e-05,
"loss": 1.4268,
"step": 9000
},
{
"epoch": 10.71,
"learning_rate": 2.322485207100592e-05,
"loss": 1.4302,
"step": 9050
},
{
"epoch": 10.77,
"learning_rate": 2.307692307692308e-05,
"loss": 1.4313,
"step": 9100
},
{
"epoch": 10.83,
"learning_rate": 2.292899408284024e-05,
"loss": 1.4274,
"step": 9150
},
{
"epoch": 10.89,
"learning_rate": 2.2781065088757396e-05,
"loss": 1.4286,
"step": 9200
},
{
"epoch": 10.95,
"learning_rate": 2.2633136094674557e-05,
"loss": 1.4278,
"step": 9250
},
{
"epoch": 11.01,
"learning_rate": 2.2485207100591717e-05,
"loss": 1.4265,
"step": 9300
},
{
"epoch": 11.07,
"learning_rate": 2.2337278106508877e-05,
"loss": 1.4156,
"step": 9350
},
{
"epoch": 11.12,
"learning_rate": 2.2189349112426034e-05,
"loss": 1.4174,
"step": 9400
},
{
"epoch": 11.18,
"learning_rate": 2.2041420118343195e-05,
"loss": 1.4161,
"step": 9450
},
{
"epoch": 11.24,
"learning_rate": 2.189349112426036e-05,
"loss": 1.4216,
"step": 9500
},
{
"epoch": 11.3,
"learning_rate": 2.1745562130177516e-05,
"loss": 1.4167,
"step": 9550
},
{
"epoch": 11.36,
"learning_rate": 2.1597633136094676e-05,
"loss": 1.4188,
"step": 9600
},
{
"epoch": 11.42,
"learning_rate": 2.1449704142011836e-05,
"loss": 1.4242,
"step": 9650
},
{
"epoch": 11.48,
"learning_rate": 2.1301775147928997e-05,
"loss": 1.4177,
"step": 9700
},
{
"epoch": 11.54,
"learning_rate": 2.1153846153846154e-05,
"loss": 1.4188,
"step": 9750
},
{
"epoch": 11.6,
"learning_rate": 2.1005917159763314e-05,
"loss": 1.4202,
"step": 9800
},
{
"epoch": 11.66,
"learning_rate": 2.0857988165680474e-05,
"loss": 1.4209,
"step": 9850
},
{
"epoch": 11.72,
"learning_rate": 2.0710059171597635e-05,
"loss": 1.4155,
"step": 9900
},
{
"epoch": 11.78,
"learning_rate": 2.0562130177514795e-05,
"loss": 1.4214,
"step": 9950
},
{
"epoch": 11.83,
"learning_rate": 2.0414201183431952e-05,
"loss": 1.4201,
"step": 10000
},
{
"epoch": 11.89,
"learning_rate": 2.0266272189349116e-05,
"loss": 1.4175,
"step": 10050
},
{
"epoch": 11.95,
"learning_rate": 2.0118343195266273e-05,
"loss": 1.4171,
"step": 10100
},
{
"epoch": 12.01,
"learning_rate": 1.9970414201183433e-05,
"loss": 1.4188,
"step": 10150
},
{
"epoch": 12.07,
"learning_rate": 1.9822485207100593e-05,
"loss": 1.4123,
"step": 10200
},
{
"epoch": 12.13,
"learning_rate": 1.9674556213017754e-05,
"loss": 1.4079,
"step": 10250
},
{
"epoch": 12.19,
"learning_rate": 1.952662721893491e-05,
"loss": 1.4057,
"step": 10300
},
{
"epoch": 12.25,
"learning_rate": 1.937869822485207e-05,
"loss": 1.4053,
"step": 10350
},
{
"epoch": 12.31,
"learning_rate": 1.923076923076923e-05,
"loss": 1.41,
"step": 10400
},
{
"epoch": 12.37,
"learning_rate": 1.9082840236686392e-05,
"loss": 1.4113,
"step": 10450
},
{
"epoch": 12.43,
"learning_rate": 1.8934911242603552e-05,
"loss": 1.4073,
"step": 10500
},
{
"epoch": 12.49,
"learning_rate": 1.878698224852071e-05,
"loss": 1.416,
"step": 10550
},
{
"epoch": 12.54,
"learning_rate": 1.8639053254437873e-05,
"loss": 1.4113,
"step": 10600
},
{
"epoch": 12.6,
"learning_rate": 1.849112426035503e-05,
"loss": 1.4095,
"step": 10650
},
{
"epoch": 12.66,
"learning_rate": 1.834319526627219e-05,
"loss": 1.4056,
"step": 10700
},
{
"epoch": 12.72,
"learning_rate": 1.819526627218935e-05,
"loss": 1.4081,
"step": 10750
},
{
"epoch": 12.78,
"learning_rate": 1.804733727810651e-05,
"loss": 1.4123,
"step": 10800
},
{
"epoch": 12.84,
"learning_rate": 1.7899408284023668e-05,
"loss": 1.4106,
"step": 10850
},
{
"epoch": 12.9,
"learning_rate": 1.7751479289940828e-05,
"loss": 1.4123,
"step": 10900
},
{
"epoch": 12.96,
"learning_rate": 1.760355029585799e-05,
"loss": 1.4109,
"step": 10950
},
{
"epoch": 13.02,
"learning_rate": 1.745562130177515e-05,
"loss": 1.4044,
"step": 11000
},
{
"epoch": 13.08,
"learning_rate": 1.730769230769231e-05,
"loss": 1.4006,
"step": 11050
},
{
"epoch": 13.14,
"learning_rate": 1.7159763313609466e-05,
"loss": 1.4004,
"step": 11100
},
{
"epoch": 13.2,
"learning_rate": 1.701183431952663e-05,
"loss": 1.3985,
"step": 11150
},
{
"epoch": 13.25,
"learning_rate": 1.6863905325443787e-05,
"loss": 1.4041,
"step": 11200
},
{
"epoch": 13.31,
"learning_rate": 1.6715976331360947e-05,
"loss": 1.4045,
"step": 11250
},
{
"epoch": 13.37,
"learning_rate": 1.6568047337278108e-05,
"loss": 1.4003,
"step": 11300
},
{
"epoch": 13.43,
"learning_rate": 1.6420118343195268e-05,
"loss": 1.3994,
"step": 11350
},
{
"epoch": 13.49,
"learning_rate": 1.6272189349112425e-05,
"loss": 1.403,
"step": 11400
},
{
"epoch": 13.55,
"learning_rate": 1.6124260355029585e-05,
"loss": 1.4027,
"step": 11450
},
{
"epoch": 13.61,
"learning_rate": 1.5976331360946746e-05,
"loss": 1.4023,
"step": 11500
},
{
"epoch": 13.67,
"learning_rate": 1.5828402366863906e-05,
"loss": 1.4011,
"step": 11550
},
{
"epoch": 13.73,
"learning_rate": 1.5680473372781066e-05,
"loss": 1.3999,
"step": 11600
},
{
"epoch": 13.79,
"learning_rate": 1.5532544378698223e-05,
"loss": 1.4028,
"step": 11650
},
{
"epoch": 13.85,
"learning_rate": 1.5384615384615387e-05,
"loss": 1.404,
"step": 11700
},
{
"epoch": 13.91,
"learning_rate": 1.5236686390532546e-05,
"loss": 1.4037,
"step": 11750
},
{
"epoch": 13.96,
"learning_rate": 1.5088757396449705e-05,
"loss": 1.4025,
"step": 11800
},
{
"epoch": 14.02,
"learning_rate": 1.4940828402366867e-05,
"loss": 1.4015,
"step": 11850
},
{
"epoch": 14.08,
"learning_rate": 1.4792899408284025e-05,
"loss": 1.3906,
"step": 11900
},
{
"epoch": 14.14,
"learning_rate": 1.4644970414201184e-05,
"loss": 1.3972,
"step": 11950
},
{
"epoch": 14.2,
"learning_rate": 1.4497041420118343e-05,
"loss": 1.3938,
"step": 12000
},
{
"epoch": 14.26,
"learning_rate": 1.4349112426035505e-05,
"loss": 1.3925,
"step": 12050
},
{
"epoch": 14.32,
"learning_rate": 1.4201183431952663e-05,
"loss": 1.3954,
"step": 12100
},
{
"epoch": 14.38,
"learning_rate": 1.4053254437869822e-05,
"loss": 1.3917,
"step": 12150
},
{
"epoch": 14.44,
"learning_rate": 1.3905325443786982e-05,
"loss": 1.3955,
"step": 12200
},
{
"epoch": 14.5,
"learning_rate": 1.3757396449704144e-05,
"loss": 1.3981,
"step": 12250
},
{
"epoch": 14.56,
"learning_rate": 1.3609467455621303e-05,
"loss": 1.3969,
"step": 12300
},
{
"epoch": 14.62,
"learning_rate": 1.3461538461538462e-05,
"loss": 1.3936,
"step": 12350
},
{
"epoch": 14.67,
"learning_rate": 1.3313609467455624e-05,
"loss": 1.3959,
"step": 12400
},
{
"epoch": 14.73,
"learning_rate": 1.3165680473372782e-05,
"loss": 1.3921,
"step": 12450
},
{
"epoch": 14.79,
"learning_rate": 1.3017751479289941e-05,
"loss": 1.396,
"step": 12500
},
{
"epoch": 14.85,
"learning_rate": 1.28698224852071e-05,
"loss": 1.4021,
"step": 12550
},
{
"epoch": 14.91,
"learning_rate": 1.2721893491124262e-05,
"loss": 1.3903,
"step": 12600
},
{
"epoch": 14.97,
"learning_rate": 1.257396449704142e-05,
"loss": 1.3984,
"step": 12650
},
{
"epoch": 15.03,
"learning_rate": 1.242603550295858e-05,
"loss": 1.3901,
"step": 12700
},
{
"epoch": 15.09,
"learning_rate": 1.2278106508875741e-05,
"loss": 1.3892,
"step": 12750
},
{
"epoch": 15.15,
"learning_rate": 1.21301775147929e-05,
"loss": 1.3915,
"step": 12800
},
{
"epoch": 15.21,
"learning_rate": 1.198224852071006e-05,
"loss": 1.3855,
"step": 12850
},
{
"epoch": 15.27,
"learning_rate": 1.1834319526627219e-05,
"loss": 1.3946,
"step": 12900
},
{
"epoch": 15.33,
"learning_rate": 1.168639053254438e-05,
"loss": 1.3925,
"step": 12950
},
{
"epoch": 15.38,
"learning_rate": 1.153846153846154e-05,
"loss": 1.3904,
"step": 13000
},
{
"epoch": 15.44,
"learning_rate": 1.1390532544378698e-05,
"loss": 1.3887,
"step": 13050
},
{
"epoch": 15.5,
"learning_rate": 1.1242603550295859e-05,
"loss": 1.3902,
"step": 13100
},
{
"epoch": 15.56,
"learning_rate": 1.1094674556213017e-05,
"loss": 1.3915,
"step": 13150
},
{
"epoch": 15.62,
"learning_rate": 1.094674556213018e-05,
"loss": 1.3895,
"step": 13200
},
{
"epoch": 15.68,
"learning_rate": 1.0798816568047338e-05,
"loss": 1.3852,
"step": 13250
},
{
"epoch": 15.74,
"learning_rate": 1.0650887573964498e-05,
"loss": 1.3863,
"step": 13300
},
{
"epoch": 15.8,
"learning_rate": 1.0502958579881657e-05,
"loss": 1.3872,
"step": 13350
},
{
"epoch": 15.86,
"learning_rate": 1.0355029585798817e-05,
"loss": 1.3875,
"step": 13400
},
{
"epoch": 15.92,
"learning_rate": 1.0207100591715976e-05,
"loss": 1.3919,
"step": 13450
},
{
"epoch": 15.98,
"learning_rate": 1.0059171597633136e-05,
"loss": 1.3883,
"step": 13500
},
{
"epoch": 16.04,
"learning_rate": 9.911242603550297e-06,
"loss": 1.3864,
"step": 13550
},
{
"epoch": 16.09,
"learning_rate": 9.763313609467455e-06,
"loss": 1.3801,
"step": 13600
},
{
"epoch": 16.15,
"learning_rate": 9.615384615384616e-06,
"loss": 1.3839,
"step": 13650
},
{
"epoch": 16.21,
"learning_rate": 9.467455621301776e-06,
"loss": 1.3796,
"step": 13700
},
{
"epoch": 16.27,
"learning_rate": 9.319526627218936e-06,
"loss": 1.3852,
"step": 13750
},
{
"epoch": 16.33,
"learning_rate": 9.171597633136095e-06,
"loss": 1.384,
"step": 13800
},
{
"epoch": 16.39,
"learning_rate": 9.023668639053255e-06,
"loss": 1.3887,
"step": 13850
},
{
"epoch": 16.45,
"learning_rate": 8.875739644970414e-06,
"loss": 1.3861,
"step": 13900
},
{
"epoch": 16.51,
"learning_rate": 8.727810650887574e-06,
"loss": 1.3819,
"step": 13950
},
{
"epoch": 16.57,
"learning_rate": 8.579881656804733e-06,
"loss": 1.3845,
"step": 14000
},
{
"epoch": 16.63,
"learning_rate": 8.431952662721893e-06,
"loss": 1.3875,
"step": 14050
},
{
"epoch": 16.69,
"learning_rate": 8.284023668639054e-06,
"loss": 1.3842,
"step": 14100
},
{
"epoch": 16.75,
"learning_rate": 8.136094674556213e-06,
"loss": 1.3803,
"step": 14150
},
{
"epoch": 16.8,
"learning_rate": 7.988165680473373e-06,
"loss": 1.3822,
"step": 14200
},
{
"epoch": 16.86,
"learning_rate": 7.840236686390533e-06,
"loss": 1.3879,
"step": 14250
},
{
"epoch": 16.92,
"learning_rate": 7.692307692307694e-06,
"loss": 1.3829,
"step": 14300
},
{
"epoch": 16.98,
"learning_rate": 7.544378698224852e-06,
"loss": 1.3861,
"step": 14350
},
{
"epoch": 17.04,
"learning_rate": 7.396449704142013e-06,
"loss": 1.3802,
"step": 14400
},
{
"epoch": 17.1,
"learning_rate": 7.248520710059171e-06,
"loss": 1.3734,
"step": 14450
},
{
"epoch": 17.16,
"learning_rate": 7.100591715976332e-06,
"loss": 1.3826,
"step": 14500
},
{
"epoch": 17.22,
"learning_rate": 6.952662721893491e-06,
"loss": 1.3858,
"step": 14550
},
{
"epoch": 17.28,
"learning_rate": 6.8047337278106515e-06,
"loss": 1.3781,
"step": 14600
},
{
"epoch": 17.34,
"learning_rate": 6.656804733727812e-06,
"loss": 1.3831,
"step": 14650
},
{
"epoch": 17.4,
"learning_rate": 6.5088757396449705e-06,
"loss": 1.3755,
"step": 14700
},
{
"epoch": 17.46,
"learning_rate": 6.360946745562131e-06,
"loss": 1.3841,
"step": 14750
},
{
"epoch": 17.51,
"learning_rate": 6.21301775147929e-06,
"loss": 1.3796,
"step": 14800
},
{
"epoch": 17.57,
"learning_rate": 6.06508875739645e-06,
"loss": 1.3799,
"step": 14850
},
{
"epoch": 17.63,
"learning_rate": 5.917159763313609e-06,
"loss": 1.3814,
"step": 14900
},
{
"epoch": 17.69,
"learning_rate": 5.76923076923077e-06,
"loss": 1.3783,
"step": 14950
},
{
"epoch": 17.75,
"learning_rate": 5.621301775147929e-06,
"loss": 1.378,
"step": 15000
},
{
"epoch": 17.81,
"learning_rate": 5.47337278106509e-06,
"loss": 1.379,
"step": 15050
},
{
"epoch": 17.87,
"learning_rate": 5.325443786982249e-06,
"loss": 1.3806,
"step": 15100
},
{
"epoch": 17.93,
"learning_rate": 5.177514792899409e-06,
"loss": 1.3806,
"step": 15150
},
{
"epoch": 17.99,
"learning_rate": 5.029585798816568e-06,
"loss": 1.3809,
"step": 15200
},
{
"epoch": 18.05,
"learning_rate": 4.881656804733728e-06,
"loss": 1.3748,
"step": 15250
},
{
"epoch": 18.11,
"learning_rate": 4.733727810650888e-06,
"loss": 1.38,
"step": 15300
},
{
"epoch": 18.17,
"learning_rate": 4.5857988165680475e-06,
"loss": 1.3729,
"step": 15350
},
{
"epoch": 18.22,
"learning_rate": 4.437869822485207e-06,
"loss": 1.3798,
"step": 15400
},
{
"epoch": 18.28,
"learning_rate": 4.2899408284023666e-06,
"loss": 1.3779,
"step": 15450
},
{
"epoch": 18.34,
"learning_rate": 4.142011834319527e-06,
"loss": 1.377,
"step": 15500
},
{
"epoch": 18.4,
"learning_rate": 3.9940828402366864e-06,
"loss": 1.3722,
"step": 15550
},
{
"epoch": 18.46,
"learning_rate": 3.846153846153847e-06,
"loss": 1.377,
"step": 15600
},
{
"epoch": 18.52,
"learning_rate": 3.6982248520710063e-06,
"loss": 1.3774,
"step": 15650
},
{
"epoch": 18.58,
"learning_rate": 3.550295857988166e-06,
"loss": 1.3803,
"step": 15700
},
{
"epoch": 18.64,
"learning_rate": 3.4023668639053257e-06,
"loss": 1.3754,
"step": 15750
},
{
"epoch": 18.7,
"learning_rate": 3.2544378698224853e-06,
"loss": 1.3825,
"step": 15800
},
{
"epoch": 18.76,
"learning_rate": 3.106508875739645e-06,
"loss": 1.3737,
"step": 15850
},
{
"epoch": 18.82,
"learning_rate": 2.9585798816568047e-06,
"loss": 1.3765,
"step": 15900
},
{
"epoch": 18.88,
"learning_rate": 2.8106508875739646e-06,
"loss": 1.3763,
"step": 15950
},
{
"epoch": 18.93,
"learning_rate": 2.6627218934911246e-06,
"loss": 1.3733,
"step": 16000
},
{
"epoch": 18.99,
"learning_rate": 2.514792899408284e-06,
"loss": 1.3763,
"step": 16050
},
{
"epoch": 19.05,
"learning_rate": 2.366863905325444e-06,
"loss": 1.3739,
"step": 16100
},
{
"epoch": 19.11,
"learning_rate": 2.2189349112426035e-06,
"loss": 1.3749,
"step": 16150
},
{
"epoch": 19.17,
"learning_rate": 2.0710059171597635e-06,
"loss": 1.3695,
"step": 16200
},
{
"epoch": 19.23,
"learning_rate": 1.9230769230769234e-06,
"loss": 1.3725,
"step": 16250
},
{
"epoch": 19.29,
"learning_rate": 1.775147928994083e-06,
"loss": 1.373,
"step": 16300
},
{
"epoch": 19.35,
"learning_rate": 1.6272189349112426e-06,
"loss": 1.3751,
"step": 16350
},
{
"epoch": 19.41,
"learning_rate": 1.4792899408284024e-06,
"loss": 1.375,
"step": 16400
},
{
"epoch": 19.47,
"learning_rate": 1.3313609467455623e-06,
"loss": 1.3767,
"step": 16450
},
{
"epoch": 19.53,
"learning_rate": 1.183431952662722e-06,
"loss": 1.369,
"step": 16500
},
{
"epoch": 19.59,
"learning_rate": 1.0355029585798817e-06,
"loss": 1.3773,
"step": 16550
},
{
"epoch": 19.64,
"learning_rate": 8.875739644970415e-07,
"loss": 1.3749,
"step": 16600
},
{
"epoch": 19.7,
"learning_rate": 7.396449704142012e-07,
"loss": 1.3725,
"step": 16650
},
{
"epoch": 19.76,
"learning_rate": 5.91715976331361e-07,
"loss": 1.374,
"step": 16700
},
{
"epoch": 19.82,
"learning_rate": 4.4378698224852073e-07,
"loss": 1.3735,
"step": 16750
},
{
"epoch": 19.88,
"learning_rate": 2.958579881656805e-07,
"loss": 1.3759,
"step": 16800
},
{
"epoch": 19.94,
"learning_rate": 1.4792899408284025e-07,
"loss": 1.3747,
"step": 16850
},
{
"epoch": 20.0,
"learning_rate": 0.0,
"loss": 1.3793,
"step": 16900
},
{
"epoch": 20.0,
"step": 16900,
"total_flos": 6.27801669500928e+16,
"train_loss": 1.4740209875163242,
"train_runtime": 8294.5138,
"train_samples_per_second": 260.678,
"train_steps_per_second": 2.037
}
],
"max_steps": 16900,
"num_train_epochs": 20,
"total_flos": 6.27801669500928e+16,
"trial_name": null,
"trial_params": null
}