jasoneden's picture
Upload 10 files
e50f87b
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 125600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4,
"learning_rate": 2.988296178343949e-05,
"loss": 7.6975,
"step": 500
},
{
"epoch": 0.8,
"learning_rate": 2.9763535031847134e-05,
"loss": 4.413,
"step": 1000
},
{
"epoch": 1.19,
"learning_rate": 2.964410828025478e-05,
"loss": 3.5707,
"step": 1500
},
{
"epoch": 1.59,
"learning_rate": 2.952468152866242e-05,
"loss": 3.4966,
"step": 2000
},
{
"epoch": 1.99,
"learning_rate": 2.9405254777070064e-05,
"loss": 3.7477,
"step": 2500
},
{
"epoch": 2.39,
"learning_rate": 2.9285828025477707e-05,
"loss": 3.4618,
"step": 3000
},
{
"epoch": 2.79,
"learning_rate": 2.916640127388535e-05,
"loss": 3.4774,
"step": 3500
},
{
"epoch": 3.18,
"learning_rate": 2.9046974522292994e-05,
"loss": 3.4214,
"step": 4000
},
{
"epoch": 3.58,
"learning_rate": 2.8927786624203823e-05,
"loss": 2.5773,
"step": 4500
},
{
"epoch": 3.98,
"learning_rate": 2.8808359872611467e-05,
"loss": 3.2549,
"step": 5000
},
{
"epoch": 4.38,
"learning_rate": 2.868893312101911e-05,
"loss": 2.4922,
"step": 5500
},
{
"epoch": 4.78,
"learning_rate": 2.856950636942675e-05,
"loss": 2.6262,
"step": 6000
},
{
"epoch": 5.18,
"learning_rate": 2.845031847133758e-05,
"loss": 2.5596,
"step": 6500
},
{
"epoch": 5.57,
"learning_rate": 2.8331130573248408e-05,
"loss": 2.7913,
"step": 7000
},
{
"epoch": 5.97,
"learning_rate": 2.821170382165605e-05,
"loss": 3.0112,
"step": 7500
},
{
"epoch": 6.37,
"learning_rate": 2.8092277070063698e-05,
"loss": 2.6159,
"step": 8000
},
{
"epoch": 6.77,
"learning_rate": 2.7972850318471338e-05,
"loss": 3.9984,
"step": 8500
},
{
"epoch": 7.17,
"learning_rate": 2.785342356687898e-05,
"loss": 2.6099,
"step": 9000
},
{
"epoch": 7.56,
"learning_rate": 2.7733996815286625e-05,
"loss": 2.5667,
"step": 9500
},
{
"epoch": 7.96,
"learning_rate": 2.7614570063694268e-05,
"loss": 2.5934,
"step": 10000
},
{
"epoch": 8.36,
"learning_rate": 2.7495382165605094e-05,
"loss": 2.7481,
"step": 10500
},
{
"epoch": 8.76,
"learning_rate": 2.737595541401274e-05,
"loss": 3.0303,
"step": 11000
},
{
"epoch": 9.16,
"learning_rate": 2.7256528662420384e-05,
"loss": 2.8187,
"step": 11500
},
{
"epoch": 9.55,
"learning_rate": 2.7137101910828027e-05,
"loss": 2.8765,
"step": 12000
},
{
"epoch": 9.95,
"learning_rate": 2.7017914012738853e-05,
"loss": 2.8891,
"step": 12500
},
{
"epoch": 10.35,
"learning_rate": 2.6898487261146496e-05,
"loss": 2.6341,
"step": 13000
},
{
"epoch": 10.75,
"learning_rate": 2.677906050955414e-05,
"loss": 2.9864,
"step": 13500
},
{
"epoch": 11.15,
"learning_rate": 2.6659633757961786e-05,
"loss": 2.3945,
"step": 14000
},
{
"epoch": 11.54,
"learning_rate": 2.6540207006369426e-05,
"loss": 2.4969,
"step": 14500
},
{
"epoch": 11.94,
"learning_rate": 2.6421019108280255e-05,
"loss": 3.4386,
"step": 15000
},
{
"epoch": 12.34,
"learning_rate": 2.63015923566879e-05,
"loss": 2.1529,
"step": 15500
},
{
"epoch": 12.74,
"learning_rate": 2.6182165605095542e-05,
"loss": 2.4111,
"step": 16000
},
{
"epoch": 13.14,
"learning_rate": 2.6062738853503186e-05,
"loss": 2.1625,
"step": 16500
},
{
"epoch": 13.54,
"learning_rate": 2.594355095541401e-05,
"loss": 2.221,
"step": 17000
},
{
"epoch": 13.93,
"learning_rate": 2.5824124203821655e-05,
"loss": 2.3075,
"step": 17500
},
{
"epoch": 14.33,
"learning_rate": 2.57046974522293e-05,
"loss": 2.0438,
"step": 18000
},
{
"epoch": 14.73,
"learning_rate": 2.5585270700636945e-05,
"loss": 2.2311,
"step": 18500
},
{
"epoch": 15.13,
"learning_rate": 2.5465843949044585e-05,
"loss": 2.3121,
"step": 19000
},
{
"epoch": 15.53,
"learning_rate": 2.5346417197452228e-05,
"loss": 1.8979,
"step": 19500
},
{
"epoch": 15.92,
"learning_rate": 2.5226990445859875e-05,
"loss": 2.0793,
"step": 20000
},
{
"epoch": 16.32,
"learning_rate": 2.5107563694267518e-05,
"loss": 2.4946,
"step": 20500
},
{
"epoch": 16.72,
"learning_rate": 2.4988136942675158e-05,
"loss": 2.6039,
"step": 21000
},
{
"epoch": 17.12,
"learning_rate": 2.4868949044585987e-05,
"loss": 2.3325,
"step": 21500
},
{
"epoch": 17.52,
"learning_rate": 2.4749761146496816e-05,
"loss": 2.5405,
"step": 22000
},
{
"epoch": 17.91,
"learning_rate": 2.463033439490446e-05,
"loss": 2.2666,
"step": 22500
},
{
"epoch": 18.31,
"learning_rate": 2.4510907643312103e-05,
"loss": 1.8855,
"step": 23000
},
{
"epoch": 18.71,
"learning_rate": 2.439171974522293e-05,
"loss": 2.5188,
"step": 23500
},
{
"epoch": 19.11,
"learning_rate": 2.4272292993630572e-05,
"loss": 2.0856,
"step": 24000
},
{
"epoch": 19.51,
"learning_rate": 2.415286624203822e-05,
"loss": 2.04,
"step": 24500
},
{
"epoch": 19.9,
"learning_rate": 2.4033439490445862e-05,
"loss": 2.0085,
"step": 25000
},
{
"epoch": 20.3,
"learning_rate": 2.3914012738853502e-05,
"loss": 2.2144,
"step": 25500
},
{
"epoch": 20.7,
"learning_rate": 2.3794585987261145e-05,
"loss": 2.1031,
"step": 26000
},
{
"epoch": 21.1,
"learning_rate": 2.3675159235668792e-05,
"loss": 2.1336,
"step": 26500
},
{
"epoch": 21.5,
"learning_rate": 2.3555732484076436e-05,
"loss": 2.3206,
"step": 27000
},
{
"epoch": 21.89,
"learning_rate": 2.3436783439490447e-05,
"loss": 2.0794,
"step": 27500
},
{
"epoch": 22.29,
"learning_rate": 2.3317356687898087e-05,
"loss": 2.0694,
"step": 28000
},
{
"epoch": 22.69,
"learning_rate": 2.3197929936305734e-05,
"loss": 2.0866,
"step": 28500
},
{
"epoch": 23.09,
"learning_rate": 2.3078503184713377e-05,
"loss": 2.2699,
"step": 29000
},
{
"epoch": 23.49,
"learning_rate": 2.295907643312102e-05,
"loss": 2.0621,
"step": 29500
},
{
"epoch": 23.89,
"learning_rate": 2.283964968152866e-05,
"loss": 2.1662,
"step": 30000
},
{
"epoch": 24.28,
"learning_rate": 2.2720222929936307e-05,
"loss": 1.9482,
"step": 30500
},
{
"epoch": 24.68,
"learning_rate": 2.260079617834395e-05,
"loss": 1.9606,
"step": 31000
},
{
"epoch": 25.08,
"learning_rate": 2.2481369426751594e-05,
"loss": 2.2075,
"step": 31500
},
{
"epoch": 25.48,
"learning_rate": 2.2361942675159234e-05,
"loss": 1.9507,
"step": 32000
},
{
"epoch": 25.88,
"learning_rate": 2.224251592356688e-05,
"loss": 1.99,
"step": 32500
},
{
"epoch": 26.27,
"learning_rate": 2.2123089171974524e-05,
"loss": 2.0022,
"step": 33000
},
{
"epoch": 26.67,
"learning_rate": 2.2003662420382167e-05,
"loss": 2.0491,
"step": 33500
},
{
"epoch": 27.07,
"learning_rate": 2.1884235668789807e-05,
"loss": 2.1034,
"step": 34000
},
{
"epoch": 27.47,
"learning_rate": 2.1765047770700636e-05,
"loss": 2.0613,
"step": 34500
},
{
"epoch": 27.87,
"learning_rate": 2.164562101910828e-05,
"loss": 2.0392,
"step": 35000
},
{
"epoch": 28.26,
"learning_rate": 2.1526194267515926e-05,
"loss": 1.9726,
"step": 35500
},
{
"epoch": 28.66,
"learning_rate": 2.1406767515923566e-05,
"loss": 1.6027,
"step": 36000
},
{
"epoch": 29.06,
"learning_rate": 2.128734076433121e-05,
"loss": 2.5043,
"step": 36500
},
{
"epoch": 29.46,
"learning_rate": 2.1167914012738853e-05,
"loss": 2.111,
"step": 37000
},
{
"epoch": 29.86,
"learning_rate": 2.10484872611465e-05,
"loss": 2.0965,
"step": 37500
},
{
"epoch": 30.25,
"learning_rate": 2.092906050955414e-05,
"loss": 1.9389,
"step": 38000
},
{
"epoch": 30.65,
"learning_rate": 2.0809633757961783e-05,
"loss": 2.1853,
"step": 38500
},
{
"epoch": 31.05,
"learning_rate": 2.0690207006369427e-05,
"loss": 1.946,
"step": 39000
},
{
"epoch": 31.45,
"learning_rate": 2.0570780254777073e-05,
"loss": 2.0071,
"step": 39500
},
{
"epoch": 31.85,
"learning_rate": 2.0451831210191085e-05,
"loss": 2.2209,
"step": 40000
},
{
"epoch": 32.25,
"learning_rate": 2.0332404458598725e-05,
"loss": 2.0641,
"step": 40500
},
{
"epoch": 32.64,
"learning_rate": 2.021297770700637e-05,
"loss": 1.9674,
"step": 41000
},
{
"epoch": 33.04,
"learning_rate": 2.0093550955414015e-05,
"loss": 2.0169,
"step": 41500
},
{
"epoch": 33.44,
"learning_rate": 1.9974124203821658e-05,
"loss": 2.0076,
"step": 42000
},
{
"epoch": 33.84,
"learning_rate": 1.9854936305732484e-05,
"loss": 2.2492,
"step": 42500
},
{
"epoch": 34.24,
"learning_rate": 1.9735509554140127e-05,
"loss": 1.7799,
"step": 43000
},
{
"epoch": 34.63,
"learning_rate": 1.9616321656050956e-05,
"loss": 2.1029,
"step": 43500
},
{
"epoch": 35.03,
"learning_rate": 1.94968949044586e-05,
"loss": 1.8489,
"step": 44000
},
{
"epoch": 35.43,
"learning_rate": 1.937770700636943e-05,
"loss": 1.6914,
"step": 44500
},
{
"epoch": 35.83,
"learning_rate": 1.925828025477707e-05,
"loss": 2.4834,
"step": 45000
},
{
"epoch": 36.23,
"learning_rate": 1.9138853503184712e-05,
"loss": 1.7828,
"step": 45500
},
{
"epoch": 36.62,
"learning_rate": 1.901942675159236e-05,
"loss": 1.7454,
"step": 46000
},
{
"epoch": 37.02,
"learning_rate": 1.8900000000000002e-05,
"loss": 2.2061,
"step": 46500
},
{
"epoch": 37.42,
"learning_rate": 1.8780573248407642e-05,
"loss": 1.9779,
"step": 47000
},
{
"epoch": 37.82,
"learning_rate": 1.8661146496815285e-05,
"loss": 1.9194,
"step": 47500
},
{
"epoch": 38.22,
"learning_rate": 1.8541719745222932e-05,
"loss": 1.9433,
"step": 48000
},
{
"epoch": 38.61,
"learning_rate": 1.8422292993630575e-05,
"loss": 1.9242,
"step": 48500
},
{
"epoch": 39.01,
"learning_rate": 1.8302866242038215e-05,
"loss": 1.9324,
"step": 49000
},
{
"epoch": 39.41,
"learning_rate": 1.818343949044586e-05,
"loss": 1.9326,
"step": 49500
},
{
"epoch": 39.81,
"learning_rate": 1.8064012738853506e-05,
"loss": 1.8975,
"step": 50000
},
{
"epoch": 40.21,
"learning_rate": 1.794458598726115e-05,
"loss": 2.0671,
"step": 50500
},
{
"epoch": 40.61,
"learning_rate": 1.782515923566879e-05,
"loss": 2.209,
"step": 51000
},
{
"epoch": 41.0,
"learning_rate": 1.7705971337579618e-05,
"loss": 1.9638,
"step": 51500
},
{
"epoch": 41.4,
"learning_rate": 1.758654458598726e-05,
"loss": 1.6372,
"step": 52000
},
{
"epoch": 41.8,
"learning_rate": 1.7467117834394905e-05,
"loss": 2.1012,
"step": 52500
},
{
"epoch": 42.2,
"learning_rate": 1.7347691082802548e-05,
"loss": 2.1207,
"step": 53000
},
{
"epoch": 42.6,
"learning_rate": 1.722826433121019e-05,
"loss": 1.8135,
"step": 53500
},
{
"epoch": 42.99,
"learning_rate": 1.7108837579617835e-05,
"loss": 2.1076,
"step": 54000
},
{
"epoch": 43.39,
"learning_rate": 1.6989410828025478e-05,
"loss": 1.8442,
"step": 54500
},
{
"epoch": 43.79,
"learning_rate": 1.686998407643312e-05,
"loss": 1.8598,
"step": 55000
},
{
"epoch": 44.19,
"learning_rate": 1.6750557324840765e-05,
"loss": 1.8612,
"step": 55500
},
{
"epoch": 44.59,
"learning_rate": 1.6631369426751594e-05,
"loss": 1.7965,
"step": 56000
},
{
"epoch": 44.98,
"learning_rate": 1.6511942675159237e-05,
"loss": 1.8743,
"step": 56500
},
{
"epoch": 45.38,
"learning_rate": 1.6392515923566877e-05,
"loss": 1.8581,
"step": 57000
},
{
"epoch": 45.78,
"learning_rate": 1.627308917197452e-05,
"loss": 1.8212,
"step": 57500
},
{
"epoch": 46.18,
"learning_rate": 1.6153662420382167e-05,
"loss": 1.7475,
"step": 58000
},
{
"epoch": 46.58,
"learning_rate": 1.603423566878981e-05,
"loss": 1.5722,
"step": 58500
},
{
"epoch": 46.97,
"learning_rate": 1.591480891719745e-05,
"loss": 1.7929,
"step": 59000
},
{
"epoch": 47.37,
"learning_rate": 1.5795382165605094e-05,
"loss": 1.7854,
"step": 59500
},
{
"epoch": 47.77,
"learning_rate": 1.5676194267515923e-05,
"loss": 1.6686,
"step": 60000
},
{
"epoch": 48.17,
"learning_rate": 1.5556767515923566e-05,
"loss": 1.8969,
"step": 60500
},
{
"epoch": 48.57,
"learning_rate": 1.543734076433121e-05,
"loss": 1.6989,
"step": 61000
},
{
"epoch": 48.96,
"learning_rate": 1.5317914012738853e-05,
"loss": 1.9204,
"step": 61500
},
{
"epoch": 49.36,
"learning_rate": 1.5198726114649682e-05,
"loss": 1.7297,
"step": 62000
},
{
"epoch": 49.76,
"learning_rate": 1.5079299363057326e-05,
"loss": 1.6225,
"step": 62500
},
{
"epoch": 50.16,
"learning_rate": 1.4959872611464969e-05,
"loss": 1.8492,
"step": 63000
},
{
"epoch": 50.56,
"learning_rate": 1.4840445859872612e-05,
"loss": 1.948,
"step": 63500
},
{
"epoch": 50.96,
"learning_rate": 1.472125796178344e-05,
"loss": 1.4926,
"step": 64000
},
{
"epoch": 51.35,
"learning_rate": 1.4601831210191083e-05,
"loss": 1.8296,
"step": 64500
},
{
"epoch": 51.75,
"learning_rate": 1.4482404458598726e-05,
"loss": 1.672,
"step": 65000
},
{
"epoch": 52.15,
"learning_rate": 1.436297770700637e-05,
"loss": 1.7793,
"step": 65500
},
{
"epoch": 52.55,
"learning_rate": 1.4243789808917199e-05,
"loss": 1.6569,
"step": 66000
},
{
"epoch": 52.95,
"learning_rate": 1.412436305732484e-05,
"loss": 2.2555,
"step": 66500
},
{
"epoch": 53.34,
"learning_rate": 1.4004936305732486e-05,
"loss": 1.7448,
"step": 67000
},
{
"epoch": 53.74,
"learning_rate": 1.3885509554140127e-05,
"loss": 1.8515,
"step": 67500
},
{
"epoch": 54.14,
"learning_rate": 1.3766321656050956e-05,
"loss": 1.5126,
"step": 68000
},
{
"epoch": 54.54,
"learning_rate": 1.3646894904458598e-05,
"loss": 1.8567,
"step": 68500
},
{
"epoch": 54.94,
"learning_rate": 1.3527468152866243e-05,
"loss": 1.8848,
"step": 69000
},
{
"epoch": 55.33,
"learning_rate": 1.340828025477707e-05,
"loss": 1.6216,
"step": 69500
},
{
"epoch": 55.73,
"learning_rate": 1.3288853503184714e-05,
"loss": 2.0117,
"step": 70000
},
{
"epoch": 56.13,
"learning_rate": 1.3169426751592357e-05,
"loss": 1.8113,
"step": 70500
},
{
"epoch": 56.53,
"learning_rate": 1.305e-05,
"loss": 1.7053,
"step": 71000
},
{
"epoch": 56.93,
"learning_rate": 1.2930573248407644e-05,
"loss": 1.7271,
"step": 71500
},
{
"epoch": 57.32,
"learning_rate": 1.2811385350318471e-05,
"loss": 1.6382,
"step": 72000
},
{
"epoch": 57.72,
"learning_rate": 1.2691958598726116e-05,
"loss": 1.5688,
"step": 72500
},
{
"epoch": 58.12,
"learning_rate": 1.2572531847133758e-05,
"loss": 1.6947,
"step": 73000
},
{
"epoch": 58.52,
"learning_rate": 1.2453105095541403e-05,
"loss": 1.5709,
"step": 73500
},
{
"epoch": 58.92,
"learning_rate": 1.2333678343949045e-05,
"loss": 1.8282,
"step": 74000
},
{
"epoch": 59.32,
"learning_rate": 1.221425159235669e-05,
"loss": 1.6447,
"step": 74500
},
{
"epoch": 59.71,
"learning_rate": 1.2095063694267515e-05,
"loss": 2.1066,
"step": 75000
},
{
"epoch": 60.11,
"learning_rate": 1.197563694267516e-05,
"loss": 1.7348,
"step": 75500
},
{
"epoch": 60.51,
"learning_rate": 1.1856210191082802e-05,
"loss": 1.5,
"step": 76000
},
{
"epoch": 60.91,
"learning_rate": 1.1736783439490447e-05,
"loss": 1.8146,
"step": 76500
},
{
"epoch": 61.31,
"learning_rate": 1.1617356687898089e-05,
"loss": 1.7271,
"step": 77000
},
{
"epoch": 61.7,
"learning_rate": 1.1497929936305734e-05,
"loss": 1.4879,
"step": 77500
},
{
"epoch": 62.1,
"learning_rate": 1.1378503184713375e-05,
"loss": 1.7706,
"step": 78000
},
{
"epoch": 62.5,
"learning_rate": 1.125907643312102e-05,
"loss": 1.7463,
"step": 78500
},
{
"epoch": 62.9,
"learning_rate": 1.1139888535031846e-05,
"loss": 1.4587,
"step": 79000
},
{
"epoch": 63.3,
"learning_rate": 1.1020461783439491e-05,
"loss": 1.754,
"step": 79500
},
{
"epoch": 63.69,
"learning_rate": 1.0901035031847133e-05,
"loss": 1.6684,
"step": 80000
},
{
"epoch": 64.09,
"learning_rate": 1.0781847133757962e-05,
"loss": 1.4427,
"step": 80500
},
{
"epoch": 64.49,
"learning_rate": 1.0662420382165605e-05,
"loss": 1.5719,
"step": 81000
},
{
"epoch": 64.89,
"learning_rate": 1.0542993630573249e-05,
"loss": 1.7179,
"step": 81500
},
{
"epoch": 65.29,
"learning_rate": 1.0423566878980892e-05,
"loss": 1.6181,
"step": 82000
},
{
"epoch": 65.68,
"learning_rate": 1.0304140127388535e-05,
"loss": 1.4319,
"step": 82500
},
{
"epoch": 66.08,
"learning_rate": 1.0185191082802548e-05,
"loss": 1.78,
"step": 83000
},
{
"epoch": 66.48,
"learning_rate": 1.0065764331210192e-05,
"loss": 1.5097,
"step": 83500
},
{
"epoch": 66.88,
"learning_rate": 9.946337579617835e-06,
"loss": 1.6796,
"step": 84000
},
{
"epoch": 67.28,
"learning_rate": 9.826910828025479e-06,
"loss": 1.7987,
"step": 84500
},
{
"epoch": 67.68,
"learning_rate": 9.707484076433122e-06,
"loss": 1.5529,
"step": 85000
},
{
"epoch": 68.07,
"learning_rate": 9.588057324840764e-06,
"loss": 1.7102,
"step": 85500
},
{
"epoch": 68.47,
"learning_rate": 9.468630573248409e-06,
"loss": 1.7089,
"step": 86000
},
{
"epoch": 68.87,
"learning_rate": 9.34920382165605e-06,
"loss": 1.5696,
"step": 86500
},
{
"epoch": 69.27,
"learning_rate": 9.229777070063695e-06,
"loss": 1.4092,
"step": 87000
},
{
"epoch": 69.67,
"learning_rate": 9.110589171974523e-06,
"loss": 1.7112,
"step": 87500
},
{
"epoch": 70.06,
"learning_rate": 8.991162420382166e-06,
"loss": 1.6769,
"step": 88000
},
{
"epoch": 70.46,
"learning_rate": 8.87173566878981e-06,
"loss": 1.7474,
"step": 88500
},
{
"epoch": 70.86,
"learning_rate": 8.752547770700637e-06,
"loss": 1.3906,
"step": 89000
},
{
"epoch": 71.26,
"learning_rate": 8.63312101910828e-06,
"loss": 1.6714,
"step": 89500
},
{
"epoch": 71.66,
"learning_rate": 8.513694267515923e-06,
"loss": 1.6018,
"step": 90000
},
{
"epoch": 72.05,
"learning_rate": 8.394267515923567e-06,
"loss": 1.4916,
"step": 90500
},
{
"epoch": 72.45,
"learning_rate": 8.27484076433121e-06,
"loss": 1.8035,
"step": 91000
},
{
"epoch": 72.85,
"learning_rate": 8.155414012738854e-06,
"loss": 1.3387,
"step": 91500
},
{
"epoch": 73.25,
"learning_rate": 8.035987261146497e-06,
"loss": 1.5553,
"step": 92000
},
{
"epoch": 73.65,
"learning_rate": 7.91656050955414e-06,
"loss": 1.5534,
"step": 92500
},
{
"epoch": 74.04,
"learning_rate": 7.797133757961784e-06,
"loss": 1.3926,
"step": 93000
},
{
"epoch": 74.44,
"learning_rate": 7.677707006369427e-06,
"loss": 1.5332,
"step": 93500
},
{
"epoch": 74.84,
"learning_rate": 7.5582802547770704e-06,
"loss": 1.5935,
"step": 94000
},
{
"epoch": 75.24,
"learning_rate": 7.438853503184713e-06,
"loss": 1.6671,
"step": 94500
},
{
"epoch": 75.64,
"learning_rate": 7.319665605095542e-06,
"loss": 1.662,
"step": 95000
},
{
"epoch": 76.04,
"learning_rate": 7.200238853503185e-06,
"loss": 1.6435,
"step": 95500
},
{
"epoch": 76.43,
"learning_rate": 7.080812101910829e-06,
"loss": 1.5282,
"step": 96000
},
{
"epoch": 76.83,
"learning_rate": 6.961385350318472e-06,
"loss": 1.6214,
"step": 96500
},
{
"epoch": 77.23,
"learning_rate": 6.842197452229299e-06,
"loss": 1.2999,
"step": 97000
},
{
"epoch": 77.63,
"learning_rate": 6.722770700636943e-06,
"loss": 1.4861,
"step": 97500
},
{
"epoch": 78.03,
"learning_rate": 6.603343949044586e-06,
"loss": 1.493,
"step": 98000
},
{
"epoch": 78.42,
"learning_rate": 6.484156050955414e-06,
"loss": 1.5782,
"step": 98500
},
{
"epoch": 78.82,
"learning_rate": 6.364729299363058e-06,
"loss": 1.6043,
"step": 99000
},
{
"epoch": 79.22,
"learning_rate": 6.245302547770701e-06,
"loss": 1.7383,
"step": 99500
},
{
"epoch": 79.62,
"learning_rate": 6.1258757961783444e-06,
"loss": 1.5453,
"step": 100000
},
{
"epoch": 80.02,
"learning_rate": 6.006449044585987e-06,
"loss": 1.6063,
"step": 100500
},
{
"epoch": 80.41,
"learning_rate": 5.88702229299363e-06,
"loss": 1.5471,
"step": 101000
},
{
"epoch": 80.81,
"learning_rate": 5.767595541401274e-06,
"loss": 1.6526,
"step": 101500
},
{
"epoch": 81.21,
"learning_rate": 5.648168789808917e-06,
"loss": 1.4817,
"step": 102000
},
{
"epoch": 81.61,
"learning_rate": 5.528980891719745e-06,
"loss": 1.3771,
"step": 102500
},
{
"epoch": 82.01,
"learning_rate": 5.409554140127389e-06,
"loss": 1.5052,
"step": 103000
},
{
"epoch": 82.4,
"learning_rate": 5.290127388535032e-06,
"loss": 1.7024,
"step": 103500
},
{
"epoch": 82.8,
"learning_rate": 5.170700636942675e-06,
"loss": 1.6985,
"step": 104000
},
{
"epoch": 83.2,
"learning_rate": 5.051512738853503e-06,
"loss": 1.5487,
"step": 104500
},
{
"epoch": 83.6,
"learning_rate": 4.932085987261146e-06,
"loss": 1.3429,
"step": 105000
},
{
"epoch": 84.0,
"learning_rate": 4.812659235668789e-06,
"loss": 1.6937,
"step": 105500
},
{
"epoch": 84.39,
"learning_rate": 4.6934713375796184e-06,
"loss": 1.5639,
"step": 106000
},
{
"epoch": 84.79,
"learning_rate": 4.574044585987262e-06,
"loss": 1.5466,
"step": 106500
},
{
"epoch": 85.19,
"learning_rate": 4.454617834394905e-06,
"loss": 1.4726,
"step": 107000
},
{
"epoch": 85.59,
"learning_rate": 4.3351910828025485e-06,
"loss": 1.5633,
"step": 107500
},
{
"epoch": 85.99,
"learning_rate": 4.215764331210192e-06,
"loss": 1.5687,
"step": 108000
},
{
"epoch": 86.39,
"learning_rate": 4.096337579617834e-06,
"loss": 1.2633,
"step": 108500
},
{
"epoch": 86.78,
"learning_rate": 3.976910828025478e-06,
"loss": 1.5035,
"step": 109000
},
{
"epoch": 87.18,
"learning_rate": 3.857484076433121e-06,
"loss": 1.6182,
"step": 109500
},
{
"epoch": 87.58,
"learning_rate": 3.7380573248407645e-06,
"loss": 1.6295,
"step": 110000
},
{
"epoch": 87.98,
"learning_rate": 3.618630573248408e-06,
"loss": 1.5837,
"step": 110500
},
{
"epoch": 88.38,
"learning_rate": 3.4992038216560512e-06,
"loss": 1.3664,
"step": 111000
},
{
"epoch": 88.77,
"learning_rate": 3.3797770700636946e-06,
"loss": 1.6751,
"step": 111500
},
{
"epoch": 89.17,
"learning_rate": 3.2605891719745224e-06,
"loss": 1.5701,
"step": 112000
},
{
"epoch": 89.57,
"learning_rate": 3.1411624203821653e-06,
"loss": 1.4553,
"step": 112500
},
{
"epoch": 89.97,
"learning_rate": 3.0217356687898087e-06,
"loss": 1.4953,
"step": 113000
},
{
"epoch": 90.37,
"learning_rate": 2.902308917197452e-06,
"loss": 1.6721,
"step": 113500
},
{
"epoch": 90.76,
"learning_rate": 2.7831210191082802e-06,
"loss": 1.4171,
"step": 114000
},
{
"epoch": 91.16,
"learning_rate": 2.6636942675159236e-06,
"loss": 1.8271,
"step": 114500
},
{
"epoch": 91.56,
"learning_rate": 2.544267515923567e-06,
"loss": 1.6508,
"step": 115000
},
{
"epoch": 91.96,
"learning_rate": 2.4248407643312103e-06,
"loss": 1.5092,
"step": 115500
},
{
"epoch": 92.36,
"learning_rate": 2.305652866242038e-06,
"loss": 1.5075,
"step": 116000
},
{
"epoch": 92.75,
"learning_rate": 2.1864649681528663e-06,
"loss": 1.5702,
"step": 116500
},
{
"epoch": 93.15,
"learning_rate": 2.0670382165605097e-06,
"loss": 1.5646,
"step": 117000
},
{
"epoch": 93.55,
"learning_rate": 1.947611464968153e-06,
"loss": 1.4956,
"step": 117500
},
{
"epoch": 93.95,
"learning_rate": 1.8281847133757964e-06,
"loss": 1.5299,
"step": 118000
},
{
"epoch": 94.35,
"learning_rate": 1.7087579617834395e-06,
"loss": 1.5027,
"step": 118500
},
{
"epoch": 94.75,
"learning_rate": 1.5893312101910827e-06,
"loss": 1.5207,
"step": 119000
},
{
"epoch": 95.14,
"learning_rate": 1.469904458598726e-06,
"loss": 1.3726,
"step": 119500
},
{
"epoch": 95.54,
"learning_rate": 1.3504777070063694e-06,
"loss": 1.3402,
"step": 120000
},
{
"epoch": 95.94,
"learning_rate": 1.2312898089171974e-06,
"loss": 1.7752,
"step": 120500
},
{
"epoch": 96.34,
"learning_rate": 1.1121019108280256e-06,
"loss": 1.6895,
"step": 121000
},
{
"epoch": 96.74,
"learning_rate": 9.926751592356687e-07,
"loss": 1.633,
"step": 121500
},
{
"epoch": 97.13,
"learning_rate": 8.732484076433121e-07,
"loss": 1.6033,
"step": 122000
},
{
"epoch": 97.53,
"learning_rate": 7.538216560509554e-07,
"loss": 1.3726,
"step": 122500
},
{
"epoch": 97.93,
"learning_rate": 6.343949044585987e-07,
"loss": 1.3725,
"step": 123000
},
{
"epoch": 98.33,
"learning_rate": 5.149681528662421e-07,
"loss": 1.5944,
"step": 123500
},
{
"epoch": 98.73,
"learning_rate": 3.9554140127388536e-07,
"loss": 1.466,
"step": 124000
},
{
"epoch": 99.12,
"learning_rate": 2.7611464968152867e-07,
"loss": 1.421,
"step": 124500
},
{
"epoch": 99.52,
"learning_rate": 1.569267515923567e-07,
"loss": 1.6345,
"step": 125000
},
{
"epoch": 99.92,
"learning_rate": 3.7500000000000005e-08,
"loss": 1.5021,
"step": 125500
},
{
"epoch": 100.0,
"step": 125600,
"total_flos": 5.249087478816768e+17,
"train_loss": 1.9559951608499904,
"train_runtime": 50817.2182,
"train_samples_per_second": 14.83,
"train_steps_per_second": 2.472
}
],
"max_steps": 125600,
"num_train_epochs": 100,
"total_flos": 5.249087478816768e+17,
"trial_name": null,
"trial_params": null
}