amhric_xlmr-small / trainer_state.json
Atnafu's picture
initial
4dd4dad
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"global_step": 230695,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.989293222653287e-05,
"loss": 9.2291,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.978456403476452e-05,
"loss": 8.5435,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.967619584299617e-05,
"loss": 8.4546,
"step": 1500
},
{
"epoch": 0.04,
"learning_rate": 4.9567827651227814e-05,
"loss": 8.3564,
"step": 2000
},
{
"epoch": 0.05,
"learning_rate": 4.945945945945946e-05,
"loss": 8.3088,
"step": 2500
},
{
"epoch": 0.07,
"learning_rate": 4.935109126769111e-05,
"loss": 8.2156,
"step": 3000
},
{
"epoch": 0.08,
"learning_rate": 4.924272307592276e-05,
"loss": 8.1808,
"step": 3500
},
{
"epoch": 0.09,
"learning_rate": 4.9134354884154406e-05,
"loss": 8.1035,
"step": 4000
},
{
"epoch": 0.1,
"learning_rate": 4.902598669238606e-05,
"loss": 8.032,
"step": 4500
},
{
"epoch": 0.11,
"learning_rate": 4.89176185006177e-05,
"loss": 7.9508,
"step": 5000
},
{
"epoch": 0.12,
"learning_rate": 4.8809250308849345e-05,
"loss": 7.8717,
"step": 5500
},
{
"epoch": 0.13,
"learning_rate": 4.8700882117081e-05,
"loss": 7.8179,
"step": 6000
},
{
"epoch": 0.14,
"learning_rate": 4.8592513925312644e-05,
"loss": 7.729,
"step": 6500
},
{
"epoch": 0.15,
"learning_rate": 4.84841457335443e-05,
"loss": 7.7062,
"step": 7000
},
{
"epoch": 0.16,
"learning_rate": 4.837577754177594e-05,
"loss": 7.623,
"step": 7500
},
{
"epoch": 0.17,
"learning_rate": 4.826740935000759e-05,
"loss": 7.5969,
"step": 8000
},
{
"epoch": 0.18,
"learning_rate": 4.8159041158239236e-05,
"loss": 7.5306,
"step": 8500
},
{
"epoch": 0.2,
"learning_rate": 4.805067296647088e-05,
"loss": 7.4785,
"step": 9000
},
{
"epoch": 0.21,
"learning_rate": 4.794252151108607e-05,
"loss": 7.4348,
"step": 9500
},
{
"epoch": 0.22,
"learning_rate": 4.7834153319317714e-05,
"loss": 7.399,
"step": 10000
},
{
"epoch": 0.23,
"learning_rate": 4.772578512754937e-05,
"loss": 7.3493,
"step": 10500
},
{
"epoch": 0.24,
"learning_rate": 4.7617416935781014e-05,
"loss": 7.287,
"step": 11000
},
{
"epoch": 0.25,
"learning_rate": 4.750904874401266e-05,
"loss": 7.2254,
"step": 11500
},
{
"epoch": 0.26,
"learning_rate": 4.7400897288627846e-05,
"loss": 7.1961,
"step": 12000
},
{
"epoch": 0.27,
"learning_rate": 4.729252909685949e-05,
"loss": 7.1299,
"step": 12500
},
{
"epoch": 0.28,
"learning_rate": 4.718416090509114e-05,
"loss": 7.1257,
"step": 13000
},
{
"epoch": 0.29,
"learning_rate": 4.7075792713322785e-05,
"loss": 7.074,
"step": 13500
},
{
"epoch": 0.3,
"learning_rate": 4.696742452155444e-05,
"loss": 7.052,
"step": 14000
},
{
"epoch": 0.31,
"learning_rate": 4.685927306616962e-05,
"loss": 6.9863,
"step": 14500
},
{
"epoch": 0.33,
"learning_rate": 4.675090487440127e-05,
"loss": 6.9617,
"step": 15000
},
{
"epoch": 0.34,
"learning_rate": 4.664253668263292e-05,
"loss": 6.9034,
"step": 15500
},
{
"epoch": 0.35,
"learning_rate": 4.653416849086456e-05,
"loss": 6.8683,
"step": 16000
},
{
"epoch": 0.36,
"learning_rate": 4.642601703547975e-05,
"loss": 6.8567,
"step": 16500
},
{
"epoch": 0.37,
"learning_rate": 4.6317865580094936e-05,
"loss": 6.8268,
"step": 17000
},
{
"epoch": 0.38,
"learning_rate": 4.6209497388326575e-05,
"loss": 6.7758,
"step": 17500
},
{
"epoch": 0.39,
"learning_rate": 4.610112919655823e-05,
"loss": 6.7915,
"step": 18000
},
{
"epoch": 0.4,
"learning_rate": 4.5992761004789874e-05,
"loss": 6.7391,
"step": 18500
},
{
"epoch": 0.41,
"learning_rate": 4.588439281302153e-05,
"loss": 6.7204,
"step": 19000
},
{
"epoch": 0.42,
"learning_rate": 4.5776024621253173e-05,
"loss": 6.6562,
"step": 19500
},
{
"epoch": 0.43,
"learning_rate": 4.566765642948482e-05,
"loss": 6.6638,
"step": 20000
},
{
"epoch": 0.44,
"learning_rate": 4.5559288237716466e-05,
"loss": 6.6292,
"step": 20500
},
{
"epoch": 0.46,
"learning_rate": 4.545092004594811e-05,
"loss": 6.6086,
"step": 21000
},
{
"epoch": 0.47,
"learning_rate": 4.5342551854179765e-05,
"loss": 6.5568,
"step": 21500
},
{
"epoch": 0.48,
"learning_rate": 4.523418366241141e-05,
"loss": 6.5345,
"step": 22000
},
{
"epoch": 0.49,
"learning_rate": 4.512581547064306e-05,
"loss": 6.556,
"step": 22500
},
{
"epoch": 0.5,
"learning_rate": 4.501744727887471e-05,
"loss": 6.5297,
"step": 23000
},
{
"epoch": 0.51,
"learning_rate": 4.490907908710636e-05,
"loss": 6.4959,
"step": 23500
},
{
"epoch": 0.52,
"learning_rate": 4.4800710895338e-05,
"loss": 6.4772,
"step": 24000
},
{
"epoch": 0.53,
"learning_rate": 4.469255943995318e-05,
"loss": 6.4633,
"step": 24500
},
{
"epoch": 0.54,
"learning_rate": 4.4584191248184836e-05,
"loss": 6.4274,
"step": 25000
},
{
"epoch": 0.55,
"learning_rate": 4.447582305641648e-05,
"loss": 6.4137,
"step": 25500
},
{
"epoch": 0.56,
"learning_rate": 4.436745486464813e-05,
"loss": 6.3865,
"step": 26000
},
{
"epoch": 0.57,
"learning_rate": 4.4259303409263314e-05,
"loss": 6.4233,
"step": 26500
},
{
"epoch": 0.59,
"learning_rate": 4.415093521749497e-05,
"loss": 6.3557,
"step": 27000
},
{
"epoch": 0.6,
"learning_rate": 4.404278376211015e-05,
"loss": 6.3371,
"step": 27500
},
{
"epoch": 0.61,
"learning_rate": 4.39344155703418e-05,
"loss": 6.3477,
"step": 28000
},
{
"epoch": 0.62,
"learning_rate": 4.382604737857344e-05,
"loss": 6.3256,
"step": 28500
},
{
"epoch": 0.63,
"learning_rate": 4.3717679186805086e-05,
"loss": 6.3146,
"step": 29000
},
{
"epoch": 0.64,
"learning_rate": 4.360931099503674e-05,
"loss": 6.2812,
"step": 29500
},
{
"epoch": 0.65,
"learning_rate": 4.3500942803268385e-05,
"loss": 6.2653,
"step": 30000
},
{
"epoch": 0.66,
"learning_rate": 4.339257461150004e-05,
"loss": 6.2625,
"step": 30500
},
{
"epoch": 0.67,
"learning_rate": 4.3284206419731684e-05,
"loss": 6.2311,
"step": 31000
},
{
"epoch": 0.68,
"learning_rate": 4.317583822796333e-05,
"loss": 6.2153,
"step": 31500
},
{
"epoch": 0.69,
"learning_rate": 4.306747003619498e-05,
"loss": 6.2252,
"step": 32000
},
{
"epoch": 0.7,
"learning_rate": 4.295910184442662e-05,
"loss": 6.2012,
"step": 32500
},
{
"epoch": 0.72,
"learning_rate": 4.2850733652658276e-05,
"loss": 6.1814,
"step": 33000
},
{
"epoch": 0.73,
"learning_rate": 4.274236546088992e-05,
"loss": 6.1341,
"step": 33500
},
{
"epoch": 0.74,
"learning_rate": 4.263399726912157e-05,
"loss": 6.1333,
"step": 34000
},
{
"epoch": 0.75,
"learning_rate": 4.252562907735322e-05,
"loss": 6.137,
"step": 34500
},
{
"epoch": 0.76,
"learning_rate": 4.241726088558486e-05,
"loss": 6.1115,
"step": 35000
},
{
"epoch": 0.77,
"learning_rate": 4.230932616658359e-05,
"loss": 6.1138,
"step": 35500
},
{
"epoch": 0.78,
"learning_rate": 4.2200957974815234e-05,
"loss": 6.1054,
"step": 36000
},
{
"epoch": 0.79,
"learning_rate": 4.209258978304688e-05,
"loss": 6.0865,
"step": 36500
},
{
"epoch": 0.8,
"learning_rate": 4.1984438327662066e-05,
"loss": 6.0722,
"step": 37000
},
{
"epoch": 0.81,
"learning_rate": 4.187607013589371e-05,
"loss": 6.0801,
"step": 37500
},
{
"epoch": 0.82,
"learning_rate": 4.1767701944125365e-05,
"loss": 6.0378,
"step": 38000
},
{
"epoch": 0.83,
"learning_rate": 4.165933375235701e-05,
"loss": 6.0162,
"step": 38500
},
{
"epoch": 0.85,
"learning_rate": 4.155096556058866e-05,
"loss": 6.0366,
"step": 39000
},
{
"epoch": 0.86,
"learning_rate": 4.1442597368820304e-05,
"loss": 6.0294,
"step": 39500
},
{
"epoch": 0.87,
"learning_rate": 4.133422917705195e-05,
"loss": 6.0124,
"step": 40000
},
{
"epoch": 0.88,
"learning_rate": 4.12258609852836e-05,
"loss": 5.9881,
"step": 40500
},
{
"epoch": 0.89,
"learning_rate": 4.111749279351525e-05,
"loss": 5.9678,
"step": 41000
},
{
"epoch": 0.9,
"learning_rate": 4.1009124601746896e-05,
"loss": 5.9707,
"step": 41500
},
{
"epoch": 0.91,
"learning_rate": 4.090097314636208e-05,
"loss": 5.976,
"step": 42000
},
{
"epoch": 0.92,
"learning_rate": 4.079260495459373e-05,
"loss": 5.9419,
"step": 42500
},
{
"epoch": 0.93,
"learning_rate": 4.0684453499208915e-05,
"loss": 5.9158,
"step": 43000
},
{
"epoch": 0.94,
"learning_rate": 4.057608530744057e-05,
"loss": 5.9611,
"step": 43500
},
{
"epoch": 0.95,
"learning_rate": 4.046771711567221e-05,
"loss": 5.9104,
"step": 44000
},
{
"epoch": 0.96,
"learning_rate": 4.035934892390385e-05,
"loss": 5.914,
"step": 44500
},
{
"epoch": 0.98,
"learning_rate": 4.0250980732135506e-05,
"loss": 5.8819,
"step": 45000
},
{
"epoch": 0.99,
"learning_rate": 4.014261254036715e-05,
"loss": 5.891,
"step": 45500
},
{
"epoch": 1.0,
"learning_rate": 4.0034244348598806e-05,
"loss": 5.8627,
"step": 46000
},
{
"epoch": 1.01,
"learning_rate": 3.992587615683045e-05,
"loss": 5.8258,
"step": 46500
},
{
"epoch": 1.02,
"learning_rate": 3.98175079650621e-05,
"loss": 5.8346,
"step": 47000
},
{
"epoch": 1.03,
"learning_rate": 3.970957324606082e-05,
"loss": 5.8269,
"step": 47500
},
{
"epoch": 1.04,
"learning_rate": 3.960120505429247e-05,
"loss": 5.8253,
"step": 48000
},
{
"epoch": 1.05,
"learning_rate": 3.949283686252411e-05,
"loss": 5.8178,
"step": 48500
},
{
"epoch": 1.06,
"learning_rate": 3.938446867075576e-05,
"loss": 5.7758,
"step": 49000
},
{
"epoch": 1.07,
"learning_rate": 3.927610047898741e-05,
"loss": 5.7911,
"step": 49500
},
{
"epoch": 1.08,
"learning_rate": 3.9167732287219056e-05,
"loss": 5.7876,
"step": 50000
},
{
"epoch": 1.09,
"learning_rate": 3.905936409545071e-05,
"loss": 5.7695,
"step": 50500
},
{
"epoch": 1.11,
"learning_rate": 3.895121264006589e-05,
"loss": 5.7523,
"step": 51000
},
{
"epoch": 1.12,
"learning_rate": 3.884284444829754e-05,
"loss": 5.7571,
"step": 51500
},
{
"epoch": 1.13,
"learning_rate": 3.873447625652919e-05,
"loss": 5.7947,
"step": 52000
},
{
"epoch": 1.14,
"learning_rate": 3.8626108064760834e-05,
"loss": 5.7286,
"step": 52500
},
{
"epoch": 1.15,
"learning_rate": 3.851773987299248e-05,
"loss": 5.7481,
"step": 53000
},
{
"epoch": 1.16,
"learning_rate": 3.8409371681224126e-05,
"loss": 5.7334,
"step": 53500
},
{
"epoch": 1.17,
"learning_rate": 3.830100348945578e-05,
"loss": 5.7235,
"step": 54000
},
{
"epoch": 1.18,
"learning_rate": 3.8192852034070965e-05,
"loss": 5.6954,
"step": 54500
},
{
"epoch": 1.19,
"learning_rate": 3.808448384230261e-05,
"loss": 5.7148,
"step": 55000
},
{
"epoch": 1.2,
"learning_rate": 3.797611565053426e-05,
"loss": 5.6986,
"step": 55500
},
{
"epoch": 1.21,
"learning_rate": 3.7867747458765904e-05,
"loss": 5.693,
"step": 56000
},
{
"epoch": 1.22,
"learning_rate": 3.775937926699755e-05,
"loss": 5.6841,
"step": 56500
},
{
"epoch": 1.24,
"learning_rate": 3.7651011075229197e-05,
"loss": 5.6561,
"step": 57000
},
{
"epoch": 1.25,
"learning_rate": 3.754264288346085e-05,
"loss": 5.634,
"step": 57500
},
{
"epoch": 1.26,
"learning_rate": 3.7434274691692496e-05,
"loss": 5.641,
"step": 58000
},
{
"epoch": 1.27,
"learning_rate": 3.732612323630768e-05,
"loss": 5.6495,
"step": 58500
},
{
"epoch": 1.28,
"learning_rate": 3.721775504453933e-05,
"loss": 5.6402,
"step": 59000
},
{
"epoch": 1.29,
"learning_rate": 3.7109386852770975e-05,
"loss": 5.5963,
"step": 59500
},
{
"epoch": 1.3,
"learning_rate": 3.700101866100262e-05,
"loss": 5.6276,
"step": 60000
},
{
"epoch": 1.31,
"learning_rate": 3.6892650469234274e-05,
"loss": 5.6346,
"step": 60500
},
{
"epoch": 1.32,
"learning_rate": 3.678428227746592e-05,
"loss": 5.6149,
"step": 61000
},
{
"epoch": 1.33,
"learning_rate": 3.6676130822081106e-05,
"loss": 5.5947,
"step": 61500
},
{
"epoch": 1.34,
"learning_rate": 3.656776263031275e-05,
"loss": 5.5881,
"step": 62000
},
{
"epoch": 1.35,
"learning_rate": 3.64593944385444e-05,
"loss": 5.5827,
"step": 62500
},
{
"epoch": 1.37,
"learning_rate": 3.635102624677605e-05,
"loss": 5.5667,
"step": 63000
},
{
"epoch": 1.38,
"learning_rate": 3.624265805500769e-05,
"loss": 5.5804,
"step": 63500
},
{
"epoch": 1.39,
"learning_rate": 3.6134289863239344e-05,
"loss": 5.5845,
"step": 64000
},
{
"epoch": 1.4,
"learning_rate": 3.602592167147099e-05,
"loss": 5.5827,
"step": 64500
},
{
"epoch": 1.41,
"learning_rate": 3.591755347970264e-05,
"loss": 5.5525,
"step": 65000
},
{
"epoch": 1.42,
"learning_rate": 3.580918528793429e-05,
"loss": 5.5575,
"step": 65500
},
{
"epoch": 1.43,
"learning_rate": 3.5700817096165936e-05,
"loss": 5.5406,
"step": 66000
},
{
"epoch": 1.44,
"learning_rate": 3.559266564078112e-05,
"loss": 5.5457,
"step": 66500
},
{
"epoch": 1.45,
"learning_rate": 3.548429744901277e-05,
"loss": 5.5298,
"step": 67000
},
{
"epoch": 1.46,
"learning_rate": 3.5375929257244415e-05,
"loss": 5.5226,
"step": 67500
},
{
"epoch": 1.47,
"learning_rate": 3.526756106547606e-05,
"loss": 5.4872,
"step": 68000
},
{
"epoch": 1.48,
"learning_rate": 3.5159192873707714e-05,
"loss": 5.496,
"step": 68500
},
{
"epoch": 1.5,
"learning_rate": 3.5051041418322894e-05,
"loss": 5.4846,
"step": 69000
},
{
"epoch": 1.51,
"learning_rate": 3.494267322655455e-05,
"loss": 5.494,
"step": 69500
},
{
"epoch": 1.52,
"learning_rate": 3.483430503478619e-05,
"loss": 5.4753,
"step": 70000
},
{
"epoch": 1.53,
"learning_rate": 3.472593684301784e-05,
"loss": 5.5017,
"step": 70500
},
{
"epoch": 1.54,
"learning_rate": 3.4618002124016566e-05,
"loss": 5.489,
"step": 71000
},
{
"epoch": 1.55,
"learning_rate": 3.450963393224821e-05,
"loss": 5.4613,
"step": 71500
},
{
"epoch": 1.56,
"learning_rate": 3.440126574047985e-05,
"loss": 5.4371,
"step": 72000
},
{
"epoch": 1.57,
"learning_rate": 3.4292897548711504e-05,
"loss": 5.4453,
"step": 72500
},
{
"epoch": 1.58,
"learning_rate": 3.418452935694315e-05,
"loss": 5.4485,
"step": 73000
},
{
"epoch": 1.59,
"learning_rate": 3.40761611651748e-05,
"loss": 5.4734,
"step": 73500
},
{
"epoch": 1.6,
"learning_rate": 3.396779297340645e-05,
"loss": 5.4371,
"step": 74000
},
{
"epoch": 1.61,
"learning_rate": 3.3859641518021636e-05,
"loss": 5.4367,
"step": 74500
},
{
"epoch": 1.63,
"learning_rate": 3.375127332625328e-05,
"loss": 5.4137,
"step": 75000
},
{
"epoch": 1.64,
"learning_rate": 3.364290513448493e-05,
"loss": 5.4397,
"step": 75500
},
{
"epoch": 1.65,
"learning_rate": 3.3534536942716575e-05,
"loss": 5.4075,
"step": 76000
},
{
"epoch": 1.66,
"learning_rate": 3.342616875094822e-05,
"loss": 5.426,
"step": 76500
},
{
"epoch": 1.67,
"learning_rate": 3.3317800559179874e-05,
"loss": 5.4072,
"step": 77000
},
{
"epoch": 1.68,
"learning_rate": 3.320943236741152e-05,
"loss": 5.3822,
"step": 77500
},
{
"epoch": 1.69,
"learning_rate": 3.3101064175643166e-05,
"loss": 5.3867,
"step": 78000
},
{
"epoch": 1.7,
"learning_rate": 3.299291272025835e-05,
"loss": 5.4032,
"step": 78500
},
{
"epoch": 1.71,
"learning_rate": 3.288454452849e-05,
"loss": 5.3875,
"step": 79000
},
{
"epoch": 1.72,
"learning_rate": 3.2776176336721645e-05,
"loss": 5.382,
"step": 79500
},
{
"epoch": 1.73,
"learning_rate": 3.266802488133683e-05,
"loss": 5.3784,
"step": 80000
},
{
"epoch": 1.74,
"learning_rate": 3.255965668956848e-05,
"loss": 5.3607,
"step": 80500
},
{
"epoch": 1.76,
"learning_rate": 3.2451288497800124e-05,
"loss": 5.3523,
"step": 81000
},
{
"epoch": 1.77,
"learning_rate": 3.234292030603178e-05,
"loss": 5.3397,
"step": 81500
},
{
"epoch": 1.78,
"learning_rate": 3.223455211426342e-05,
"loss": 5.3615,
"step": 82000
},
{
"epoch": 1.79,
"learning_rate": 3.2126183922495076e-05,
"loss": 5.3337,
"step": 82500
},
{
"epoch": 1.8,
"learning_rate": 3.2017815730726716e-05,
"loss": 5.3297,
"step": 83000
},
{
"epoch": 1.81,
"learning_rate": 3.190944753895836e-05,
"loss": 5.3278,
"step": 83500
},
{
"epoch": 1.82,
"learning_rate": 3.1801079347190015e-05,
"loss": 5.3225,
"step": 84000
},
{
"epoch": 1.83,
"learning_rate": 3.1692927891805194e-05,
"loss": 5.3084,
"step": 84500
},
{
"epoch": 1.84,
"learning_rate": 3.158477643642038e-05,
"loss": 5.3226,
"step": 85000
},
{
"epoch": 1.85,
"learning_rate": 3.1476408244652034e-05,
"loss": 5.3311,
"step": 85500
},
{
"epoch": 1.86,
"learning_rate": 3.136804005288368e-05,
"loss": 5.3289,
"step": 86000
},
{
"epoch": 1.87,
"learning_rate": 3.1259671861115326e-05,
"loss": 5.3217,
"step": 86500
},
{
"epoch": 1.89,
"learning_rate": 3.115152040573051e-05,
"loss": 5.3092,
"step": 87000
},
{
"epoch": 1.9,
"learning_rate": 3.104315221396216e-05,
"loss": 5.3042,
"step": 87500
},
{
"epoch": 1.91,
"learning_rate": 3.0934784022193805e-05,
"loss": 5.3147,
"step": 88000
},
{
"epoch": 1.92,
"learning_rate": 3.082641583042545e-05,
"loss": 5.2633,
"step": 88500
},
{
"epoch": 1.93,
"learning_rate": 3.0718047638657104e-05,
"loss": 5.2619,
"step": 89000
},
{
"epoch": 1.94,
"learning_rate": 3.0609896183272284e-05,
"loss": 5.3033,
"step": 89500
},
{
"epoch": 1.95,
"learning_rate": 3.0501527991503937e-05,
"loss": 5.2377,
"step": 90000
},
{
"epoch": 1.96,
"learning_rate": 3.0393159799735583e-05,
"loss": 5.266,
"step": 90500
},
{
"epoch": 1.97,
"learning_rate": 3.0284791607967233e-05,
"loss": 5.2726,
"step": 91000
},
{
"epoch": 1.98,
"learning_rate": 3.017642341619888e-05,
"loss": 5.2806,
"step": 91500
},
{
"epoch": 1.99,
"learning_rate": 3.0068271960814065e-05,
"loss": 5.2758,
"step": 92000
},
{
"epoch": 2.0,
"learning_rate": 2.995990376904571e-05,
"loss": 5.2737,
"step": 92500
},
{
"epoch": 2.02,
"learning_rate": 2.9851535577277358e-05,
"loss": 5.2566,
"step": 93000
},
{
"epoch": 2.03,
"learning_rate": 2.9743384121892544e-05,
"loss": 5.2196,
"step": 93500
},
{
"epoch": 2.04,
"learning_rate": 2.9635015930124194e-05,
"loss": 5.2356,
"step": 94000
},
{
"epoch": 2.05,
"learning_rate": 2.952664773835584e-05,
"loss": 5.2116,
"step": 94500
},
{
"epoch": 2.06,
"learning_rate": 2.9418279546587486e-05,
"loss": 5.235,
"step": 95000
},
{
"epoch": 2.07,
"learning_rate": 2.9309911354819136e-05,
"loss": 5.2186,
"step": 95500
},
{
"epoch": 2.08,
"learning_rate": 2.9201543163050782e-05,
"loss": 5.2145,
"step": 96000
},
{
"epoch": 2.09,
"learning_rate": 2.909317497128243e-05,
"loss": 5.1935,
"step": 96500
},
{
"epoch": 2.1,
"learning_rate": 2.8984806779514078e-05,
"loss": 5.2145,
"step": 97000
},
{
"epoch": 2.11,
"learning_rate": 2.8876438587745724e-05,
"loss": 5.2262,
"step": 97500
},
{
"epoch": 2.12,
"learning_rate": 2.876828713236091e-05,
"loss": 5.211,
"step": 98000
},
{
"epoch": 2.13,
"learning_rate": 2.8659918940592557e-05,
"loss": 5.1676,
"step": 98500
},
{
"epoch": 2.15,
"learning_rate": 2.8551550748824206e-05,
"loss": 5.1791,
"step": 99000
},
{
"epoch": 2.16,
"learning_rate": 2.8443182557055852e-05,
"loss": 5.1993,
"step": 99500
},
{
"epoch": 2.17,
"learning_rate": 2.833503110167104e-05,
"loss": 5.1698,
"step": 100000
},
{
"epoch": 2.18,
"learning_rate": 2.8226662909902685e-05,
"loss": 5.1661,
"step": 100500
},
{
"epoch": 2.19,
"learning_rate": 2.8118294718134335e-05,
"loss": 5.1772,
"step": 101000
},
{
"epoch": 2.2,
"learning_rate": 2.800992652636598e-05,
"loss": 5.1629,
"step": 101500
},
{
"epoch": 2.21,
"learning_rate": 2.7901558334597634e-05,
"loss": 5.1636,
"step": 102000
},
{
"epoch": 2.22,
"learning_rate": 2.7793406879212813e-05,
"loss": 5.1611,
"step": 102500
},
{
"epoch": 2.23,
"learning_rate": 2.7685038687444463e-05,
"loss": 5.172,
"step": 103000
},
{
"epoch": 2.24,
"learning_rate": 2.757667049567611e-05,
"loss": 5.1525,
"step": 103500
},
{
"epoch": 2.25,
"learning_rate": 2.7468302303907756e-05,
"loss": 5.1161,
"step": 104000
},
{
"epoch": 2.26,
"learning_rate": 2.735993411213941e-05,
"loss": 5.159,
"step": 104500
},
{
"epoch": 2.28,
"learning_rate": 2.725178265675459e-05,
"loss": 5.1327,
"step": 105000
},
{
"epoch": 2.29,
"learning_rate": 2.7143414464986238e-05,
"loss": 5.1263,
"step": 105500
},
{
"epoch": 2.3,
"learning_rate": 2.7035046273217884e-05,
"loss": 5.1287,
"step": 106000
},
{
"epoch": 2.31,
"learning_rate": 2.6926678081449537e-05,
"loss": 5.1209,
"step": 106500
},
{
"epoch": 2.32,
"learning_rate": 2.681830988968118e-05,
"loss": 5.1138,
"step": 107000
},
{
"epoch": 2.33,
"learning_rate": 2.6709941697912833e-05,
"loss": 5.1045,
"step": 107500
},
{
"epoch": 2.34,
"learning_rate": 2.660157350614448e-05,
"loss": 5.1258,
"step": 108000
},
{
"epoch": 2.35,
"learning_rate": 2.6493205314376125e-05,
"loss": 5.1379,
"step": 108500
},
{
"epoch": 2.36,
"learning_rate": 2.6384837122607775e-05,
"loss": 5.0773,
"step": 109000
},
{
"epoch": 2.37,
"learning_rate": 2.6276685667222954e-05,
"loss": 5.1031,
"step": 109500
},
{
"epoch": 2.38,
"learning_rate": 2.6168317475454607e-05,
"loss": 5.0969,
"step": 110000
},
{
"epoch": 2.39,
"learning_rate": 2.6059949283686254e-05,
"loss": 5.0868,
"step": 110500
},
{
"epoch": 2.41,
"learning_rate": 2.5951581091917903e-05,
"loss": 5.0737,
"step": 111000
},
{
"epoch": 2.42,
"learning_rate": 2.5843429636533083e-05,
"loss": 5.0793,
"step": 111500
},
{
"epoch": 2.43,
"learning_rate": 2.5735061444764736e-05,
"loss": 5.0712,
"step": 112000
},
{
"epoch": 2.44,
"learning_rate": 2.5626693252996382e-05,
"loss": 5.0871,
"step": 112500
},
{
"epoch": 2.45,
"learning_rate": 2.5518325061228032e-05,
"loss": 5.064,
"step": 113000
},
{
"epoch": 2.46,
"learning_rate": 2.5409956869459678e-05,
"loss": 5.0995,
"step": 113500
},
{
"epoch": 2.47,
"learning_rate": 2.5301588677691324e-05,
"loss": 5.0805,
"step": 114000
},
{
"epoch": 2.48,
"learning_rate": 2.5193220485922974e-05,
"loss": 5.0899,
"step": 114500
},
{
"epoch": 2.49,
"learning_rate": 2.508485229415462e-05,
"loss": 5.0422,
"step": 115000
},
{
"epoch": 2.5,
"learning_rate": 2.4976700838769806e-05,
"loss": 5.0442,
"step": 115500
},
{
"epoch": 2.51,
"learning_rate": 2.4868332647001453e-05,
"loss": 5.0505,
"step": 116000
},
{
"epoch": 2.52,
"learning_rate": 2.4759964455233102e-05,
"loss": 5.0718,
"step": 116500
},
{
"epoch": 2.54,
"learning_rate": 2.4651812999848285e-05,
"loss": 5.0682,
"step": 117000
},
{
"epoch": 2.55,
"learning_rate": 2.454344480807993e-05,
"loss": 5.0645,
"step": 117500
},
{
"epoch": 2.56,
"learning_rate": 2.443507661631158e-05,
"loss": 5.0679,
"step": 118000
},
{
"epoch": 2.57,
"learning_rate": 2.432670842454323e-05,
"loss": 5.0509,
"step": 118500
},
{
"epoch": 2.58,
"learning_rate": 2.4218340232774877e-05,
"loss": 5.0678,
"step": 119000
},
{
"epoch": 2.59,
"learning_rate": 2.4109972041006526e-05,
"loss": 5.0296,
"step": 119500
},
{
"epoch": 2.6,
"learning_rate": 2.4001603849238173e-05,
"loss": 5.0411,
"step": 120000
},
{
"epoch": 2.61,
"learning_rate": 2.389323565746982e-05,
"loss": 5.0054,
"step": 120500
},
{
"epoch": 2.62,
"learning_rate": 2.378486746570147e-05,
"loss": 5.0269,
"step": 121000
},
{
"epoch": 2.63,
"learning_rate": 2.367671601031665e-05,
"loss": 5.0299,
"step": 121500
},
{
"epoch": 2.64,
"learning_rate": 2.35683478185483e-05,
"loss": 5.0179,
"step": 122000
},
{
"epoch": 2.66,
"learning_rate": 2.3460196363163487e-05,
"loss": 5.0287,
"step": 122500
},
{
"epoch": 2.67,
"learning_rate": 2.3351828171395134e-05,
"loss": 5.0147,
"step": 123000
},
{
"epoch": 2.68,
"learning_rate": 2.324345997962678e-05,
"loss": 5.0182,
"step": 123500
},
{
"epoch": 2.69,
"learning_rate": 2.313509178785843e-05,
"loss": 4.9981,
"step": 124000
},
{
"epoch": 2.7,
"learning_rate": 2.3026723596090076e-05,
"loss": 4.9984,
"step": 124500
},
{
"epoch": 2.71,
"learning_rate": 2.2918355404321725e-05,
"loss": 4.9986,
"step": 125000
},
{
"epoch": 2.72,
"learning_rate": 2.280998721255337e-05,
"loss": 4.9822,
"step": 125500
},
{
"epoch": 2.73,
"learning_rate": 2.270161902078502e-05,
"loss": 4.9897,
"step": 126000
},
{
"epoch": 2.74,
"learning_rate": 2.2593467565400208e-05,
"loss": 5.0029,
"step": 126500
},
{
"epoch": 2.75,
"learning_rate": 2.248531611001539e-05,
"loss": 4.9913,
"step": 127000
},
{
"epoch": 2.76,
"learning_rate": 2.2376947918247037e-05,
"loss": 4.9867,
"step": 127500
},
{
"epoch": 2.77,
"learning_rate": 2.2268579726478686e-05,
"loss": 4.9702,
"step": 128000
},
{
"epoch": 2.79,
"learning_rate": 2.2160211534710333e-05,
"loss": 5.0042,
"step": 128500
},
{
"epoch": 2.8,
"learning_rate": 2.205184334294198e-05,
"loss": 4.9958,
"step": 129000
},
{
"epoch": 2.81,
"learning_rate": 2.1943691887557165e-05,
"loss": 4.969,
"step": 129500
},
{
"epoch": 2.82,
"learning_rate": 2.183532369578881e-05,
"loss": 4.9888,
"step": 130000
},
{
"epoch": 2.83,
"learning_rate": 2.172695550402046e-05,
"loss": 4.9643,
"step": 130500
},
{
"epoch": 2.84,
"learning_rate": 2.1618804048635647e-05,
"loss": 4.9661,
"step": 131000
},
{
"epoch": 2.85,
"learning_rate": 2.1510435856867294e-05,
"loss": 4.9511,
"step": 131500
},
{
"epoch": 2.86,
"learning_rate": 2.140206766509894e-05,
"loss": 4.9586,
"step": 132000
},
{
"epoch": 2.87,
"learning_rate": 2.129369947333059e-05,
"loss": 4.9791,
"step": 132500
},
{
"epoch": 2.88,
"learning_rate": 2.118533128156224e-05,
"loss": 4.9457,
"step": 133000
},
{
"epoch": 2.89,
"learning_rate": 2.1076963089793885e-05,
"loss": 4.9577,
"step": 133500
},
{
"epoch": 2.9,
"learning_rate": 2.096859489802553e-05,
"loss": 4.9646,
"step": 134000
},
{
"epoch": 2.92,
"learning_rate": 2.086022670625718e-05,
"loss": 4.9457,
"step": 134500
},
{
"epoch": 2.93,
"learning_rate": 2.0751858514488827e-05,
"loss": 4.9405,
"step": 135000
},
{
"epoch": 2.94,
"learning_rate": 2.064370705910401e-05,
"loss": 4.9576,
"step": 135500
},
{
"epoch": 2.95,
"learning_rate": 2.053533886733566e-05,
"loss": 4.9494,
"step": 136000
},
{
"epoch": 2.96,
"learning_rate": 2.042697067556731e-05,
"loss": 4.9348,
"step": 136500
},
{
"epoch": 2.97,
"learning_rate": 2.0318819220182492e-05,
"loss": 4.938,
"step": 137000
},
{
"epoch": 2.98,
"learning_rate": 2.021045102841414e-05,
"loss": 4.9657,
"step": 137500
},
{
"epoch": 2.99,
"learning_rate": 2.0102082836645788e-05,
"loss": 4.9181,
"step": 138000
},
{
"epoch": 3.0,
"learning_rate": 1.9993714644877438e-05,
"loss": 4.9687,
"step": 138500
},
{
"epoch": 3.01,
"learning_rate": 1.9885346453109084e-05,
"loss": 4.9063,
"step": 139000
},
{
"epoch": 3.02,
"learning_rate": 1.977697826134073e-05,
"loss": 4.9311,
"step": 139500
},
{
"epoch": 3.03,
"learning_rate": 1.966861006957238e-05,
"loss": 4.887,
"step": 140000
},
{
"epoch": 3.05,
"learning_rate": 1.956024187780403e-05,
"loss": 4.9098,
"step": 140500
},
{
"epoch": 3.06,
"learning_rate": 1.9451873686035676e-05,
"loss": 4.8869,
"step": 141000
},
{
"epoch": 3.07,
"learning_rate": 1.9343505494267325e-05,
"loss": 4.9259,
"step": 141500
},
{
"epoch": 3.08,
"learning_rate": 1.9235137302498972e-05,
"loss": 4.8914,
"step": 142000
},
{
"epoch": 3.09,
"learning_rate": 1.9126769110730618e-05,
"loss": 4.8805,
"step": 142500
},
{
"epoch": 3.1,
"learning_rate": 1.9018617655345804e-05,
"loss": 4.8959,
"step": 143000
},
{
"epoch": 3.11,
"learning_rate": 1.891024946357745e-05,
"loss": 4.9006,
"step": 143500
},
{
"epoch": 3.12,
"learning_rate": 1.8802098008192637e-05,
"loss": 4.8775,
"step": 144000
},
{
"epoch": 3.13,
"learning_rate": 1.8693729816424286e-05,
"loss": 4.8962,
"step": 144500
},
{
"epoch": 3.14,
"learning_rate": 1.858536162465593e-05,
"loss": 4.8837,
"step": 145000
},
{
"epoch": 3.15,
"learning_rate": 1.847699343288758e-05,
"loss": 4.8987,
"step": 145500
},
{
"epoch": 3.16,
"learning_rate": 1.836862524111923e-05,
"loss": 4.8875,
"step": 146000
},
{
"epoch": 3.18,
"learning_rate": 1.8260257049350875e-05,
"loss": 4.8839,
"step": 146500
},
{
"epoch": 3.19,
"learning_rate": 1.815188885758252e-05,
"loss": 4.8876,
"step": 147000
},
{
"epoch": 3.2,
"learning_rate": 1.804352066581417e-05,
"loss": 4.9015,
"step": 147500
},
{
"epoch": 3.21,
"learning_rate": 1.793515247404582e-05,
"loss": 4.8669,
"step": 148000
},
{
"epoch": 3.22,
"learning_rate": 1.782721775504454e-05,
"loss": 4.9037,
"step": 148500
},
{
"epoch": 3.23,
"learning_rate": 1.771884956327619e-05,
"loss": 4.8772,
"step": 149000
},
{
"epoch": 3.24,
"learning_rate": 1.7610481371507836e-05,
"loss": 4.8542,
"step": 149500
},
{
"epoch": 3.25,
"learning_rate": 1.7502113179739485e-05,
"loss": 4.8429,
"step": 150000
},
{
"epoch": 3.26,
"learning_rate": 1.7393961724354668e-05,
"loss": 4.8799,
"step": 150500
},
{
"epoch": 3.27,
"learning_rate": 1.7285593532586318e-05,
"loss": 4.8603,
"step": 151000
},
{
"epoch": 3.28,
"learning_rate": 1.7177225340817964e-05,
"loss": 4.8515,
"step": 151500
},
{
"epoch": 3.29,
"learning_rate": 1.706885714904961e-05,
"loss": 4.8639,
"step": 152000
},
{
"epoch": 3.31,
"learning_rate": 1.696048895728126e-05,
"loss": 4.8507,
"step": 152500
},
{
"epoch": 3.32,
"learning_rate": 1.6852120765512906e-05,
"loss": 4.8426,
"step": 153000
},
{
"epoch": 3.33,
"learning_rate": 1.6743752573744556e-05,
"loss": 4.8277,
"step": 153500
},
{
"epoch": 3.34,
"learning_rate": 1.6635384381976205e-05,
"loss": 4.8831,
"step": 154000
},
{
"epoch": 3.35,
"learning_rate": 1.6527016190207852e-05,
"loss": 4.8377,
"step": 154500
},
{
"epoch": 3.36,
"learning_rate": 1.6418647998439498e-05,
"loss": 4.8549,
"step": 155000
},
{
"epoch": 3.37,
"learning_rate": 1.6310279806671148e-05,
"loss": 4.8403,
"step": 155500
},
{
"epoch": 3.38,
"learning_rate": 1.6201911614902797e-05,
"loss": 4.8526,
"step": 156000
},
{
"epoch": 3.39,
"learning_rate": 1.609354342313444e-05,
"loss": 4.8472,
"step": 156500
},
{
"epoch": 3.4,
"learning_rate": 1.5985391967749626e-05,
"loss": 4.8356,
"step": 157000
},
{
"epoch": 3.41,
"learning_rate": 1.5877023775981276e-05,
"loss": 4.8552,
"step": 157500
},
{
"epoch": 3.42,
"learning_rate": 1.5768655584212922e-05,
"loss": 4.8528,
"step": 158000
},
{
"epoch": 3.44,
"learning_rate": 1.566028739244457e-05,
"loss": 4.8505,
"step": 158500
},
{
"epoch": 3.45,
"learning_rate": 1.5552135937059755e-05,
"loss": 4.8677,
"step": 159000
},
{
"epoch": 3.46,
"learning_rate": 1.54437677452914e-05,
"loss": 4.844,
"step": 159500
},
{
"epoch": 3.47,
"learning_rate": 1.533539955352305e-05,
"loss": 4.8299,
"step": 160000
},
{
"epoch": 3.48,
"learning_rate": 1.5227248098138235e-05,
"loss": 4.8197,
"step": 160500
},
{
"epoch": 3.49,
"learning_rate": 1.5118879906369885e-05,
"loss": 4.8289,
"step": 161000
},
{
"epoch": 3.5,
"learning_rate": 1.501051171460153e-05,
"loss": 4.8317,
"step": 161500
},
{
"epoch": 3.51,
"learning_rate": 1.4902360259216716e-05,
"loss": 4.8262,
"step": 162000
},
{
"epoch": 3.52,
"learning_rate": 1.4793992067448365e-05,
"loss": 4.8332,
"step": 162500
},
{
"epoch": 3.53,
"learning_rate": 1.468562387568001e-05,
"loss": 4.8332,
"step": 163000
},
{
"epoch": 3.54,
"learning_rate": 1.457725568391166e-05,
"loss": 4.839,
"step": 163500
},
{
"epoch": 3.55,
"learning_rate": 1.4468887492143307e-05,
"loss": 4.8062,
"step": 164000
},
{
"epoch": 3.57,
"learning_rate": 1.4360519300374955e-05,
"loss": 4.8388,
"step": 164500
},
{
"epoch": 3.58,
"learning_rate": 1.4252151108606602e-05,
"loss": 4.7874,
"step": 165000
},
{
"epoch": 3.59,
"learning_rate": 1.414378291683825e-05,
"loss": 4.8314,
"step": 165500
},
{
"epoch": 3.6,
"learning_rate": 1.4035414725069897e-05,
"loss": 4.8281,
"step": 166000
},
{
"epoch": 3.61,
"learning_rate": 1.3927046533301547e-05,
"loss": 4.805,
"step": 166500
},
{
"epoch": 3.62,
"learning_rate": 1.3818678341533195e-05,
"loss": 4.8146,
"step": 167000
},
{
"epoch": 3.63,
"learning_rate": 1.3710310149764841e-05,
"loss": 4.8093,
"step": 167500
},
{
"epoch": 3.64,
"learning_rate": 1.360194195799649e-05,
"loss": 4.8045,
"step": 168000
},
{
"epoch": 3.65,
"learning_rate": 1.3493790502611675e-05,
"loss": 4.812,
"step": 168500
},
{
"epoch": 3.66,
"learning_rate": 1.338542231084332e-05,
"loss": 4.8153,
"step": 169000
},
{
"epoch": 3.67,
"learning_rate": 1.327705411907497e-05,
"loss": 4.7973,
"step": 169500
},
{
"epoch": 3.68,
"learning_rate": 1.3168685927306618e-05,
"loss": 4.7848,
"step": 170000
},
{
"epoch": 3.7,
"learning_rate": 1.3060317735538265e-05,
"loss": 4.7788,
"step": 170500
},
{
"epoch": 3.71,
"learning_rate": 1.2951949543769915e-05,
"loss": 4.7904,
"step": 171000
},
{
"epoch": 3.72,
"learning_rate": 1.284358135200156e-05,
"loss": 4.8071,
"step": 171500
},
{
"epoch": 3.73,
"learning_rate": 1.273521316023321e-05,
"loss": 4.7705,
"step": 172000
},
{
"epoch": 3.74,
"learning_rate": 1.2627061704848394e-05,
"loss": 4.7761,
"step": 172500
},
{
"epoch": 3.75,
"learning_rate": 1.251869351308004e-05,
"loss": 4.7745,
"step": 173000
},
{
"epoch": 3.76,
"learning_rate": 1.2410325321311688e-05,
"loss": 4.8037,
"step": 173500
},
{
"epoch": 3.77,
"learning_rate": 1.2301957129543338e-05,
"loss": 4.7899,
"step": 174000
},
{
"epoch": 3.78,
"learning_rate": 1.2193805674158522e-05,
"loss": 4.7707,
"step": 174500
},
{
"epoch": 3.79,
"learning_rate": 1.2085437482390169e-05,
"loss": 4.7628,
"step": 175000
},
{
"epoch": 3.8,
"learning_rate": 1.1977069290621818e-05,
"loss": 4.8077,
"step": 175500
},
{
"epoch": 3.81,
"learning_rate": 1.1868701098853464e-05,
"loss": 4.7716,
"step": 176000
},
{
"epoch": 3.83,
"learning_rate": 1.1760549643468649e-05,
"loss": 4.7809,
"step": 176500
},
{
"epoch": 3.84,
"learning_rate": 1.1652181451700297e-05,
"loss": 4.7824,
"step": 177000
},
{
"epoch": 3.85,
"learning_rate": 1.1543813259931947e-05,
"loss": 4.7673,
"step": 177500
},
{
"epoch": 3.86,
"learning_rate": 1.1435445068163593e-05,
"loss": 4.7763,
"step": 178000
},
{
"epoch": 3.87,
"learning_rate": 1.132707687639524e-05,
"loss": 4.7798,
"step": 178500
},
{
"epoch": 3.88,
"learning_rate": 1.1218708684626889e-05,
"loss": 4.7793,
"step": 179000
},
{
"epoch": 3.89,
"learning_rate": 1.1110557229242073e-05,
"loss": 4.7544,
"step": 179500
},
{
"epoch": 3.9,
"learning_rate": 1.1002189037473721e-05,
"loss": 4.7661,
"step": 180000
},
{
"epoch": 3.91,
"learning_rate": 1.0893820845705369e-05,
"loss": 4.7634,
"step": 180500
},
{
"epoch": 3.92,
"learning_rate": 1.0785452653937017e-05,
"loss": 4.784,
"step": 181000
},
{
"epoch": 3.93,
"learning_rate": 1.0677084462168665e-05,
"loss": 4.7577,
"step": 181500
},
{
"epoch": 3.94,
"learning_rate": 1.0568716270400313e-05,
"loss": 4.7507,
"step": 182000
},
{
"epoch": 3.96,
"learning_rate": 1.0460348078631961e-05,
"loss": 4.7365,
"step": 182500
},
{
"epoch": 3.97,
"learning_rate": 1.0351979886863609e-05,
"loss": 4.7678,
"step": 183000
},
{
"epoch": 3.98,
"learning_rate": 1.0243828431478793e-05,
"loss": 4.7701,
"step": 183500
},
{
"epoch": 3.99,
"learning_rate": 1.0135460239710441e-05,
"loss": 4.7522,
"step": 184000
},
{
"epoch": 4.0,
"learning_rate": 1.0027092047942088e-05,
"loss": 4.7454,
"step": 184500
},
{
"epoch": 4.01,
"learning_rate": 9.918723856173737e-06,
"loss": 4.7236,
"step": 185000
},
{
"epoch": 4.02,
"learning_rate": 9.810355664405383e-06,
"loss": 4.7409,
"step": 185500
},
{
"epoch": 4.03,
"learning_rate": 9.701987472637031e-06,
"loss": 4.74,
"step": 186000
},
{
"epoch": 4.04,
"learning_rate": 9.593836017252216e-06,
"loss": 4.7351,
"step": 186500
},
{
"epoch": 4.05,
"learning_rate": 9.485467825483864e-06,
"loss": 4.7128,
"step": 187000
},
{
"epoch": 4.06,
"learning_rate": 9.377316370099049e-06,
"loss": 4.7234,
"step": 187500
},
{
"epoch": 4.07,
"learning_rate": 9.268948178330696e-06,
"loss": 4.7467,
"step": 188000
},
{
"epoch": 4.09,
"learning_rate": 9.160579986562344e-06,
"loss": 4.7249,
"step": 188500
},
{
"epoch": 4.1,
"learning_rate": 9.052211794793992e-06,
"loss": 4.7212,
"step": 189000
},
{
"epoch": 4.11,
"learning_rate": 8.94384360302564e-06,
"loss": 4.7368,
"step": 189500
},
{
"epoch": 4.12,
"learning_rate": 8.835475411257288e-06,
"loss": 4.7173,
"step": 190000
},
{
"epoch": 4.13,
"learning_rate": 8.727107219488936e-06,
"loss": 4.748,
"step": 190500
},
{
"epoch": 4.14,
"learning_rate": 8.618739027720584e-06,
"loss": 4.7198,
"step": 191000
},
{
"epoch": 4.15,
"learning_rate": 8.510370835952232e-06,
"loss": 4.7166,
"step": 191500
},
{
"epoch": 4.16,
"learning_rate": 8.402219380567417e-06,
"loss": 4.7247,
"step": 192000
},
{
"epoch": 4.17,
"learning_rate": 8.293851188799063e-06,
"loss": 4.7295,
"step": 192500
},
{
"epoch": 4.18,
"learning_rate": 8.185482997030712e-06,
"loss": 4.7058,
"step": 193000
},
{
"epoch": 4.19,
"learning_rate": 8.077114805262359e-06,
"loss": 4.6972,
"step": 193500
},
{
"epoch": 4.2,
"learning_rate": 7.968963349877543e-06,
"loss": 4.7311,
"step": 194000
},
{
"epoch": 4.22,
"learning_rate": 7.860595158109193e-06,
"loss": 4.7356,
"step": 194500
},
{
"epoch": 4.23,
"learning_rate": 7.752443702724378e-06,
"loss": 4.7023,
"step": 195000
},
{
"epoch": 4.24,
"learning_rate": 7.644292247339562e-06,
"loss": 4.7178,
"step": 195500
},
{
"epoch": 4.25,
"learning_rate": 7.535924055571208e-06,
"loss": 4.7288,
"step": 196000
},
{
"epoch": 4.26,
"learning_rate": 7.427555863802857e-06,
"loss": 4.735,
"step": 196500
},
{
"epoch": 4.27,
"learning_rate": 7.319187672034505e-06,
"loss": 4.742,
"step": 197000
},
{
"epoch": 4.28,
"learning_rate": 7.210819480266152e-06,
"loss": 4.7199,
"step": 197500
},
{
"epoch": 4.29,
"learning_rate": 7.102451288497801e-06,
"loss": 4.7203,
"step": 198000
},
{
"epoch": 4.3,
"learning_rate": 6.994083096729448e-06,
"loss": 4.6962,
"step": 198500
},
{
"epoch": 4.31,
"learning_rate": 6.885714904961097e-06,
"loss": 4.7219,
"step": 199000
},
{
"epoch": 4.32,
"learning_rate": 6.777563449576281e-06,
"loss": 4.705,
"step": 199500
},
{
"epoch": 4.33,
"learning_rate": 6.6691952578079285e-06,
"loss": 4.7085,
"step": 200000
},
{
"epoch": 4.35,
"learning_rate": 6.560827066039576e-06,
"loss": 4.7082,
"step": 200500
},
{
"epoch": 4.36,
"learning_rate": 6.4524588742712235e-06,
"loss": 4.705,
"step": 201000
},
{
"epoch": 4.37,
"learning_rate": 6.344090682502872e-06,
"loss": 4.6962,
"step": 201500
},
{
"epoch": 4.38,
"learning_rate": 6.235722490734519e-06,
"loss": 4.706,
"step": 202000
},
{
"epoch": 4.39,
"learning_rate": 6.127354298966167e-06,
"loss": 4.7202,
"step": 202500
},
{
"epoch": 4.4,
"learning_rate": 6.018986107197815e-06,
"loss": 4.7005,
"step": 203000
},
{
"epoch": 4.41,
"learning_rate": 5.910617915429464e-06,
"loss": 4.701,
"step": 203500
},
{
"epoch": 4.42,
"learning_rate": 5.802466460044648e-06,
"loss": 4.6878,
"step": 204000
},
{
"epoch": 4.43,
"learning_rate": 5.694098268276296e-06,
"loss": 4.7178,
"step": 204500
},
{
"epoch": 4.44,
"learning_rate": 5.585730076507944e-06,
"loss": 4.6609,
"step": 205000
},
{
"epoch": 4.45,
"learning_rate": 5.4773618847395916e-06,
"loss": 4.7039,
"step": 205500
},
{
"epoch": 4.46,
"learning_rate": 5.369210429354776e-06,
"loss": 4.7098,
"step": 206000
},
{
"epoch": 4.48,
"learning_rate": 5.260842237586424e-06,
"loss": 4.7026,
"step": 206500
},
{
"epoch": 4.49,
"learning_rate": 5.152474045818072e-06,
"loss": 4.6955,
"step": 207000
},
{
"epoch": 4.5,
"learning_rate": 5.044105854049719e-06,
"loss": 4.7102,
"step": 207500
},
{
"epoch": 4.51,
"learning_rate": 4.935737662281367e-06,
"loss": 4.6803,
"step": 208000
},
{
"epoch": 4.52,
"learning_rate": 4.827586206896552e-06,
"loss": 4.7065,
"step": 208500
},
{
"epoch": 4.53,
"learning_rate": 4.7192180151282e-06,
"loss": 4.7135,
"step": 209000
},
{
"epoch": 4.54,
"learning_rate": 4.6108498233598475e-06,
"loss": 4.7195,
"step": 209500
},
{
"epoch": 4.55,
"learning_rate": 4.5024816315914954e-06,
"loss": 4.7074,
"step": 210000
},
{
"epoch": 4.56,
"learning_rate": 4.39433017620668e-06,
"loss": 4.6901,
"step": 210500
},
{
"epoch": 4.57,
"learning_rate": 4.286178720821865e-06,
"loss": 4.7073,
"step": 211000
},
{
"epoch": 4.58,
"learning_rate": 4.177810529053512e-06,
"loss": 4.7132,
"step": 211500
},
{
"epoch": 4.59,
"learning_rate": 4.06944233728516e-06,
"loss": 4.6883,
"step": 212000
},
{
"epoch": 4.61,
"learning_rate": 3.9610741455168085e-06,
"loss": 4.6894,
"step": 212500
},
{
"epoch": 4.62,
"learning_rate": 3.852705953748456e-06,
"loss": 4.6735,
"step": 213000
},
{
"epoch": 4.63,
"learning_rate": 3.744337761980104e-06,
"loss": 4.6991,
"step": 213500
},
{
"epoch": 4.64,
"learning_rate": 3.6359695702117514e-06,
"loss": 4.6894,
"step": 214000
},
{
"epoch": 4.65,
"learning_rate": 3.5276013784433993e-06,
"loss": 4.6913,
"step": 214500
},
{
"epoch": 4.66,
"learning_rate": 3.4192331866750473e-06,
"loss": 4.6822,
"step": 215000
},
{
"epoch": 4.67,
"learning_rate": 3.3108649949066948e-06,
"loss": 4.7032,
"step": 215500
},
{
"epoch": 4.68,
"learning_rate": 3.2027135395218794e-06,
"loss": 4.6878,
"step": 216000
},
{
"epoch": 4.69,
"learning_rate": 3.0943453477535277e-06,
"loss": 4.6907,
"step": 216500
},
{
"epoch": 4.7,
"learning_rate": 2.9859771559851752e-06,
"loss": 4.7031,
"step": 217000
},
{
"epoch": 4.71,
"learning_rate": 2.877608964216823e-06,
"loss": 4.6638,
"step": 217500
},
{
"epoch": 4.72,
"learning_rate": 2.769240772448471e-06,
"loss": 4.6622,
"step": 218000
},
{
"epoch": 4.74,
"learning_rate": 2.6610893170636553e-06,
"loss": 4.6783,
"step": 218500
},
{
"epoch": 4.75,
"learning_rate": 2.5527211252953036e-06,
"loss": 4.7027,
"step": 219000
},
{
"epoch": 4.76,
"learning_rate": 2.444352933526951e-06,
"loss": 4.6681,
"step": 219500
},
{
"epoch": 4.77,
"learning_rate": 2.335984741758599e-06,
"loss": 4.6985,
"step": 220000
},
{
"epoch": 4.78,
"learning_rate": 2.2278332863737837e-06,
"loss": 4.693,
"step": 220500
},
{
"epoch": 4.79,
"learning_rate": 2.1196818309889683e-06,
"loss": 4.6522,
"step": 221000
},
{
"epoch": 4.8,
"learning_rate": 2.0113136392206162e-06,
"loss": 4.6671,
"step": 221500
},
{
"epoch": 4.81,
"learning_rate": 1.902945447452264e-06,
"loss": 4.6676,
"step": 222000
},
{
"epoch": 4.82,
"learning_rate": 1.7945772556839117e-06,
"loss": 4.6786,
"step": 222500
},
{
"epoch": 4.83,
"learning_rate": 1.6862090639155596e-06,
"loss": 4.673,
"step": 223000
},
{
"epoch": 4.84,
"learning_rate": 1.5778408721472075e-06,
"loss": 4.6652,
"step": 223500
},
{
"epoch": 4.85,
"learning_rate": 1.4694726803788553e-06,
"loss": 4.666,
"step": 224000
},
{
"epoch": 4.87,
"learning_rate": 1.3613212249940399e-06,
"loss": 4.7056,
"step": 224500
},
{
"epoch": 4.88,
"learning_rate": 1.2529530332256876e-06,
"loss": 4.6693,
"step": 225000
},
{
"epoch": 4.89,
"learning_rate": 1.1445848414573355e-06,
"loss": 4.6729,
"step": 225500
},
{
"epoch": 4.9,
"learning_rate": 1.0362166496889832e-06,
"loss": 4.6815,
"step": 226000
},
{
"epoch": 4.91,
"learning_rate": 9.278484579206312e-07,
"loss": 4.6881,
"step": 226500
},
{
"epoch": 4.92,
"learning_rate": 8.194802661522789e-07,
"loss": 4.6657,
"step": 227000
},
{
"epoch": 4.93,
"learning_rate": 7.111120743839268e-07,
"loss": 4.6779,
"step": 227500
},
{
"epoch": 4.94,
"learning_rate": 6.027438826155747e-07,
"loss": 4.6812,
"step": 228000
},
{
"epoch": 4.95,
"learning_rate": 4.943756908472226e-07,
"loss": 4.6778,
"step": 228500
},
{
"epoch": 4.96,
"learning_rate": 3.862242354624071e-07,
"loss": 4.6756,
"step": 229000
},
{
"epoch": 4.97,
"learning_rate": 2.778560436940549e-07,
"loss": 4.673,
"step": 229500
},
{
"epoch": 4.98,
"learning_rate": 1.6948785192570278e-07,
"loss": 4.6745,
"step": 230000
},
{
"epoch": 5.0,
"learning_rate": 6.111966015735062e-08,
"loss": 4.6569,
"step": 230500
},
{
"epoch": 5.0,
"step": 230695,
"total_flos": 4.869967130385408e+17,
"train_loss": 5.372212270862655,
"train_runtime": 34970.0124,
"train_samples_per_second": 52.775,
"train_steps_per_second": 6.597
}
],
"max_steps": 230695,
"num_train_epochs": 5,
"total_flos": 4.869967130385408e+17,
"trial_name": null,
"trial_params": null
}