cendol-llama2-7b-chat / trainer_state.json
afaji's picture
add model
024d800
raw
history blame
122 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9995701848205272,
"eval_steps": 200000,
"global_step": 100000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.990303975370099e-06,
"loss": 1.6732,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 9.980308073689788e-06,
"loss": 1.3978,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 9.970312172009477e-06,
"loss": 1.3189,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 9.960316270329166e-06,
"loss": 1.294,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 9.950320368648854e-06,
"loss": 1.269,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 9.940324466968543e-06,
"loss": 1.2399,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 9.930328565288232e-06,
"loss": 1.2272,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 9.920332663607922e-06,
"loss": 1.2275,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 9.91033676192761e-06,
"loss": 1.204,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 9.9003408602473e-06,
"loss": 1.2009,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 9.89034495856699e-06,
"loss": 1.1841,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 9.880349056886678e-06,
"loss": 1.1711,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 9.870353155206366e-06,
"loss": 1.1601,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 9.860357253526055e-06,
"loss": 1.1601,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 9.850361351845744e-06,
"loss": 1.1313,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 9.840365450165434e-06,
"loss": 1.1443,
"step": 1600
},
{
"epoch": 0.02,
"learning_rate": 9.830369548485121e-06,
"loss": 1.144,
"step": 1700
},
{
"epoch": 0.02,
"learning_rate": 9.82037364680481e-06,
"loss": 1.1347,
"step": 1800
},
{
"epoch": 0.02,
"learning_rate": 9.8103777451245e-06,
"loss": 1.1323,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 9.800381843444189e-06,
"loss": 1.1229,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 9.790385941763878e-06,
"loss": 1.1027,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 9.780390040083566e-06,
"loss": 1.1113,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 9.770394138403255e-06,
"loss": 1.1093,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 9.760398236722944e-06,
"loss": 1.0941,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 9.750402335042633e-06,
"loss": 1.0985,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 9.740406433362322e-06,
"loss": 1.0881,
"step": 2600
},
{
"epoch": 0.03,
"learning_rate": 9.73041053168201e-06,
"loss": 1.106,
"step": 2700
},
{
"epoch": 0.03,
"learning_rate": 9.7204146300017e-06,
"loss": 1.0959,
"step": 2800
},
{
"epoch": 0.03,
"learning_rate": 9.710418728321388e-06,
"loss": 1.0974,
"step": 2900
},
{
"epoch": 0.03,
"learning_rate": 9.700422826641078e-06,
"loss": 1.0783,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 9.690426924960767e-06,
"loss": 1.0669,
"step": 3100
},
{
"epoch": 0.03,
"learning_rate": 9.680431023280456e-06,
"loss": 1.081,
"step": 3200
},
{
"epoch": 0.03,
"learning_rate": 9.670435121600145e-06,
"loss": 1.0611,
"step": 3300
},
{
"epoch": 0.03,
"learning_rate": 9.660439219919835e-06,
"loss": 1.0572,
"step": 3400
},
{
"epoch": 0.03,
"learning_rate": 9.650443318239524e-06,
"loss": 1.0654,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 9.640447416559211e-06,
"loss": 1.0671,
"step": 3600
},
{
"epoch": 0.04,
"learning_rate": 9.6304515148789e-06,
"loss": 1.0655,
"step": 3700
},
{
"epoch": 0.04,
"learning_rate": 9.62045561319859e-06,
"loss": 1.0507,
"step": 3800
},
{
"epoch": 0.04,
"learning_rate": 9.610459711518279e-06,
"loss": 1.0482,
"step": 3900
},
{
"epoch": 0.04,
"learning_rate": 9.600463809837968e-06,
"loss": 1.0543,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 9.590467908157656e-06,
"loss": 1.054,
"step": 4100
},
{
"epoch": 0.04,
"learning_rate": 9.580472006477345e-06,
"loss": 1.0551,
"step": 4200
},
{
"epoch": 0.04,
"learning_rate": 9.570476104797034e-06,
"loss": 1.0425,
"step": 4300
},
{
"epoch": 0.04,
"learning_rate": 9.560480203116723e-06,
"loss": 1.0417,
"step": 4400
},
{
"epoch": 0.04,
"learning_rate": 9.550484301436413e-06,
"loss": 1.0477,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 9.5404883997561e-06,
"loss": 1.0298,
"step": 4600
},
{
"epoch": 0.05,
"learning_rate": 9.53049249807579e-06,
"loss": 1.0305,
"step": 4700
},
{
"epoch": 0.05,
"learning_rate": 9.520496596395479e-06,
"loss": 1.0279,
"step": 4800
},
{
"epoch": 0.05,
"learning_rate": 9.510500694715168e-06,
"loss": 1.0308,
"step": 4900
},
{
"epoch": 0.05,
"learning_rate": 9.500504793034855e-06,
"loss": 1.0328,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 9.490508891354544e-06,
"loss": 1.027,
"step": 5100
},
{
"epoch": 0.05,
"learning_rate": 9.480512989674234e-06,
"loss": 1.0251,
"step": 5200
},
{
"epoch": 0.05,
"learning_rate": 9.470517087993923e-06,
"loss": 1.0273,
"step": 5300
},
{
"epoch": 0.05,
"learning_rate": 9.460521186313612e-06,
"loss": 1.0258,
"step": 5400
},
{
"epoch": 0.05,
"learning_rate": 9.450525284633301e-06,
"loss": 1.0239,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 9.44052938295299e-06,
"loss": 1.0263,
"step": 5600
},
{
"epoch": 0.06,
"learning_rate": 9.43053348127268e-06,
"loss": 1.009,
"step": 5700
},
{
"epoch": 0.06,
"learning_rate": 9.420537579592367e-06,
"loss": 1.005,
"step": 5800
},
{
"epoch": 0.06,
"learning_rate": 9.410541677912057e-06,
"loss": 1.0067,
"step": 5900
},
{
"epoch": 0.06,
"learning_rate": 9.400545776231746e-06,
"loss": 1.0088,
"step": 6000
},
{
"epoch": 0.06,
"learning_rate": 9.390549874551435e-06,
"loss": 1.0019,
"step": 6100
},
{
"epoch": 0.06,
"learning_rate": 9.380553972871124e-06,
"loss": 0.9987,
"step": 6200
},
{
"epoch": 0.06,
"learning_rate": 9.370558071190813e-06,
"loss": 1.0008,
"step": 6300
},
{
"epoch": 0.06,
"learning_rate": 9.360562169510503e-06,
"loss": 0.9994,
"step": 6400
},
{
"epoch": 0.06,
"learning_rate": 9.35056626783019e-06,
"loss": 0.9955,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 9.34057036614988e-06,
"loss": 0.9904,
"step": 6600
},
{
"epoch": 0.07,
"learning_rate": 9.330574464469569e-06,
"loss": 0.9859,
"step": 6700
},
{
"epoch": 0.07,
"learning_rate": 9.320578562789258e-06,
"loss": 0.9792,
"step": 6800
},
{
"epoch": 0.07,
"learning_rate": 9.310582661108945e-06,
"loss": 0.995,
"step": 6900
},
{
"epoch": 0.07,
"learning_rate": 9.300586759428635e-06,
"loss": 0.9833,
"step": 7000
},
{
"epoch": 0.07,
"learning_rate": 9.290590857748324e-06,
"loss": 0.9749,
"step": 7100
},
{
"epoch": 0.07,
"learning_rate": 9.280594956068013e-06,
"loss": 0.9891,
"step": 7200
},
{
"epoch": 0.07,
"learning_rate": 9.270599054387702e-06,
"loss": 0.9881,
"step": 7300
},
{
"epoch": 0.07,
"learning_rate": 9.26060315270739e-06,
"loss": 0.9832,
"step": 7400
},
{
"epoch": 0.07,
"learning_rate": 9.250607251027079e-06,
"loss": 0.9816,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 9.240611349346768e-06,
"loss": 0.9743,
"step": 7600
},
{
"epoch": 0.08,
"learning_rate": 9.230615447666457e-06,
"loss": 0.9834,
"step": 7700
},
{
"epoch": 0.08,
"learning_rate": 9.220619545986147e-06,
"loss": 0.9774,
"step": 7800
},
{
"epoch": 0.08,
"learning_rate": 9.210623644305836e-06,
"loss": 0.9845,
"step": 7900
},
{
"epoch": 0.08,
"learning_rate": 9.200627742625523e-06,
"loss": 0.9652,
"step": 8000
},
{
"epoch": 0.08,
"learning_rate": 9.190631840945213e-06,
"loss": 0.969,
"step": 8100
},
{
"epoch": 0.08,
"learning_rate": 9.180635939264902e-06,
"loss": 0.9679,
"step": 8200
},
{
"epoch": 0.08,
"learning_rate": 9.170640037584591e-06,
"loss": 0.9624,
"step": 8300
},
{
"epoch": 0.08,
"learning_rate": 9.16064413590428e-06,
"loss": 0.9813,
"step": 8400
},
{
"epoch": 0.08,
"learning_rate": 9.15064823422397e-06,
"loss": 0.9702,
"step": 8500
},
{
"epoch": 0.09,
"learning_rate": 9.140652332543659e-06,
"loss": 0.9634,
"step": 8600
},
{
"epoch": 0.09,
"learning_rate": 9.130656430863348e-06,
"loss": 0.9598,
"step": 8700
},
{
"epoch": 0.09,
"learning_rate": 9.120660529183035e-06,
"loss": 0.9662,
"step": 8800
},
{
"epoch": 0.09,
"learning_rate": 9.110664627502725e-06,
"loss": 0.9588,
"step": 8900
},
{
"epoch": 0.09,
"learning_rate": 9.100668725822414e-06,
"loss": 0.9451,
"step": 9000
},
{
"epoch": 0.09,
"learning_rate": 9.090672824142103e-06,
"loss": 0.967,
"step": 9100
},
{
"epoch": 0.09,
"learning_rate": 9.080676922461792e-06,
"loss": 0.9545,
"step": 9200
},
{
"epoch": 0.09,
"learning_rate": 9.07068102078148e-06,
"loss": 0.9568,
"step": 9300
},
{
"epoch": 0.09,
"learning_rate": 9.060685119101169e-06,
"loss": 0.9483,
"step": 9400
},
{
"epoch": 0.09,
"learning_rate": 9.050689217420858e-06,
"loss": 0.9444,
"step": 9500
},
{
"epoch": 0.1,
"learning_rate": 9.040693315740547e-06,
"loss": 0.9379,
"step": 9600
},
{
"epoch": 0.1,
"learning_rate": 9.030697414060237e-06,
"loss": 0.9445,
"step": 9700
},
{
"epoch": 0.1,
"learning_rate": 9.020701512379924e-06,
"loss": 0.939,
"step": 9800
},
{
"epoch": 0.1,
"learning_rate": 9.010705610699613e-06,
"loss": 0.9506,
"step": 9900
},
{
"epoch": 0.1,
"learning_rate": 9.000709709019303e-06,
"loss": 0.941,
"step": 10000
},
{
"epoch": 0.1,
"learning_rate": 8.990713807338992e-06,
"loss": 0.9444,
"step": 10100
},
{
"epoch": 0.1,
"learning_rate": 8.98071790565868e-06,
"loss": 0.9526,
"step": 10200
},
{
"epoch": 0.1,
"learning_rate": 8.970722003978369e-06,
"loss": 0.9294,
"step": 10300
},
{
"epoch": 0.1,
"learning_rate": 8.960726102298058e-06,
"loss": 0.9332,
"step": 10400
},
{
"epoch": 0.1,
"learning_rate": 8.950730200617747e-06,
"loss": 0.9357,
"step": 10500
},
{
"epoch": 0.11,
"learning_rate": 8.940734298937436e-06,
"loss": 0.9408,
"step": 10600
},
{
"epoch": 0.11,
"learning_rate": 8.930738397257125e-06,
"loss": 0.9301,
"step": 10700
},
{
"epoch": 0.11,
"learning_rate": 8.920742495576815e-06,
"loss": 0.9462,
"step": 10800
},
{
"epoch": 0.11,
"learning_rate": 8.910746593896504e-06,
"loss": 0.9295,
"step": 10900
},
{
"epoch": 0.11,
"learning_rate": 8.900750692216193e-06,
"loss": 0.9418,
"step": 11000
},
{
"epoch": 0.11,
"learning_rate": 8.890754790535882e-06,
"loss": 0.9315,
"step": 11100
},
{
"epoch": 0.11,
"learning_rate": 8.88075888885557e-06,
"loss": 0.9356,
"step": 11200
},
{
"epoch": 0.11,
"learning_rate": 8.870762987175259e-06,
"loss": 0.9216,
"step": 11300
},
{
"epoch": 0.11,
"learning_rate": 8.860767085494948e-06,
"loss": 0.9298,
"step": 11400
},
{
"epoch": 0.11,
"learning_rate": 8.850771183814638e-06,
"loss": 0.9207,
"step": 11500
},
{
"epoch": 0.12,
"learning_rate": 8.840775282134325e-06,
"loss": 0.923,
"step": 11600
},
{
"epoch": 0.12,
"learning_rate": 8.830779380454014e-06,
"loss": 0.91,
"step": 11700
},
{
"epoch": 0.12,
"learning_rate": 8.820783478773703e-06,
"loss": 0.9171,
"step": 11800
},
{
"epoch": 0.12,
"learning_rate": 8.810787577093393e-06,
"loss": 0.9179,
"step": 11900
},
{
"epoch": 0.12,
"learning_rate": 8.800791675413082e-06,
"loss": 0.9185,
"step": 12000
},
{
"epoch": 0.12,
"learning_rate": 8.79079577373277e-06,
"loss": 0.9071,
"step": 12100
},
{
"epoch": 0.12,
"learning_rate": 8.780799872052459e-06,
"loss": 0.9142,
"step": 12200
},
{
"epoch": 0.12,
"learning_rate": 8.770803970372148e-06,
"loss": 0.9079,
"step": 12300
},
{
"epoch": 0.12,
"learning_rate": 8.760808068691837e-06,
"loss": 0.9105,
"step": 12400
},
{
"epoch": 0.12,
"learning_rate": 8.750812167011526e-06,
"loss": 0.9027,
"step": 12500
},
{
"epoch": 0.13,
"learning_rate": 8.740816265331214e-06,
"loss": 0.9074,
"step": 12600
},
{
"epoch": 0.13,
"learning_rate": 8.730820363650903e-06,
"loss": 0.9064,
"step": 12700
},
{
"epoch": 0.13,
"learning_rate": 8.720824461970592e-06,
"loss": 0.8988,
"step": 12800
},
{
"epoch": 0.13,
"learning_rate": 8.710828560290282e-06,
"loss": 0.9,
"step": 12900
},
{
"epoch": 0.13,
"learning_rate": 8.70083265860997e-06,
"loss": 0.888,
"step": 13000
},
{
"epoch": 0.13,
"learning_rate": 8.69083675692966e-06,
"loss": 0.9012,
"step": 13100
},
{
"epoch": 0.13,
"learning_rate": 8.68084085524935e-06,
"loss": 0.9036,
"step": 13200
},
{
"epoch": 0.13,
"learning_rate": 8.670844953569038e-06,
"loss": 0.9023,
"step": 13300
},
{
"epoch": 0.13,
"learning_rate": 8.660849051888726e-06,
"loss": 0.9017,
"step": 13400
},
{
"epoch": 0.13,
"learning_rate": 8.650853150208415e-06,
"loss": 0.8999,
"step": 13500
},
{
"epoch": 0.14,
"learning_rate": 8.640857248528104e-06,
"loss": 0.895,
"step": 13600
},
{
"epoch": 0.14,
"learning_rate": 8.630861346847794e-06,
"loss": 0.9,
"step": 13700
},
{
"epoch": 0.14,
"learning_rate": 8.620865445167483e-06,
"loss": 0.907,
"step": 13800
},
{
"epoch": 0.14,
"learning_rate": 8.610869543487172e-06,
"loss": 0.8963,
"step": 13900
},
{
"epoch": 0.14,
"learning_rate": 8.60087364180686e-06,
"loss": 0.8953,
"step": 14000
},
{
"epoch": 0.14,
"learning_rate": 8.590877740126549e-06,
"loss": 0.9034,
"step": 14100
},
{
"epoch": 0.14,
"learning_rate": 8.580881838446238e-06,
"loss": 0.8916,
"step": 14200
},
{
"epoch": 0.14,
"learning_rate": 8.570885936765927e-06,
"loss": 0.8964,
"step": 14300
},
{
"epoch": 0.14,
"learning_rate": 8.560890035085616e-06,
"loss": 0.8894,
"step": 14400
},
{
"epoch": 0.14,
"learning_rate": 8.550894133405304e-06,
"loss": 0.8894,
"step": 14500
},
{
"epoch": 0.15,
"learning_rate": 8.540898231724993e-06,
"loss": 0.8941,
"step": 14600
},
{
"epoch": 0.15,
"learning_rate": 8.530902330044682e-06,
"loss": 0.8888,
"step": 14700
},
{
"epoch": 0.15,
"learning_rate": 8.520906428364372e-06,
"loss": 0.8898,
"step": 14800
},
{
"epoch": 0.15,
"learning_rate": 8.510910526684059e-06,
"loss": 0.8812,
"step": 14900
},
{
"epoch": 0.15,
"learning_rate": 8.500914625003748e-06,
"loss": 0.8833,
"step": 15000
},
{
"epoch": 0.15,
"learning_rate": 8.490918723323438e-06,
"loss": 0.8907,
"step": 15100
},
{
"epoch": 0.15,
"learning_rate": 8.480922821643127e-06,
"loss": 0.8732,
"step": 15200
},
{
"epoch": 0.15,
"learning_rate": 8.470926919962816e-06,
"loss": 0.8894,
"step": 15300
},
{
"epoch": 0.15,
"learning_rate": 8.460931018282505e-06,
"loss": 0.8731,
"step": 15400
},
{
"epoch": 0.15,
"learning_rate": 8.450935116602193e-06,
"loss": 0.8811,
"step": 15500
},
{
"epoch": 0.16,
"learning_rate": 8.440939214921882e-06,
"loss": 0.8649,
"step": 15600
},
{
"epoch": 0.16,
"learning_rate": 8.430943313241571e-06,
"loss": 0.8654,
"step": 15700
},
{
"epoch": 0.16,
"learning_rate": 8.42094741156126e-06,
"loss": 0.8688,
"step": 15800
},
{
"epoch": 0.16,
"learning_rate": 8.41095150988095e-06,
"loss": 0.8713,
"step": 15900
},
{
"epoch": 0.16,
"learning_rate": 8.400955608200639e-06,
"loss": 0.8694,
"step": 16000
},
{
"epoch": 0.16,
"learning_rate": 8.390959706520328e-06,
"loss": 0.8617,
"step": 16100
},
{
"epoch": 0.16,
"learning_rate": 8.380963804840017e-06,
"loss": 0.8764,
"step": 16200
},
{
"epoch": 0.16,
"learning_rate": 8.370967903159706e-06,
"loss": 0.8627,
"step": 16300
},
{
"epoch": 0.16,
"learning_rate": 8.360972001479394e-06,
"loss": 0.8749,
"step": 16400
},
{
"epoch": 0.16,
"learning_rate": 8.350976099799083e-06,
"loss": 0.8702,
"step": 16500
},
{
"epoch": 0.17,
"learning_rate": 8.340980198118772e-06,
"loss": 0.868,
"step": 16600
},
{
"epoch": 0.17,
"learning_rate": 8.330984296438462e-06,
"loss": 0.8545,
"step": 16700
},
{
"epoch": 0.17,
"learning_rate": 8.32098839475815e-06,
"loss": 0.865,
"step": 16800
},
{
"epoch": 0.17,
"learning_rate": 8.310992493077838e-06,
"loss": 0.864,
"step": 16900
},
{
"epoch": 0.17,
"learning_rate": 8.300996591397528e-06,
"loss": 0.8676,
"step": 17000
},
{
"epoch": 0.17,
"learning_rate": 8.291000689717217e-06,
"loss": 0.8636,
"step": 17100
},
{
"epoch": 0.17,
"learning_rate": 8.281004788036906e-06,
"loss": 0.8555,
"step": 17200
},
{
"epoch": 0.17,
"learning_rate": 8.271008886356594e-06,
"loss": 0.8548,
"step": 17300
},
{
"epoch": 0.17,
"learning_rate": 8.261012984676283e-06,
"loss": 0.8546,
"step": 17400
},
{
"epoch": 0.17,
"learning_rate": 8.251017082995972e-06,
"loss": 0.8606,
"step": 17500
},
{
"epoch": 0.18,
"learning_rate": 8.241021181315661e-06,
"loss": 0.8485,
"step": 17600
},
{
"epoch": 0.18,
"learning_rate": 8.23102527963535e-06,
"loss": 0.8421,
"step": 17700
},
{
"epoch": 0.18,
"learning_rate": 8.221029377955038e-06,
"loss": 0.8468,
"step": 17800
},
{
"epoch": 0.18,
"learning_rate": 8.211033476274727e-06,
"loss": 0.8493,
"step": 17900
},
{
"epoch": 0.18,
"learning_rate": 8.201037574594416e-06,
"loss": 0.8508,
"step": 18000
},
{
"epoch": 0.18,
"learning_rate": 8.191041672914106e-06,
"loss": 0.8441,
"step": 18100
},
{
"epoch": 0.18,
"learning_rate": 8.181045771233795e-06,
"loss": 0.8554,
"step": 18200
},
{
"epoch": 0.18,
"learning_rate": 8.171049869553484e-06,
"loss": 0.8458,
"step": 18300
},
{
"epoch": 0.18,
"learning_rate": 8.161053967873173e-06,
"loss": 0.8433,
"step": 18400
},
{
"epoch": 0.18,
"learning_rate": 8.151058066192863e-06,
"loss": 0.8494,
"step": 18500
},
{
"epoch": 0.19,
"learning_rate": 8.141062164512552e-06,
"loss": 0.842,
"step": 18600
},
{
"epoch": 0.19,
"learning_rate": 8.13106626283224e-06,
"loss": 0.849,
"step": 18700
},
{
"epoch": 0.19,
"learning_rate": 8.121070361151928e-06,
"loss": 0.8382,
"step": 18800
},
{
"epoch": 0.19,
"learning_rate": 8.111074459471618e-06,
"loss": 0.8385,
"step": 18900
},
{
"epoch": 0.19,
"learning_rate": 8.101078557791307e-06,
"loss": 0.8397,
"step": 19000
},
{
"epoch": 0.19,
"learning_rate": 8.091082656110996e-06,
"loss": 0.8538,
"step": 19100
},
{
"epoch": 0.19,
"learning_rate": 8.081086754430684e-06,
"loss": 0.8392,
"step": 19200
},
{
"epoch": 0.19,
"learning_rate": 8.071090852750373e-06,
"loss": 0.8379,
"step": 19300
},
{
"epoch": 0.19,
"learning_rate": 8.061094951070062e-06,
"loss": 0.8409,
"step": 19400
},
{
"epoch": 0.19,
"learning_rate": 8.051099049389751e-06,
"loss": 0.8332,
"step": 19500
},
{
"epoch": 0.2,
"learning_rate": 8.04110314770944e-06,
"loss": 0.8292,
"step": 19600
},
{
"epoch": 0.2,
"learning_rate": 8.031107246029128e-06,
"loss": 0.8382,
"step": 19700
},
{
"epoch": 0.2,
"learning_rate": 8.021111344348817e-06,
"loss": 0.8329,
"step": 19800
},
{
"epoch": 0.2,
"learning_rate": 8.011115442668507e-06,
"loss": 0.825,
"step": 19900
},
{
"epoch": 0.2,
"learning_rate": 8.001119540988196e-06,
"loss": 0.8274,
"step": 20000
},
{
"epoch": 0.2,
"learning_rate": 7.991123639307883e-06,
"loss": 0.8341,
"step": 20100
},
{
"epoch": 0.2,
"learning_rate": 7.981127737627572e-06,
"loss": 0.8302,
"step": 20200
},
{
"epoch": 0.2,
"learning_rate": 7.971131835947262e-06,
"loss": 0.8247,
"step": 20300
},
{
"epoch": 0.2,
"learning_rate": 7.961135934266951e-06,
"loss": 0.8355,
"step": 20400
},
{
"epoch": 0.2,
"learning_rate": 7.95114003258664e-06,
"loss": 0.8298,
"step": 20500
},
{
"epoch": 0.21,
"learning_rate": 7.94114413090633e-06,
"loss": 0.8282,
"step": 20600
},
{
"epoch": 0.21,
"learning_rate": 7.931148229226019e-06,
"loss": 0.8202,
"step": 20700
},
{
"epoch": 0.21,
"learning_rate": 7.921152327545708e-06,
"loss": 0.8203,
"step": 20800
},
{
"epoch": 0.21,
"learning_rate": 7.911156425865395e-06,
"loss": 0.8295,
"step": 20900
},
{
"epoch": 0.21,
"learning_rate": 7.901160524185085e-06,
"loss": 0.8229,
"step": 21000
},
{
"epoch": 0.21,
"learning_rate": 7.891164622504774e-06,
"loss": 0.818,
"step": 21100
},
{
"epoch": 0.21,
"learning_rate": 7.881168720824463e-06,
"loss": 0.8154,
"step": 21200
},
{
"epoch": 0.21,
"learning_rate": 7.871172819144152e-06,
"loss": 0.8166,
"step": 21300
},
{
"epoch": 0.21,
"learning_rate": 7.861176917463841e-06,
"loss": 0.8104,
"step": 21400
},
{
"epoch": 0.21,
"learning_rate": 7.851181015783529e-06,
"loss": 0.8178,
"step": 21500
},
{
"epoch": 0.22,
"learning_rate": 7.841185114103218e-06,
"loss": 0.8025,
"step": 21600
},
{
"epoch": 0.22,
"learning_rate": 7.831189212422907e-06,
"loss": 0.8099,
"step": 21700
},
{
"epoch": 0.22,
"learning_rate": 7.821193310742597e-06,
"loss": 0.8119,
"step": 21800
},
{
"epoch": 0.22,
"learning_rate": 7.811197409062286e-06,
"loss": 0.8049,
"step": 21900
},
{
"epoch": 0.22,
"learning_rate": 7.801201507381973e-06,
"loss": 0.8157,
"step": 22000
},
{
"epoch": 0.22,
"learning_rate": 7.791205605701663e-06,
"loss": 0.8031,
"step": 22100
},
{
"epoch": 0.22,
"learning_rate": 7.781209704021352e-06,
"loss": 0.8061,
"step": 22200
},
{
"epoch": 0.22,
"learning_rate": 7.771213802341041e-06,
"loss": 0.8175,
"step": 22300
},
{
"epoch": 0.22,
"learning_rate": 7.76121790066073e-06,
"loss": 0.8002,
"step": 22400
},
{
"epoch": 0.22,
"learning_rate": 7.751221998980418e-06,
"loss": 0.8115,
"step": 22500
},
{
"epoch": 0.23,
"learning_rate": 7.741226097300107e-06,
"loss": 0.8095,
"step": 22600
},
{
"epoch": 0.23,
"learning_rate": 7.731230195619796e-06,
"loss": 0.7981,
"step": 22700
},
{
"epoch": 0.23,
"learning_rate": 7.721234293939485e-06,
"loss": 0.802,
"step": 22800
},
{
"epoch": 0.23,
"learning_rate": 7.711238392259175e-06,
"loss": 0.7983,
"step": 22900
},
{
"epoch": 0.23,
"learning_rate": 7.701242490578862e-06,
"loss": 0.7987,
"step": 23000
},
{
"epoch": 0.23,
"learning_rate": 7.691246588898551e-06,
"loss": 0.8006,
"step": 23100
},
{
"epoch": 0.23,
"learning_rate": 7.68125068721824e-06,
"loss": 0.8005,
"step": 23200
},
{
"epoch": 0.23,
"learning_rate": 7.67125478553793e-06,
"loss": 0.7952,
"step": 23300
},
{
"epoch": 0.23,
"learning_rate": 7.661258883857619e-06,
"loss": 0.7929,
"step": 23400
},
{
"epoch": 0.23,
"learning_rate": 7.651262982177308e-06,
"loss": 0.7874,
"step": 23500
},
{
"epoch": 0.24,
"learning_rate": 7.641267080496997e-06,
"loss": 0.7971,
"step": 23600
},
{
"epoch": 0.24,
"learning_rate": 7.631271178816687e-06,
"loss": 0.7972,
"step": 23700
},
{
"epoch": 0.24,
"learning_rate": 7.621275277136375e-06,
"loss": 0.7909,
"step": 23800
},
{
"epoch": 0.24,
"learning_rate": 7.611279375456063e-06,
"loss": 0.8056,
"step": 23900
},
{
"epoch": 0.24,
"learning_rate": 7.601283473775753e-06,
"loss": 0.7985,
"step": 24000
},
{
"epoch": 0.24,
"learning_rate": 7.591287572095442e-06,
"loss": 0.7947,
"step": 24100
},
{
"epoch": 0.24,
"learning_rate": 7.581291670415131e-06,
"loss": 0.7843,
"step": 24200
},
{
"epoch": 0.24,
"learning_rate": 7.5712957687348194e-06,
"loss": 0.8008,
"step": 24300
},
{
"epoch": 0.24,
"learning_rate": 7.561299867054508e-06,
"loss": 0.786,
"step": 24400
},
{
"epoch": 0.24,
"learning_rate": 7.551303965374197e-06,
"loss": 0.7837,
"step": 24500
},
{
"epoch": 0.25,
"learning_rate": 7.541308063693886e-06,
"loss": 0.7978,
"step": 24600
},
{
"epoch": 0.25,
"learning_rate": 7.5313121620135755e-06,
"loss": 0.7869,
"step": 24700
},
{
"epoch": 0.25,
"learning_rate": 7.521316260333264e-06,
"loss": 0.7944,
"step": 24800
},
{
"epoch": 0.25,
"learning_rate": 7.511320358652952e-06,
"loss": 0.7884,
"step": 24900
},
{
"epoch": 0.25,
"learning_rate": 7.5013244569726414e-06,
"loss": 0.7783,
"step": 25000
},
{
"epoch": 0.25,
"learning_rate": 7.491328555292331e-06,
"loss": 0.7859,
"step": 25100
},
{
"epoch": 0.25,
"learning_rate": 7.48133265361202e-06,
"loss": 0.7741,
"step": 25200
},
{
"epoch": 0.25,
"learning_rate": 7.471336751931708e-06,
"loss": 0.7788,
"step": 25300
},
{
"epoch": 0.25,
"learning_rate": 7.4613408502513975e-06,
"loss": 0.7745,
"step": 25400
},
{
"epoch": 0.25,
"learning_rate": 7.451344948571087e-06,
"loss": 0.7833,
"step": 25500
},
{
"epoch": 0.26,
"learning_rate": 7.441349046890776e-06,
"loss": 0.7831,
"step": 25600
},
{
"epoch": 0.26,
"learning_rate": 7.431353145210465e-06,
"loss": 0.781,
"step": 25700
},
{
"epoch": 0.26,
"learning_rate": 7.421357243530153e-06,
"loss": 0.7754,
"step": 25800
},
{
"epoch": 0.26,
"learning_rate": 7.411361341849842e-06,
"loss": 0.7647,
"step": 25900
},
{
"epoch": 0.26,
"learning_rate": 7.401365440169531e-06,
"loss": 0.7641,
"step": 26000
},
{
"epoch": 0.26,
"learning_rate": 7.39136953848922e-06,
"loss": 0.7744,
"step": 26100
},
{
"epoch": 0.26,
"learning_rate": 7.3813736368089095e-06,
"loss": 0.774,
"step": 26200
},
{
"epoch": 0.26,
"learning_rate": 7.371377735128598e-06,
"loss": 0.7651,
"step": 26300
},
{
"epoch": 0.26,
"learning_rate": 7.361381833448287e-06,
"loss": 0.7605,
"step": 26400
},
{
"epoch": 0.26,
"learning_rate": 7.3513859317679755e-06,
"loss": 0.7753,
"step": 26500
},
{
"epoch": 0.27,
"learning_rate": 7.341390030087665e-06,
"loss": 0.7682,
"step": 26600
},
{
"epoch": 0.27,
"learning_rate": 7.331394128407353e-06,
"loss": 0.7663,
"step": 26700
},
{
"epoch": 0.27,
"learning_rate": 7.321398226727042e-06,
"loss": 0.7652,
"step": 26800
},
{
"epoch": 0.27,
"learning_rate": 7.3114023250467315e-06,
"loss": 0.7623,
"step": 26900
},
{
"epoch": 0.27,
"learning_rate": 7.301406423366421e-06,
"loss": 0.768,
"step": 27000
},
{
"epoch": 0.27,
"learning_rate": 7.29141052168611e-06,
"loss": 0.7627,
"step": 27100
},
{
"epoch": 0.27,
"learning_rate": 7.2814146200057975e-06,
"loss": 0.7555,
"step": 27200
},
{
"epoch": 0.27,
"learning_rate": 7.271418718325487e-06,
"loss": 0.7667,
"step": 27300
},
{
"epoch": 0.27,
"learning_rate": 7.261422816645176e-06,
"loss": 0.7544,
"step": 27400
},
{
"epoch": 0.27,
"learning_rate": 7.251426914964865e-06,
"loss": 0.7584,
"step": 27500
},
{
"epoch": 0.28,
"learning_rate": 7.241431013284554e-06,
"loss": 0.764,
"step": 27600
},
{
"epoch": 0.28,
"learning_rate": 7.231435111604243e-06,
"loss": 0.766,
"step": 27700
},
{
"epoch": 0.28,
"learning_rate": 7.221439209923932e-06,
"loss": 0.7607,
"step": 27800
},
{
"epoch": 0.28,
"learning_rate": 7.211443308243621e-06,
"loss": 0.748,
"step": 27900
},
{
"epoch": 0.28,
"learning_rate": 7.20144740656331e-06,
"loss": 0.7565,
"step": 28000
},
{
"epoch": 0.28,
"learning_rate": 7.191451504882998e-06,
"loss": 0.7553,
"step": 28100
},
{
"epoch": 0.28,
"learning_rate": 7.181455603202687e-06,
"loss": 0.7592,
"step": 28200
},
{
"epoch": 0.28,
"learning_rate": 7.171459701522376e-06,
"loss": 0.7463,
"step": 28300
},
{
"epoch": 0.28,
"learning_rate": 7.1614637998420656e-06,
"loss": 0.7468,
"step": 28400
},
{
"epoch": 0.28,
"learning_rate": 7.151467898161755e-06,
"loss": 0.7543,
"step": 28500
},
{
"epoch": 0.29,
"learning_rate": 7.141471996481443e-06,
"loss": 0.759,
"step": 28600
},
{
"epoch": 0.29,
"learning_rate": 7.1314760948011315e-06,
"loss": 0.7443,
"step": 28700
},
{
"epoch": 0.29,
"learning_rate": 7.121480193120821e-06,
"loss": 0.7498,
"step": 28800
},
{
"epoch": 0.29,
"learning_rate": 7.11148429144051e-06,
"loss": 0.7458,
"step": 28900
},
{
"epoch": 0.29,
"learning_rate": 7.101488389760199e-06,
"loss": 0.7495,
"step": 29000
},
{
"epoch": 0.29,
"learning_rate": 7.0914924880798875e-06,
"loss": 0.7317,
"step": 29100
},
{
"epoch": 0.29,
"learning_rate": 7.081496586399577e-06,
"loss": 0.7429,
"step": 29200
},
{
"epoch": 0.29,
"learning_rate": 7.071500684719266e-06,
"loss": 0.7284,
"step": 29300
},
{
"epoch": 0.29,
"learning_rate": 7.061504783038955e-06,
"loss": 0.7388,
"step": 29400
},
{
"epoch": 0.29,
"learning_rate": 7.0515088813586444e-06,
"loss": 0.7364,
"step": 29500
},
{
"epoch": 0.3,
"learning_rate": 7.041512979678332e-06,
"loss": 0.7364,
"step": 29600
},
{
"epoch": 0.3,
"learning_rate": 7.031517077998021e-06,
"loss": 0.7358,
"step": 29700
},
{
"epoch": 0.3,
"learning_rate": 7.02152117631771e-06,
"loss": 0.7347,
"step": 29800
},
{
"epoch": 0.3,
"learning_rate": 7.0115252746374e-06,
"loss": 0.7338,
"step": 29900
},
{
"epoch": 0.3,
"learning_rate": 7.001529372957088e-06,
"loss": 0.7403,
"step": 30000
},
{
"epoch": 0.3,
"learning_rate": 6.991533471276777e-06,
"loss": 0.7319,
"step": 30100
},
{
"epoch": 0.3,
"learning_rate": 6.981537569596466e-06,
"loss": 0.7356,
"step": 30200
},
{
"epoch": 0.3,
"learning_rate": 6.971541667916155e-06,
"loss": 0.725,
"step": 30300
},
{
"epoch": 0.3,
"learning_rate": 6.961545766235844e-06,
"loss": 0.7262,
"step": 30400
},
{
"epoch": 0.3,
"learning_rate": 6.951549864555532e-06,
"loss": 0.7348,
"step": 30500
},
{
"epoch": 0.31,
"learning_rate": 6.941553962875222e-06,
"loss": 0.7276,
"step": 30600
},
{
"epoch": 0.31,
"learning_rate": 6.931558061194911e-06,
"loss": 0.7252,
"step": 30700
},
{
"epoch": 0.31,
"learning_rate": 6.9215621595146e-06,
"loss": 0.7324,
"step": 30800
},
{
"epoch": 0.31,
"learning_rate": 6.911566257834289e-06,
"loss": 0.7426,
"step": 30900
},
{
"epoch": 0.31,
"learning_rate": 6.901570356153977e-06,
"loss": 0.7332,
"step": 31000
},
{
"epoch": 0.31,
"learning_rate": 6.891574454473666e-06,
"loss": 0.7355,
"step": 31100
},
{
"epoch": 0.31,
"learning_rate": 6.881578552793355e-06,
"loss": 0.7296,
"step": 31200
},
{
"epoch": 0.31,
"learning_rate": 6.8715826511130444e-06,
"loss": 0.7284,
"step": 31300
},
{
"epoch": 0.31,
"learning_rate": 6.861586749432733e-06,
"loss": 0.7217,
"step": 31400
},
{
"epoch": 0.31,
"learning_rate": 6.851590847752422e-06,
"loss": 0.7216,
"step": 31500
},
{
"epoch": 0.32,
"learning_rate": 6.841594946072111e-06,
"loss": 0.7139,
"step": 31600
},
{
"epoch": 0.32,
"learning_rate": 6.8315990443918005e-06,
"loss": 0.7227,
"step": 31700
},
{
"epoch": 0.32,
"learning_rate": 6.82160314271149e-06,
"loss": 0.7221,
"step": 31800
},
{
"epoch": 0.32,
"learning_rate": 6.811607241031177e-06,
"loss": 0.7082,
"step": 31900
},
{
"epoch": 0.32,
"learning_rate": 6.8016113393508664e-06,
"loss": 0.72,
"step": 32000
},
{
"epoch": 0.32,
"learning_rate": 6.791615437670556e-06,
"loss": 0.7134,
"step": 32100
},
{
"epoch": 0.32,
"learning_rate": 6.781619535990245e-06,
"loss": 0.719,
"step": 32200
},
{
"epoch": 0.32,
"learning_rate": 6.771623634309934e-06,
"loss": 0.7083,
"step": 32300
},
{
"epoch": 0.32,
"learning_rate": 6.761627732629622e-06,
"loss": 0.7148,
"step": 32400
},
{
"epoch": 0.32,
"learning_rate": 6.751631830949311e-06,
"loss": 0.7229,
"step": 32500
},
{
"epoch": 0.33,
"learning_rate": 6.741635929269e-06,
"loss": 0.7173,
"step": 32600
},
{
"epoch": 0.33,
"learning_rate": 6.731640027588689e-06,
"loss": 0.7118,
"step": 32700
},
{
"epoch": 0.33,
"learning_rate": 6.7216441259083785e-06,
"loss": 0.7072,
"step": 32800
},
{
"epoch": 0.33,
"learning_rate": 6.711648224228067e-06,
"loss": 0.7174,
"step": 32900
},
{
"epoch": 0.33,
"learning_rate": 6.701652322547756e-06,
"loss": 0.7125,
"step": 33000
},
{
"epoch": 0.33,
"learning_rate": 6.691656420867445e-06,
"loss": 0.705,
"step": 33100
},
{
"epoch": 0.33,
"learning_rate": 6.6816605191871345e-06,
"loss": 0.7023,
"step": 33200
},
{
"epoch": 0.33,
"learning_rate": 6.671664617506822e-06,
"loss": 0.7114,
"step": 33300
},
{
"epoch": 0.33,
"learning_rate": 6.661668715826511e-06,
"loss": 0.7038,
"step": 33400
},
{
"epoch": 0.33,
"learning_rate": 6.6516728141462005e-06,
"loss": 0.7051,
"step": 33500
},
{
"epoch": 0.34,
"learning_rate": 6.64167691246589e-06,
"loss": 0.6988,
"step": 33600
},
{
"epoch": 0.34,
"learning_rate": 6.631681010785579e-06,
"loss": 0.7131,
"step": 33700
},
{
"epoch": 0.34,
"learning_rate": 6.621685109105267e-06,
"loss": 0.6983,
"step": 33800
},
{
"epoch": 0.34,
"learning_rate": 6.6116892074249565e-06,
"loss": 0.6978,
"step": 33900
},
{
"epoch": 0.34,
"learning_rate": 6.601693305744645e-06,
"loss": 0.6941,
"step": 34000
},
{
"epoch": 0.34,
"learning_rate": 6.591697404064334e-06,
"loss": 0.7028,
"step": 34100
},
{
"epoch": 0.34,
"learning_rate": 6.581701502384023e-06,
"loss": 0.6925,
"step": 34200
},
{
"epoch": 0.34,
"learning_rate": 6.571705600703712e-06,
"loss": 0.6993,
"step": 34300
},
{
"epoch": 0.34,
"learning_rate": 6.561709699023401e-06,
"loss": 0.6894,
"step": 34400
},
{
"epoch": 0.34,
"learning_rate": 6.55171379734309e-06,
"loss": 0.7054,
"step": 34500
},
{
"epoch": 0.35,
"learning_rate": 6.541717895662779e-06,
"loss": 0.708,
"step": 34600
},
{
"epoch": 0.35,
"learning_rate": 6.531721993982467e-06,
"loss": 0.6977,
"step": 34700
},
{
"epoch": 0.35,
"learning_rate": 6.521726092302156e-06,
"loss": 0.6997,
"step": 34800
},
{
"epoch": 0.35,
"learning_rate": 6.511730190621845e-06,
"loss": 0.6984,
"step": 34900
},
{
"epoch": 0.35,
"learning_rate": 6.5017342889415345e-06,
"loss": 0.6955,
"step": 35000
},
{
"epoch": 0.35,
"learning_rate": 6.491738387261224e-06,
"loss": 0.6885,
"step": 35100
},
{
"epoch": 0.35,
"learning_rate": 6.481742485580912e-06,
"loss": 0.6926,
"step": 35200
},
{
"epoch": 0.35,
"learning_rate": 6.471746583900601e-06,
"loss": 0.6927,
"step": 35300
},
{
"epoch": 0.35,
"learning_rate": 6.4617506822202906e-06,
"loss": 0.6877,
"step": 35400
},
{
"epoch": 0.35,
"learning_rate": 6.45175478053998e-06,
"loss": 0.6911,
"step": 35500
},
{
"epoch": 0.36,
"learning_rate": 6.441758878859668e-06,
"loss": 0.6959,
"step": 35600
},
{
"epoch": 0.36,
"learning_rate": 6.4317629771793565e-06,
"loss": 0.6741,
"step": 35700
},
{
"epoch": 0.36,
"learning_rate": 6.421767075499046e-06,
"loss": 0.6853,
"step": 35800
},
{
"epoch": 0.36,
"learning_rate": 6.411771173818735e-06,
"loss": 0.6892,
"step": 35900
},
{
"epoch": 0.36,
"learning_rate": 6.401775272138424e-06,
"loss": 0.6864,
"step": 36000
},
{
"epoch": 0.36,
"learning_rate": 6.3917793704581125e-06,
"loss": 0.6843,
"step": 36100
},
{
"epoch": 0.36,
"learning_rate": 6.381783468777801e-06,
"loss": 0.6959,
"step": 36200
},
{
"epoch": 0.36,
"learning_rate": 6.37178756709749e-06,
"loss": 0.6783,
"step": 36300
},
{
"epoch": 0.36,
"learning_rate": 6.361791665417179e-06,
"loss": 0.6741,
"step": 36400
},
{
"epoch": 0.36,
"learning_rate": 6.3517957637368686e-06,
"loss": 0.6771,
"step": 36500
},
{
"epoch": 0.37,
"learning_rate": 6.341799862056557e-06,
"loss": 0.6699,
"step": 36600
},
{
"epoch": 0.37,
"learning_rate": 6.331803960376246e-06,
"loss": 0.684,
"step": 36700
},
{
"epoch": 0.37,
"learning_rate": 6.321808058695935e-06,
"loss": 0.675,
"step": 36800
},
{
"epoch": 0.37,
"learning_rate": 6.311812157015625e-06,
"loss": 0.6857,
"step": 36900
},
{
"epoch": 0.37,
"learning_rate": 6.301816255335314e-06,
"loss": 0.6782,
"step": 37000
},
{
"epoch": 0.37,
"learning_rate": 6.291820353655001e-06,
"loss": 0.6692,
"step": 37100
},
{
"epoch": 0.37,
"learning_rate": 6.2818244519746906e-06,
"loss": 0.6789,
"step": 37200
},
{
"epoch": 0.37,
"learning_rate": 6.27182855029438e-06,
"loss": 0.6692,
"step": 37300
},
{
"epoch": 0.37,
"learning_rate": 6.261832648614069e-06,
"loss": 0.668,
"step": 37400
},
{
"epoch": 0.37,
"learning_rate": 6.251836746933758e-06,
"loss": 0.6657,
"step": 37500
},
{
"epoch": 0.38,
"learning_rate": 6.241840845253447e-06,
"loss": 0.6699,
"step": 37600
},
{
"epoch": 0.38,
"learning_rate": 6.231844943573136e-06,
"loss": 0.6695,
"step": 37700
},
{
"epoch": 0.38,
"learning_rate": 6.221849041892824e-06,
"loss": 0.6613,
"step": 37800
},
{
"epoch": 0.38,
"learning_rate": 6.211853140212513e-06,
"loss": 0.6711,
"step": 37900
},
{
"epoch": 0.38,
"learning_rate": 6.201857238532202e-06,
"loss": 0.6647,
"step": 38000
},
{
"epoch": 0.38,
"learning_rate": 6.191861336851891e-06,
"loss": 0.6625,
"step": 38100
},
{
"epoch": 0.38,
"learning_rate": 6.18186543517158e-06,
"loss": 0.6593,
"step": 38200
},
{
"epoch": 0.38,
"learning_rate": 6.1718695334912694e-06,
"loss": 0.6718,
"step": 38300
},
{
"epoch": 0.38,
"learning_rate": 6.161873631810959e-06,
"loss": 0.6642,
"step": 38400
},
{
"epoch": 0.38,
"learning_rate": 6.151877730130646e-06,
"loss": 0.6564,
"step": 38500
},
{
"epoch": 0.39,
"learning_rate": 6.141881828450335e-06,
"loss": 0.6584,
"step": 38600
},
{
"epoch": 0.39,
"learning_rate": 6.131885926770025e-06,
"loss": 0.6569,
"step": 38700
},
{
"epoch": 0.39,
"learning_rate": 6.121890025089714e-06,
"loss": 0.6643,
"step": 38800
},
{
"epoch": 0.39,
"learning_rate": 6.111894123409403e-06,
"loss": 0.6603,
"step": 38900
},
{
"epoch": 0.39,
"learning_rate": 6.101898221729091e-06,
"loss": 0.6656,
"step": 39000
},
{
"epoch": 0.39,
"learning_rate": 6.091902320048781e-06,
"loss": 0.664,
"step": 39100
},
{
"epoch": 0.39,
"learning_rate": 6.08190641836847e-06,
"loss": 0.6555,
"step": 39200
},
{
"epoch": 0.39,
"learning_rate": 6.071910516688159e-06,
"loss": 0.6534,
"step": 39300
},
{
"epoch": 0.39,
"learning_rate": 6.061914615007847e-06,
"loss": 0.6555,
"step": 39400
},
{
"epoch": 0.39,
"learning_rate": 6.051918713327536e-06,
"loss": 0.6525,
"step": 39500
},
{
"epoch": 0.4,
"learning_rate": 6.041922811647225e-06,
"loss": 0.6548,
"step": 39600
},
{
"epoch": 0.4,
"learning_rate": 6.031926909966914e-06,
"loss": 0.6439,
"step": 39700
},
{
"epoch": 0.4,
"learning_rate": 6.0219310082866035e-06,
"loss": 0.6604,
"step": 39800
},
{
"epoch": 0.4,
"learning_rate": 6.011935106606291e-06,
"loss": 0.6473,
"step": 39900
},
{
"epoch": 0.4,
"learning_rate": 6.00193920492598e-06,
"loss": 0.6473,
"step": 40000
},
{
"epoch": 0.4,
"learning_rate": 5.9919433032456694e-06,
"loss": 0.6405,
"step": 40100
},
{
"epoch": 0.4,
"learning_rate": 5.981947401565359e-06,
"loss": 0.6438,
"step": 40200
},
{
"epoch": 0.4,
"learning_rate": 5.971951499885048e-06,
"loss": 0.6455,
"step": 40300
},
{
"epoch": 0.4,
"learning_rate": 5.961955598204736e-06,
"loss": 0.6465,
"step": 40400
},
{
"epoch": 0.4,
"learning_rate": 5.9519596965244255e-06,
"loss": 0.6487,
"step": 40500
},
{
"epoch": 0.41,
"learning_rate": 5.941963794844115e-06,
"loss": 0.6455,
"step": 40600
},
{
"epoch": 0.41,
"learning_rate": 5.931967893163804e-06,
"loss": 0.6373,
"step": 40700
},
{
"epoch": 0.41,
"learning_rate": 5.921971991483493e-06,
"loss": 0.6366,
"step": 40800
},
{
"epoch": 0.41,
"learning_rate": 5.911976089803181e-06,
"loss": 0.6443,
"step": 40900
},
{
"epoch": 0.41,
"learning_rate": 5.90198018812287e-06,
"loss": 0.6314,
"step": 41000
},
{
"epoch": 0.41,
"learning_rate": 5.891984286442559e-06,
"loss": 0.6372,
"step": 41100
},
{
"epoch": 0.41,
"learning_rate": 5.881988384762248e-06,
"loss": 0.6376,
"step": 41200
},
{
"epoch": 0.41,
"learning_rate": 5.871992483081937e-06,
"loss": 0.6455,
"step": 41300
},
{
"epoch": 0.41,
"learning_rate": 5.861996581401626e-06,
"loss": 0.6482,
"step": 41400
},
{
"epoch": 0.41,
"learning_rate": 5.852000679721314e-06,
"loss": 0.6345,
"step": 41500
},
{
"epoch": 0.42,
"learning_rate": 5.8420047780410035e-06,
"loss": 0.6328,
"step": 41600
},
{
"epoch": 0.42,
"learning_rate": 5.832008876360693e-06,
"loss": 0.627,
"step": 41700
},
{
"epoch": 0.42,
"learning_rate": 5.822012974680381e-06,
"loss": 0.6328,
"step": 41800
},
{
"epoch": 0.42,
"learning_rate": 5.81201707300007e-06,
"loss": 0.6312,
"step": 41900
},
{
"epoch": 0.42,
"learning_rate": 5.8020211713197595e-06,
"loss": 0.6348,
"step": 42000
},
{
"epoch": 0.42,
"learning_rate": 5.792025269639449e-06,
"loss": 0.6356,
"step": 42100
},
{
"epoch": 0.42,
"learning_rate": 5.782029367959138e-06,
"loss": 0.6383,
"step": 42200
},
{
"epoch": 0.42,
"learning_rate": 5.7720334662788255e-06,
"loss": 0.6286,
"step": 42300
},
{
"epoch": 0.42,
"learning_rate": 5.762037564598515e-06,
"loss": 0.6327,
"step": 42400
},
{
"epoch": 0.42,
"learning_rate": 5.752041662918204e-06,
"loss": 0.6379,
"step": 42500
},
{
"epoch": 0.43,
"learning_rate": 5.742045761237893e-06,
"loss": 0.6377,
"step": 42600
},
{
"epoch": 0.43,
"learning_rate": 5.7320498595575815e-06,
"loss": 0.6294,
"step": 42700
},
{
"epoch": 0.43,
"learning_rate": 5.722053957877271e-06,
"loss": 0.614,
"step": 42800
},
{
"epoch": 0.43,
"learning_rate": 5.71205805619696e-06,
"loss": 0.6243,
"step": 42900
},
{
"epoch": 0.43,
"learning_rate": 5.702062154516649e-06,
"loss": 0.6283,
"step": 43000
},
{
"epoch": 0.43,
"learning_rate": 5.6920662528363375e-06,
"loss": 0.6191,
"step": 43100
},
{
"epoch": 0.43,
"learning_rate": 5.682070351156026e-06,
"loss": 0.6263,
"step": 43200
},
{
"epoch": 0.43,
"learning_rate": 5.672074449475715e-06,
"loss": 0.6143,
"step": 43300
},
{
"epoch": 0.43,
"learning_rate": 5.662078547795404e-06,
"loss": 0.6165,
"step": 43400
},
{
"epoch": 0.43,
"learning_rate": 5.6520826461150936e-06,
"loss": 0.6137,
"step": 43500
},
{
"epoch": 0.44,
"learning_rate": 5.642086744434783e-06,
"loss": 0.6217,
"step": 43600
},
{
"epoch": 0.44,
"learning_rate": 5.63209084275447e-06,
"loss": 0.6211,
"step": 43700
},
{
"epoch": 0.44,
"learning_rate": 5.6220949410741595e-06,
"loss": 0.6216,
"step": 43800
},
{
"epoch": 0.44,
"learning_rate": 5.612099039393849e-06,
"loss": 0.6126,
"step": 43900
},
{
"epoch": 0.44,
"learning_rate": 5.602103137713538e-06,
"loss": 0.6142,
"step": 44000
},
{
"epoch": 0.44,
"learning_rate": 5.592107236033227e-06,
"loss": 0.6099,
"step": 44100
},
{
"epoch": 0.44,
"learning_rate": 5.5821113343529156e-06,
"loss": 0.6162,
"step": 44200
},
{
"epoch": 0.44,
"learning_rate": 5.572115432672605e-06,
"loss": 0.6153,
"step": 44300
},
{
"epoch": 0.44,
"learning_rate": 5.562119530992294e-06,
"loss": 0.6074,
"step": 44400
},
{
"epoch": 0.44,
"learning_rate": 5.552123629311983e-06,
"loss": 0.6148,
"step": 44500
},
{
"epoch": 0.45,
"learning_rate": 5.542127727631671e-06,
"loss": 0.6129,
"step": 44600
},
{
"epoch": 0.45,
"learning_rate": 5.53213182595136e-06,
"loss": 0.6148,
"step": 44700
},
{
"epoch": 0.45,
"learning_rate": 5.522135924271049e-06,
"loss": 0.6004,
"step": 44800
},
{
"epoch": 0.45,
"learning_rate": 5.512140022590738e-06,
"loss": 0.6047,
"step": 44900
},
{
"epoch": 0.45,
"learning_rate": 5.502144120910428e-06,
"loss": 0.6136,
"step": 45000
},
{
"epoch": 0.45,
"learning_rate": 5.492148219230116e-06,
"loss": 0.6016,
"step": 45100
},
{
"epoch": 0.45,
"learning_rate": 5.482152317549805e-06,
"loss": 0.6083,
"step": 45200
},
{
"epoch": 0.45,
"learning_rate": 5.472156415869494e-06,
"loss": 0.6135,
"step": 45300
},
{
"epoch": 0.45,
"learning_rate": 5.462160514189183e-06,
"loss": 0.6077,
"step": 45400
},
{
"epoch": 0.45,
"learning_rate": 5.452164612508872e-06,
"loss": 0.6039,
"step": 45500
},
{
"epoch": 0.46,
"learning_rate": 5.44216871082856e-06,
"loss": 0.6045,
"step": 45600
},
{
"epoch": 0.46,
"learning_rate": 5.43217280914825e-06,
"loss": 0.6093,
"step": 45700
},
{
"epoch": 0.46,
"learning_rate": 5.422176907467939e-06,
"loss": 0.6107,
"step": 45800
},
{
"epoch": 0.46,
"learning_rate": 5.412181005787628e-06,
"loss": 0.6076,
"step": 45900
},
{
"epoch": 0.46,
"learning_rate": 5.4021851041073156e-06,
"loss": 0.5965,
"step": 46000
},
{
"epoch": 0.46,
"learning_rate": 5.392189202427005e-06,
"loss": 0.6015,
"step": 46100
},
{
"epoch": 0.46,
"learning_rate": 5.382193300746694e-06,
"loss": 0.6067,
"step": 46200
},
{
"epoch": 0.46,
"learning_rate": 5.372197399066383e-06,
"loss": 0.5977,
"step": 46300
},
{
"epoch": 0.46,
"learning_rate": 5.3622014973860724e-06,
"loss": 0.5941,
"step": 46400
},
{
"epoch": 0.46,
"learning_rate": 5.352205595705761e-06,
"loss": 0.6005,
"step": 46500
},
{
"epoch": 0.47,
"learning_rate": 5.34220969402545e-06,
"loss": 0.5917,
"step": 46600
},
{
"epoch": 0.47,
"learning_rate": 5.332213792345139e-06,
"loss": 0.5976,
"step": 46700
},
{
"epoch": 0.47,
"learning_rate": 5.3222178906648285e-06,
"loss": 0.6022,
"step": 46800
},
{
"epoch": 0.47,
"learning_rate": 5.312221988984517e-06,
"loss": 0.5941,
"step": 46900
},
{
"epoch": 0.47,
"learning_rate": 5.302226087304205e-06,
"loss": 0.5894,
"step": 47000
},
{
"epoch": 0.47,
"learning_rate": 5.2922301856238944e-06,
"loss": 0.5959,
"step": 47100
},
{
"epoch": 0.47,
"learning_rate": 5.282234283943584e-06,
"loss": 0.6019,
"step": 47200
},
{
"epoch": 0.47,
"learning_rate": 5.272238382263273e-06,
"loss": 0.5858,
"step": 47300
},
{
"epoch": 0.47,
"learning_rate": 5.262242480582962e-06,
"loss": 0.5898,
"step": 47400
},
{
"epoch": 0.47,
"learning_rate": 5.25224657890265e-06,
"loss": 0.5963,
"step": 47500
},
{
"epoch": 0.48,
"learning_rate": 5.242250677222339e-06,
"loss": 0.5891,
"step": 47600
},
{
"epoch": 0.48,
"learning_rate": 5.232254775542028e-06,
"loss": 0.5905,
"step": 47700
},
{
"epoch": 0.48,
"learning_rate": 5.222258873861717e-06,
"loss": 0.5944,
"step": 47800
},
{
"epoch": 0.48,
"learning_rate": 5.212262972181406e-06,
"loss": 0.585,
"step": 47900
},
{
"epoch": 0.48,
"learning_rate": 5.202267070501095e-06,
"loss": 0.5844,
"step": 48000
},
{
"epoch": 0.48,
"learning_rate": 5.192271168820784e-06,
"loss": 0.586,
"step": 48100
},
{
"epoch": 0.48,
"learning_rate": 5.182275267140473e-06,
"loss": 0.5918,
"step": 48200
},
{
"epoch": 0.48,
"learning_rate": 5.1722793654601625e-06,
"loss": 0.5756,
"step": 48300
},
{
"epoch": 0.48,
"learning_rate": 5.16228346377985e-06,
"loss": 0.5863,
"step": 48400
},
{
"epoch": 0.48,
"learning_rate": 5.152287562099539e-06,
"loss": 0.5903,
"step": 48500
},
{
"epoch": 0.49,
"learning_rate": 5.1422916604192285e-06,
"loss": 0.5843,
"step": 48600
},
{
"epoch": 0.49,
"learning_rate": 5.132295758738918e-06,
"loss": 0.5743,
"step": 48700
},
{
"epoch": 0.49,
"learning_rate": 5.122299857058607e-06,
"loss": 0.5789,
"step": 48800
},
{
"epoch": 0.49,
"learning_rate": 5.112303955378295e-06,
"loss": 0.5777,
"step": 48900
},
{
"epoch": 0.49,
"learning_rate": 5.1023080536979845e-06,
"loss": 0.5762,
"step": 49000
},
{
"epoch": 0.49,
"learning_rate": 5.092312152017673e-06,
"loss": 0.5751,
"step": 49100
},
{
"epoch": 0.49,
"learning_rate": 5.082316250337362e-06,
"loss": 0.5698,
"step": 49200
},
{
"epoch": 0.49,
"learning_rate": 5.0723203486570505e-06,
"loss": 0.5774,
"step": 49300
},
{
"epoch": 0.49,
"learning_rate": 5.06232444697674e-06,
"loss": 0.5868,
"step": 49400
},
{
"epoch": 0.49,
"learning_rate": 5.052328545296429e-06,
"loss": 0.5693,
"step": 49500
},
{
"epoch": 0.5,
"learning_rate": 5.042332643616118e-06,
"loss": 0.5721,
"step": 49600
},
{
"epoch": 0.5,
"learning_rate": 5.032336741935807e-06,
"loss": 0.5667,
"step": 49700
},
{
"epoch": 0.5,
"learning_rate": 5.022340840255495e-06,
"loss": 0.5655,
"step": 49800
},
{
"epoch": 0.5,
"learning_rate": 5.012344938575184e-06,
"loss": 0.572,
"step": 49900
},
{
"epoch": 0.5,
"learning_rate": 5.002349036894873e-06,
"loss": 0.5827,
"step": 50000
},
{
"epoch": 0.5,
"learning_rate": 4.9923531352145625e-06,
"loss": 0.5666,
"step": 50100
},
{
"epoch": 0.5,
"learning_rate": 4.982357233534251e-06,
"loss": 0.569,
"step": 50200
},
{
"epoch": 0.5,
"learning_rate": 4.97236133185394e-06,
"loss": 0.5689,
"step": 50300
},
{
"epoch": 0.5,
"learning_rate": 4.962365430173629e-06,
"loss": 0.5698,
"step": 50400
},
{
"epoch": 0.5,
"learning_rate": 4.9523695284933186e-06,
"loss": 0.5726,
"step": 50500
},
{
"epoch": 0.51,
"learning_rate": 4.942373626813007e-06,
"loss": 0.575,
"step": 50600
},
{
"epoch": 0.51,
"learning_rate": 4.932377725132696e-06,
"loss": 0.5653,
"step": 50700
},
{
"epoch": 0.51,
"learning_rate": 4.922381823452385e-06,
"loss": 0.571,
"step": 50800
},
{
"epoch": 0.51,
"learning_rate": 4.912385921772074e-06,
"loss": 0.5665,
"step": 50900
},
{
"epoch": 0.51,
"learning_rate": 4.902390020091763e-06,
"loss": 0.5631,
"step": 51000
},
{
"epoch": 0.51,
"learning_rate": 4.892394118411451e-06,
"loss": 0.5666,
"step": 51100
},
{
"epoch": 0.51,
"learning_rate": 4.8823982167311406e-06,
"loss": 0.5605,
"step": 51200
},
{
"epoch": 0.51,
"learning_rate": 4.87240231505083e-06,
"loss": 0.5664,
"step": 51300
},
{
"epoch": 0.51,
"learning_rate": 4.862406413370518e-06,
"loss": 0.5583,
"step": 51400
},
{
"epoch": 0.51,
"learning_rate": 4.852410511690207e-06,
"loss": 0.5577,
"step": 51500
},
{
"epoch": 0.52,
"learning_rate": 4.842414610009897e-06,
"loss": 0.559,
"step": 51600
},
{
"epoch": 0.52,
"learning_rate": 4.832418708329585e-06,
"loss": 0.5554,
"step": 51700
},
{
"epoch": 0.52,
"learning_rate": 4.822422806649274e-06,
"loss": 0.5578,
"step": 51800
},
{
"epoch": 0.52,
"learning_rate": 4.812426904968963e-06,
"loss": 0.5585,
"step": 51900
},
{
"epoch": 0.52,
"learning_rate": 4.802431003288653e-06,
"loss": 0.5555,
"step": 52000
},
{
"epoch": 0.52,
"learning_rate": 4.792435101608341e-06,
"loss": 0.5559,
"step": 52100
},
{
"epoch": 0.52,
"learning_rate": 4.78243919992803e-06,
"loss": 0.5547,
"step": 52200
},
{
"epoch": 0.52,
"learning_rate": 4.7724432982477186e-06,
"loss": 0.5612,
"step": 52300
},
{
"epoch": 0.52,
"learning_rate": 4.762447396567408e-06,
"loss": 0.5531,
"step": 52400
},
{
"epoch": 0.52,
"learning_rate": 4.752451494887096e-06,
"loss": 0.5511,
"step": 52500
},
{
"epoch": 0.53,
"learning_rate": 4.742455593206785e-06,
"loss": 0.5535,
"step": 52600
},
{
"epoch": 0.53,
"learning_rate": 4.732459691526475e-06,
"loss": 0.5539,
"step": 52700
},
{
"epoch": 0.53,
"learning_rate": 4.722463789846163e-06,
"loss": 0.549,
"step": 52800
},
{
"epoch": 0.53,
"learning_rate": 4.712467888165852e-06,
"loss": 0.5489,
"step": 52900
},
{
"epoch": 0.53,
"learning_rate": 4.702471986485541e-06,
"loss": 0.5489,
"step": 53000
},
{
"epoch": 0.53,
"learning_rate": 4.692476084805231e-06,
"loss": 0.5561,
"step": 53100
},
{
"epoch": 0.53,
"learning_rate": 4.682480183124919e-06,
"loss": 0.5433,
"step": 53200
},
{
"epoch": 0.53,
"learning_rate": 4.672484281444608e-06,
"loss": 0.5429,
"step": 53300
},
{
"epoch": 0.53,
"learning_rate": 4.6624883797642974e-06,
"loss": 0.5369,
"step": 53400
},
{
"epoch": 0.53,
"learning_rate": 4.652492478083986e-06,
"loss": 0.5521,
"step": 53500
},
{
"epoch": 0.54,
"learning_rate": 4.642496576403675e-06,
"loss": 0.5414,
"step": 53600
},
{
"epoch": 0.54,
"learning_rate": 4.632500674723363e-06,
"loss": 0.5415,
"step": 53700
},
{
"epoch": 0.54,
"learning_rate": 4.622504773043053e-06,
"loss": 0.5442,
"step": 53800
},
{
"epoch": 0.54,
"learning_rate": 4.612508871362741e-06,
"loss": 0.5392,
"step": 53900
},
{
"epoch": 0.54,
"learning_rate": 4.60251296968243e-06,
"loss": 0.5385,
"step": 54000
},
{
"epoch": 0.54,
"learning_rate": 4.5925170680021194e-06,
"loss": 0.5385,
"step": 54100
},
{
"epoch": 0.54,
"learning_rate": 4.582521166321809e-06,
"loss": 0.5348,
"step": 54200
},
{
"epoch": 0.54,
"learning_rate": 4.572525264641498e-06,
"loss": 0.5387,
"step": 54300
},
{
"epoch": 0.54,
"learning_rate": 4.562529362961186e-06,
"loss": 0.536,
"step": 54400
},
{
"epoch": 0.54,
"learning_rate": 4.5525334612808755e-06,
"loss": 0.5332,
"step": 54500
},
{
"epoch": 0.55,
"learning_rate": 4.542537559600565e-06,
"loss": 0.5299,
"step": 54600
},
{
"epoch": 0.55,
"learning_rate": 4.532541657920253e-06,
"loss": 0.5403,
"step": 54700
},
{
"epoch": 0.55,
"learning_rate": 4.522545756239942e-06,
"loss": 0.5365,
"step": 54800
},
{
"epoch": 0.55,
"learning_rate": 4.512549854559631e-06,
"loss": 0.534,
"step": 54900
},
{
"epoch": 0.55,
"learning_rate": 4.50255395287932e-06,
"loss": 0.5358,
"step": 55000
},
{
"epoch": 0.55,
"learning_rate": 4.492558051199008e-06,
"loss": 0.5323,
"step": 55100
},
{
"epoch": 0.55,
"learning_rate": 4.4825621495186975e-06,
"loss": 0.5297,
"step": 55200
},
{
"epoch": 0.55,
"learning_rate": 4.472566247838387e-06,
"loss": 0.5333,
"step": 55300
},
{
"epoch": 0.55,
"learning_rate": 4.462570346158076e-06,
"loss": 0.5285,
"step": 55400
},
{
"epoch": 0.55,
"learning_rate": 4.452574444477764e-06,
"loss": 0.5328,
"step": 55500
},
{
"epoch": 0.56,
"learning_rate": 4.4425785427974535e-06,
"loss": 0.5395,
"step": 55600
},
{
"epoch": 0.56,
"learning_rate": 4.432582641117143e-06,
"loss": 0.5259,
"step": 55700
},
{
"epoch": 0.56,
"learning_rate": 4.422586739436831e-06,
"loss": 0.5231,
"step": 55800
},
{
"epoch": 0.56,
"learning_rate": 4.41259083775652e-06,
"loss": 0.5244,
"step": 55900
},
{
"epoch": 0.56,
"learning_rate": 4.4025949360762095e-06,
"loss": 0.5202,
"step": 56000
},
{
"epoch": 0.56,
"learning_rate": 4.392599034395898e-06,
"loss": 0.5236,
"step": 56100
},
{
"epoch": 0.56,
"learning_rate": 4.382603132715587e-06,
"loss": 0.5271,
"step": 56200
},
{
"epoch": 0.56,
"learning_rate": 4.3726072310352755e-06,
"loss": 0.518,
"step": 56300
},
{
"epoch": 0.56,
"learning_rate": 4.362611329354965e-06,
"loss": 0.5178,
"step": 56400
},
{
"epoch": 0.56,
"learning_rate": 4.352615427674654e-06,
"loss": 0.5288,
"step": 56500
},
{
"epoch": 0.57,
"learning_rate": 4.342619525994342e-06,
"loss": 0.5158,
"step": 56600
},
{
"epoch": 0.57,
"learning_rate": 4.3326236243140315e-06,
"loss": 0.5155,
"step": 56700
},
{
"epoch": 0.57,
"learning_rate": 4.322627722633721e-06,
"loss": 0.5211,
"step": 56800
},
{
"epoch": 0.57,
"learning_rate": 4.31263182095341e-06,
"loss": 0.5161,
"step": 56900
},
{
"epoch": 0.57,
"learning_rate": 4.302635919273098e-06,
"loss": 0.526,
"step": 57000
},
{
"epoch": 0.57,
"learning_rate": 4.2926400175927875e-06,
"loss": 0.5179,
"step": 57100
},
{
"epoch": 0.57,
"learning_rate": 4.282644115912476e-06,
"loss": 0.5239,
"step": 57200
},
{
"epoch": 0.57,
"learning_rate": 4.272648214232165e-06,
"loss": 0.5117,
"step": 57300
},
{
"epoch": 0.57,
"learning_rate": 4.262652312551854e-06,
"loss": 0.5185,
"step": 57400
},
{
"epoch": 0.57,
"learning_rate": 4.252656410871543e-06,
"loss": 0.5149,
"step": 57500
},
{
"epoch": 0.58,
"learning_rate": 4.242660509191232e-06,
"loss": 0.5135,
"step": 57600
},
{
"epoch": 0.58,
"learning_rate": 4.23266460751092e-06,
"loss": 0.5147,
"step": 57700
},
{
"epoch": 0.58,
"learning_rate": 4.2226687058306095e-06,
"loss": 0.5162,
"step": 57800
},
{
"epoch": 0.58,
"learning_rate": 4.212672804150299e-06,
"loss": 0.5109,
"step": 57900
},
{
"epoch": 0.58,
"learning_rate": 4.202676902469988e-06,
"loss": 0.5093,
"step": 58000
},
{
"epoch": 0.58,
"learning_rate": 4.192681000789677e-06,
"loss": 0.5135,
"step": 58100
},
{
"epoch": 0.58,
"learning_rate": 4.1826850991093655e-06,
"loss": 0.5058,
"step": 58200
},
{
"epoch": 0.58,
"learning_rate": 4.172689197429055e-06,
"loss": 0.5082,
"step": 58300
},
{
"epoch": 0.58,
"learning_rate": 4.162693295748743e-06,
"loss": 0.5122,
"step": 58400
},
{
"epoch": 0.58,
"learning_rate": 4.152697394068432e-06,
"loss": 0.5127,
"step": 58500
},
{
"epoch": 0.59,
"learning_rate": 4.142701492388122e-06,
"loss": 0.505,
"step": 58600
},
{
"epoch": 0.59,
"learning_rate": 4.13270559070781e-06,
"loss": 0.5058,
"step": 58700
},
{
"epoch": 0.59,
"learning_rate": 4.122709689027499e-06,
"loss": 0.5049,
"step": 58800
},
{
"epoch": 0.59,
"learning_rate": 4.1127137873471875e-06,
"loss": 0.5055,
"step": 58900
},
{
"epoch": 0.59,
"learning_rate": 4.102717885666877e-06,
"loss": 0.5061,
"step": 59000
},
{
"epoch": 0.59,
"learning_rate": 4.092721983986566e-06,
"loss": 0.5015,
"step": 59100
},
{
"epoch": 0.59,
"learning_rate": 4.082726082306255e-06,
"loss": 0.5052,
"step": 59200
},
{
"epoch": 0.59,
"learning_rate": 4.0727301806259436e-06,
"loss": 0.5028,
"step": 59300
},
{
"epoch": 0.59,
"learning_rate": 4.062734278945633e-06,
"loss": 0.504,
"step": 59400
},
{
"epoch": 0.59,
"learning_rate": 4.052738377265322e-06,
"loss": 0.5079,
"step": 59500
},
{
"epoch": 0.6,
"learning_rate": 4.04274247558501e-06,
"loss": 0.4977,
"step": 59600
},
{
"epoch": 0.6,
"learning_rate": 4.0327465739047e-06,
"loss": 0.4993,
"step": 59700
},
{
"epoch": 0.6,
"learning_rate": 4.022750672224388e-06,
"loss": 0.4933,
"step": 59800
},
{
"epoch": 0.6,
"learning_rate": 4.012754770544077e-06,
"loss": 0.4999,
"step": 59900
},
{
"epoch": 0.6,
"learning_rate": 4.002758868863766e-06,
"loss": 0.4964,
"step": 60000
},
{
"epoch": 0.6,
"learning_rate": 3.992762967183455e-06,
"loss": 0.5002,
"step": 60100
},
{
"epoch": 0.6,
"learning_rate": 3.982767065503144e-06,
"loss": 0.4923,
"step": 60200
},
{
"epoch": 0.6,
"learning_rate": 3.972771163822832e-06,
"loss": 0.4876,
"step": 60300
},
{
"epoch": 0.6,
"learning_rate": 3.962775262142522e-06,
"loss": 0.4963,
"step": 60400
},
{
"epoch": 0.6,
"learning_rate": 3.952779360462211e-06,
"loss": 0.5016,
"step": 60500
},
{
"epoch": 0.61,
"learning_rate": 3.9427834587819e-06,
"loss": 0.497,
"step": 60600
},
{
"epoch": 0.61,
"learning_rate": 3.932787557101589e-06,
"loss": 0.4883,
"step": 60700
},
{
"epoch": 0.61,
"learning_rate": 3.922791655421278e-06,
"loss": 0.4895,
"step": 60800
},
{
"epoch": 0.61,
"learning_rate": 3.912795753740967e-06,
"loss": 0.4914,
"step": 60900
},
{
"epoch": 0.61,
"learning_rate": 3.902799852060655e-06,
"loss": 0.4935,
"step": 61000
},
{
"epoch": 0.61,
"learning_rate": 3.8928039503803444e-06,
"loss": 0.4904,
"step": 61100
},
{
"epoch": 0.61,
"learning_rate": 3.882808048700033e-06,
"loss": 0.493,
"step": 61200
},
{
"epoch": 0.61,
"learning_rate": 3.872812147019722e-06,
"loss": 0.4843,
"step": 61300
},
{
"epoch": 0.61,
"learning_rate": 3.862816245339411e-06,
"loss": 0.4867,
"step": 61400
},
{
"epoch": 0.61,
"learning_rate": 3.8528203436591e-06,
"loss": 0.4933,
"step": 61500
},
{
"epoch": 0.62,
"learning_rate": 3.842824441978789e-06,
"loss": 0.4823,
"step": 61600
},
{
"epoch": 0.62,
"learning_rate": 3.832828540298478e-06,
"loss": 0.4873,
"step": 61700
},
{
"epoch": 0.62,
"learning_rate": 3.822832638618167e-06,
"loss": 0.4841,
"step": 61800
},
{
"epoch": 0.62,
"learning_rate": 3.812836736937856e-06,
"loss": 0.4867,
"step": 61900
},
{
"epoch": 0.62,
"learning_rate": 3.802840835257545e-06,
"loss": 0.4837,
"step": 62000
},
{
"epoch": 0.62,
"learning_rate": 3.7928449335772337e-06,
"loss": 0.4861,
"step": 62100
},
{
"epoch": 0.62,
"learning_rate": 3.7828490318969224e-06,
"loss": 0.4799,
"step": 62200
},
{
"epoch": 0.62,
"learning_rate": 3.7728531302166117e-06,
"loss": 0.4803,
"step": 62300
},
{
"epoch": 0.62,
"learning_rate": 3.7628572285363e-06,
"loss": 0.4808,
"step": 62400
},
{
"epoch": 0.62,
"learning_rate": 3.7528613268559893e-06,
"loss": 0.4829,
"step": 62500
},
{
"epoch": 0.63,
"learning_rate": 3.7428654251756785e-06,
"loss": 0.4802,
"step": 62600
},
{
"epoch": 0.63,
"learning_rate": 3.7328695234953673e-06,
"loss": 0.4816,
"step": 62700
},
{
"epoch": 0.63,
"learning_rate": 3.7228736218150565e-06,
"loss": 0.4847,
"step": 62800
},
{
"epoch": 0.63,
"learning_rate": 3.712877720134745e-06,
"loss": 0.4831,
"step": 62900
},
{
"epoch": 0.63,
"learning_rate": 3.702881818454434e-06,
"loss": 0.4816,
"step": 63000
},
{
"epoch": 0.63,
"learning_rate": 3.692885916774123e-06,
"loss": 0.474,
"step": 63100
},
{
"epoch": 0.63,
"learning_rate": 3.6828900150938117e-06,
"loss": 0.479,
"step": 63200
},
{
"epoch": 0.63,
"learning_rate": 3.672894113413501e-06,
"loss": 0.4773,
"step": 63300
},
{
"epoch": 0.63,
"learning_rate": 3.6628982117331897e-06,
"loss": 0.4765,
"step": 63400
},
{
"epoch": 0.63,
"learning_rate": 3.652902310052879e-06,
"loss": 0.4813,
"step": 63500
},
{
"epoch": 0.64,
"learning_rate": 3.6429064083725673e-06,
"loss": 0.4786,
"step": 63600
},
{
"epoch": 0.64,
"learning_rate": 3.6329105066922565e-06,
"loss": 0.4815,
"step": 63700
},
{
"epoch": 0.64,
"learning_rate": 3.6229146050119453e-06,
"loss": 0.4771,
"step": 63800
},
{
"epoch": 0.64,
"learning_rate": 3.6129187033316345e-06,
"loss": 0.4769,
"step": 63900
},
{
"epoch": 0.64,
"learning_rate": 3.6029228016513233e-06,
"loss": 0.4739,
"step": 64000
},
{
"epoch": 0.64,
"learning_rate": 3.592926899971012e-06,
"loss": 0.4776,
"step": 64100
},
{
"epoch": 0.64,
"learning_rate": 3.5829309982907013e-06,
"loss": 0.4695,
"step": 64200
},
{
"epoch": 0.64,
"learning_rate": 3.5729350966103897e-06,
"loss": 0.4606,
"step": 64300
},
{
"epoch": 0.64,
"learning_rate": 3.562939194930079e-06,
"loss": 0.4716,
"step": 64400
},
{
"epoch": 0.64,
"learning_rate": 3.5529432932497677e-06,
"loss": 0.4677,
"step": 64500
},
{
"epoch": 0.65,
"learning_rate": 3.542947391569457e-06,
"loss": 0.4712,
"step": 64600
},
{
"epoch": 0.65,
"learning_rate": 3.532951489889146e-06,
"loss": 0.4682,
"step": 64700
},
{
"epoch": 0.65,
"learning_rate": 3.5229555882088345e-06,
"loss": 0.4672,
"step": 64800
},
{
"epoch": 0.65,
"learning_rate": 3.5129596865285237e-06,
"loss": 0.4691,
"step": 64900
},
{
"epoch": 0.65,
"learning_rate": 3.502963784848212e-06,
"loss": 0.4656,
"step": 65000
},
{
"epoch": 0.65,
"learning_rate": 3.4929678831679013e-06,
"loss": 0.4619,
"step": 65100
},
{
"epoch": 0.65,
"learning_rate": 3.4829719814875905e-06,
"loss": 0.4595,
"step": 65200
},
{
"epoch": 0.65,
"learning_rate": 3.4729760798072793e-06,
"loss": 0.4688,
"step": 65300
},
{
"epoch": 0.65,
"learning_rate": 3.4629801781269686e-06,
"loss": 0.4658,
"step": 65400
},
{
"epoch": 0.65,
"learning_rate": 3.452984276446657e-06,
"loss": 0.4598,
"step": 65500
},
{
"epoch": 0.66,
"learning_rate": 3.442988374766346e-06,
"loss": 0.4623,
"step": 65600
},
{
"epoch": 0.66,
"learning_rate": 3.432992473086035e-06,
"loss": 0.4558,
"step": 65700
},
{
"epoch": 0.66,
"learning_rate": 3.4229965714057237e-06,
"loss": 0.4675,
"step": 65800
},
{
"epoch": 0.66,
"learning_rate": 3.413000669725413e-06,
"loss": 0.4548,
"step": 65900
},
{
"epoch": 0.66,
"learning_rate": 3.4030047680451018e-06,
"loss": 0.4635,
"step": 66000
},
{
"epoch": 0.66,
"learning_rate": 3.393008866364791e-06,
"loss": 0.4551,
"step": 66100
},
{
"epoch": 0.66,
"learning_rate": 3.3830129646844793e-06,
"loss": 0.4619,
"step": 66200
},
{
"epoch": 0.66,
"learning_rate": 3.3730170630041686e-06,
"loss": 0.4581,
"step": 66300
},
{
"epoch": 0.66,
"learning_rate": 3.3630211613238574e-06,
"loss": 0.4606,
"step": 66400
},
{
"epoch": 0.66,
"learning_rate": 3.3530252596435466e-06,
"loss": 0.4521,
"step": 66500
},
{
"epoch": 0.67,
"learning_rate": 3.3430293579632354e-06,
"loss": 0.4602,
"step": 66600
},
{
"epoch": 0.67,
"learning_rate": 3.333033456282924e-06,
"loss": 0.4598,
"step": 66700
},
{
"epoch": 0.67,
"learning_rate": 3.3230375546026134e-06,
"loss": 0.4567,
"step": 66800
},
{
"epoch": 0.67,
"learning_rate": 3.3130416529223018e-06,
"loss": 0.4515,
"step": 66900
},
{
"epoch": 0.67,
"learning_rate": 3.303045751241991e-06,
"loss": 0.449,
"step": 67000
},
{
"epoch": 0.67,
"learning_rate": 3.2930498495616798e-06,
"loss": 0.4539,
"step": 67100
},
{
"epoch": 0.67,
"learning_rate": 3.283053947881369e-06,
"loss": 0.4501,
"step": 67200
},
{
"epoch": 0.67,
"learning_rate": 3.2730580462010582e-06,
"loss": 0.4509,
"step": 67300
},
{
"epoch": 0.67,
"learning_rate": 3.2630621445207466e-06,
"loss": 0.4487,
"step": 67400
},
{
"epoch": 0.67,
"learning_rate": 3.253066242840436e-06,
"loss": 0.4492,
"step": 67500
},
{
"epoch": 0.68,
"learning_rate": 3.2430703411601246e-06,
"loss": 0.4477,
"step": 67600
},
{
"epoch": 0.68,
"learning_rate": 3.2330744394798134e-06,
"loss": 0.4444,
"step": 67700
},
{
"epoch": 0.68,
"learning_rate": 3.223078537799502e-06,
"loss": 0.4431,
"step": 67800
},
{
"epoch": 0.68,
"learning_rate": 3.2130826361191914e-06,
"loss": 0.4444,
"step": 67900
},
{
"epoch": 0.68,
"learning_rate": 3.2030867344388806e-06,
"loss": 0.4446,
"step": 68000
},
{
"epoch": 0.68,
"learning_rate": 3.193090832758569e-06,
"loss": 0.4443,
"step": 68100
},
{
"epoch": 0.68,
"learning_rate": 3.1830949310782582e-06,
"loss": 0.4503,
"step": 68200
},
{
"epoch": 0.68,
"learning_rate": 3.173099029397947e-06,
"loss": 0.4501,
"step": 68300
},
{
"epoch": 0.68,
"learning_rate": 3.1631031277176362e-06,
"loss": 0.4432,
"step": 68400
},
{
"epoch": 0.68,
"learning_rate": 3.153107226037325e-06,
"loss": 0.4398,
"step": 68500
},
{
"epoch": 0.69,
"learning_rate": 3.143111324357014e-06,
"loss": 0.446,
"step": 68600
},
{
"epoch": 0.69,
"learning_rate": 3.133115422676703e-06,
"loss": 0.4467,
"step": 68700
},
{
"epoch": 0.69,
"learning_rate": 3.1231195209963914e-06,
"loss": 0.4381,
"step": 68800
},
{
"epoch": 0.69,
"learning_rate": 3.1131236193160806e-06,
"loss": 0.4365,
"step": 68900
},
{
"epoch": 0.69,
"learning_rate": 3.1031277176357694e-06,
"loss": 0.4557,
"step": 69000
},
{
"epoch": 0.69,
"learning_rate": 3.0931318159554586e-06,
"loss": 0.4466,
"step": 69100
},
{
"epoch": 0.69,
"learning_rate": 3.083135914275148e-06,
"loss": 0.4401,
"step": 69200
},
{
"epoch": 0.69,
"learning_rate": 3.0731400125948362e-06,
"loss": 0.4382,
"step": 69300
},
{
"epoch": 0.69,
"learning_rate": 3.0631441109145255e-06,
"loss": 0.4375,
"step": 69400
},
{
"epoch": 0.69,
"learning_rate": 3.0531482092342143e-06,
"loss": 0.4404,
"step": 69500
},
{
"epoch": 0.7,
"learning_rate": 3.043152307553903e-06,
"loss": 0.434,
"step": 69600
},
{
"epoch": 0.7,
"learning_rate": 3.033156405873592e-06,
"loss": 0.4341,
"step": 69700
},
{
"epoch": 0.7,
"learning_rate": 3.023160504193281e-06,
"loss": 0.4348,
"step": 69800
},
{
"epoch": 0.7,
"learning_rate": 3.0131646025129703e-06,
"loss": 0.4451,
"step": 69900
},
{
"epoch": 0.7,
"learning_rate": 3.0031687008326587e-06,
"loss": 0.44,
"step": 70000
},
{
"epoch": 0.7,
"learning_rate": 2.993172799152348e-06,
"loss": 0.439,
"step": 70100
},
{
"epoch": 0.7,
"learning_rate": 2.9831768974720367e-06,
"loss": 0.4355,
"step": 70200
},
{
"epoch": 0.7,
"learning_rate": 2.973180995791726e-06,
"loss": 0.4304,
"step": 70300
},
{
"epoch": 0.7,
"learning_rate": 2.9631850941114143e-06,
"loss": 0.4335,
"step": 70400
},
{
"epoch": 0.7,
"learning_rate": 2.9531891924311035e-06,
"loss": 0.4355,
"step": 70500
},
{
"epoch": 0.71,
"learning_rate": 2.9431932907507927e-06,
"loss": 0.4289,
"step": 70600
},
{
"epoch": 0.71,
"learning_rate": 2.933197389070481e-06,
"loss": 0.426,
"step": 70700
},
{
"epoch": 0.71,
"learning_rate": 2.9232014873901703e-06,
"loss": 0.4282,
"step": 70800
},
{
"epoch": 0.71,
"learning_rate": 2.913205585709859e-06,
"loss": 0.4296,
"step": 70900
},
{
"epoch": 0.71,
"learning_rate": 2.9032096840295483e-06,
"loss": 0.4262,
"step": 71000
},
{
"epoch": 0.71,
"learning_rate": 2.8932137823492367e-06,
"loss": 0.4289,
"step": 71100
},
{
"epoch": 0.71,
"learning_rate": 2.883217880668926e-06,
"loss": 0.4217,
"step": 71200
},
{
"epoch": 0.71,
"learning_rate": 2.873221978988615e-06,
"loss": 0.4215,
"step": 71300
},
{
"epoch": 0.71,
"learning_rate": 2.863226077308304e-06,
"loss": 0.4342,
"step": 71400
},
{
"epoch": 0.71,
"learning_rate": 2.8532301756279927e-06,
"loss": 0.4274,
"step": 71500
},
{
"epoch": 0.72,
"learning_rate": 2.8432342739476815e-06,
"loss": 0.422,
"step": 71600
},
{
"epoch": 0.72,
"learning_rate": 2.8332383722673707e-06,
"loss": 0.4217,
"step": 71700
},
{
"epoch": 0.72,
"learning_rate": 2.82324247058706e-06,
"loss": 0.425,
"step": 71800
},
{
"epoch": 0.72,
"learning_rate": 2.8132465689067483e-06,
"loss": 0.4239,
"step": 71900
},
{
"epoch": 0.72,
"learning_rate": 2.8032506672264375e-06,
"loss": 0.4246,
"step": 72000
},
{
"epoch": 0.72,
"learning_rate": 2.7932547655461263e-06,
"loss": 0.4259,
"step": 72100
},
{
"epoch": 0.72,
"learning_rate": 2.7832588638658155e-06,
"loss": 0.422,
"step": 72200
},
{
"epoch": 0.72,
"learning_rate": 2.773262962185504e-06,
"loss": 0.4205,
"step": 72300
},
{
"epoch": 0.72,
"learning_rate": 2.763267060505193e-06,
"loss": 0.4225,
"step": 72400
},
{
"epoch": 0.72,
"learning_rate": 2.7532711588248823e-06,
"loss": 0.4144,
"step": 72500
},
{
"epoch": 0.73,
"learning_rate": 2.7432752571445707e-06,
"loss": 0.4194,
"step": 72600
},
{
"epoch": 0.73,
"learning_rate": 2.73327935546426e-06,
"loss": 0.4251,
"step": 72700
},
{
"epoch": 0.73,
"learning_rate": 2.7232834537839487e-06,
"loss": 0.4178,
"step": 72800
},
{
"epoch": 0.73,
"learning_rate": 2.713287552103638e-06,
"loss": 0.4189,
"step": 72900
},
{
"epoch": 0.73,
"learning_rate": 2.7032916504233263e-06,
"loss": 0.4227,
"step": 73000
},
{
"epoch": 0.73,
"learning_rate": 2.6932957487430155e-06,
"loss": 0.4183,
"step": 73100
},
{
"epoch": 0.73,
"learning_rate": 2.6832998470627048e-06,
"loss": 0.4157,
"step": 73200
},
{
"epoch": 0.73,
"learning_rate": 2.6733039453823936e-06,
"loss": 0.421,
"step": 73300
},
{
"epoch": 0.73,
"learning_rate": 2.6633080437020824e-06,
"loss": 0.4197,
"step": 73400
},
{
"epoch": 0.73,
"learning_rate": 2.653312142021771e-06,
"loss": 0.4119,
"step": 73500
},
{
"epoch": 0.74,
"learning_rate": 2.6433162403414604e-06,
"loss": 0.4087,
"step": 73600
},
{
"epoch": 0.74,
"learning_rate": 2.6333203386611487e-06,
"loss": 0.4137,
"step": 73700
},
{
"epoch": 0.74,
"learning_rate": 2.623324436980838e-06,
"loss": 0.4124,
"step": 73800
},
{
"epoch": 0.74,
"learning_rate": 2.613328535300527e-06,
"loss": 0.4092,
"step": 73900
},
{
"epoch": 0.74,
"learning_rate": 2.603332633620216e-06,
"loss": 0.4101,
"step": 74000
},
{
"epoch": 0.74,
"learning_rate": 2.593336731939905e-06,
"loss": 0.4115,
"step": 74100
},
{
"epoch": 0.74,
"learning_rate": 2.5833408302595936e-06,
"loss": 0.4104,
"step": 74200
},
{
"epoch": 0.74,
"learning_rate": 2.5733449285792828e-06,
"loss": 0.409,
"step": 74300
},
{
"epoch": 0.74,
"learning_rate": 2.563349026898971e-06,
"loss": 0.415,
"step": 74400
},
{
"epoch": 0.74,
"learning_rate": 2.5533531252186604e-06,
"loss": 0.4013,
"step": 74500
},
{
"epoch": 0.75,
"learning_rate": 2.5433572235383496e-06,
"loss": 0.4008,
"step": 74600
},
{
"epoch": 0.75,
"learning_rate": 2.5333613218580384e-06,
"loss": 0.4131,
"step": 74700
},
{
"epoch": 0.75,
"learning_rate": 2.5233654201777276e-06,
"loss": 0.4123,
"step": 74800
},
{
"epoch": 0.75,
"learning_rate": 2.513369518497416e-06,
"loss": 0.4132,
"step": 74900
},
{
"epoch": 0.75,
"learning_rate": 2.503373616817105e-06,
"loss": 0.4111,
"step": 75000
},
{
"epoch": 0.75,
"learning_rate": 2.493377715136794e-06,
"loss": 0.4043,
"step": 75100
},
{
"epoch": 0.75,
"learning_rate": 2.483381813456483e-06,
"loss": 0.4106,
"step": 75200
},
{
"epoch": 0.75,
"learning_rate": 2.473385911776172e-06,
"loss": 0.4051,
"step": 75300
},
{
"epoch": 0.75,
"learning_rate": 2.463390010095861e-06,
"loss": 0.4004,
"step": 75400
},
{
"epoch": 0.75,
"learning_rate": 2.45339410841555e-06,
"loss": 0.4012,
"step": 75500
},
{
"epoch": 0.76,
"learning_rate": 2.443398206735239e-06,
"loss": 0.4039,
"step": 75600
},
{
"epoch": 0.76,
"learning_rate": 2.4334023050549276e-06,
"loss": 0.403,
"step": 75700
},
{
"epoch": 0.76,
"learning_rate": 2.4234064033746164e-06,
"loss": 0.4111,
"step": 75800
},
{
"epoch": 0.76,
"learning_rate": 2.4134105016943056e-06,
"loss": 0.3985,
"step": 75900
},
{
"epoch": 0.76,
"learning_rate": 2.4034146000139944e-06,
"loss": 0.4118,
"step": 76000
},
{
"epoch": 0.76,
"learning_rate": 2.3934186983336836e-06,
"loss": 0.397,
"step": 76100
},
{
"epoch": 0.76,
"learning_rate": 2.3834227966533724e-06,
"loss": 0.3907,
"step": 76200
},
{
"epoch": 0.76,
"learning_rate": 2.3734268949730612e-06,
"loss": 0.3998,
"step": 76300
},
{
"epoch": 0.76,
"learning_rate": 2.36343099329275e-06,
"loss": 0.3988,
"step": 76400
},
{
"epoch": 0.76,
"learning_rate": 2.3534350916124392e-06,
"loss": 0.3976,
"step": 76500
},
{
"epoch": 0.77,
"learning_rate": 2.343439189932128e-06,
"loss": 0.402,
"step": 76600
},
{
"epoch": 0.77,
"learning_rate": 2.333443288251817e-06,
"loss": 0.3991,
"step": 76700
},
{
"epoch": 0.77,
"learning_rate": 2.323447386571506e-06,
"loss": 0.402,
"step": 76800
},
{
"epoch": 0.77,
"learning_rate": 2.313451484891195e-06,
"loss": 0.3953,
"step": 76900
},
{
"epoch": 0.77,
"learning_rate": 2.3034555832108836e-06,
"loss": 0.3934,
"step": 77000
},
{
"epoch": 0.77,
"learning_rate": 2.2934596815305724e-06,
"loss": 0.3893,
"step": 77100
},
{
"epoch": 0.77,
"learning_rate": 2.2834637798502617e-06,
"loss": 0.4021,
"step": 77200
},
{
"epoch": 0.77,
"learning_rate": 2.2734678781699505e-06,
"loss": 0.3921,
"step": 77300
},
{
"epoch": 0.77,
"learning_rate": 2.2634719764896397e-06,
"loss": 0.3945,
"step": 77400
},
{
"epoch": 0.77,
"learning_rate": 2.2534760748093285e-06,
"loss": 0.3959,
"step": 77500
},
{
"epoch": 0.78,
"learning_rate": 2.2434801731290173e-06,
"loss": 0.3946,
"step": 77600
},
{
"epoch": 0.78,
"learning_rate": 2.233484271448706e-06,
"loss": 0.3946,
"step": 77700
},
{
"epoch": 0.78,
"learning_rate": 2.2234883697683953e-06,
"loss": 0.386,
"step": 77800
},
{
"epoch": 0.78,
"learning_rate": 2.213492468088084e-06,
"loss": 0.3997,
"step": 77900
},
{
"epoch": 0.78,
"learning_rate": 2.203496566407773e-06,
"loss": 0.3903,
"step": 78000
},
{
"epoch": 0.78,
"learning_rate": 2.193500664727462e-06,
"loss": 0.3926,
"step": 78100
},
{
"epoch": 0.78,
"learning_rate": 2.183504763047151e-06,
"loss": 0.3835,
"step": 78200
},
{
"epoch": 0.78,
"learning_rate": 2.1735088613668397e-06,
"loss": 0.3882,
"step": 78300
},
{
"epoch": 0.78,
"learning_rate": 2.163512959686529e-06,
"loss": 0.3914,
"step": 78400
},
{
"epoch": 0.78,
"learning_rate": 2.1535170580062177e-06,
"loss": 0.3881,
"step": 78500
},
{
"epoch": 0.79,
"learning_rate": 2.1435211563259065e-06,
"loss": 0.3884,
"step": 78600
},
{
"epoch": 0.79,
"learning_rate": 2.1335252546455953e-06,
"loss": 0.3876,
"step": 78700
},
{
"epoch": 0.79,
"learning_rate": 2.1235293529652845e-06,
"loss": 0.3857,
"step": 78800
},
{
"epoch": 0.79,
"learning_rate": 2.1135334512849733e-06,
"loss": 0.3794,
"step": 78900
},
{
"epoch": 0.79,
"learning_rate": 2.103537549604662e-06,
"loss": 0.3873,
"step": 79000
},
{
"epoch": 0.79,
"learning_rate": 2.0935416479243513e-06,
"loss": 0.3831,
"step": 79100
},
{
"epoch": 0.79,
"learning_rate": 2.08354574624404e-06,
"loss": 0.3805,
"step": 79200
},
{
"epoch": 0.79,
"learning_rate": 2.073549844563729e-06,
"loss": 0.3866,
"step": 79300
},
{
"epoch": 0.79,
"learning_rate": 2.063553942883418e-06,
"loss": 0.3887,
"step": 79400
},
{
"epoch": 0.79,
"learning_rate": 2.053558041203107e-06,
"loss": 0.3812,
"step": 79500
},
{
"epoch": 0.8,
"learning_rate": 2.0435621395227957e-06,
"loss": 0.3807,
"step": 79600
},
{
"epoch": 0.8,
"learning_rate": 2.033566237842485e-06,
"loss": 0.3764,
"step": 79700
},
{
"epoch": 0.8,
"learning_rate": 2.0235703361621737e-06,
"loss": 0.3812,
"step": 79800
},
{
"epoch": 0.8,
"learning_rate": 2.0135744344818625e-06,
"loss": 0.3721,
"step": 79900
},
{
"epoch": 0.8,
"learning_rate": 2.0035785328015513e-06,
"loss": 0.3796,
"step": 80000
},
{
"epoch": 0.8,
"learning_rate": 1.9935826311212405e-06,
"loss": 0.3769,
"step": 80100
},
{
"epoch": 0.8,
"learning_rate": 1.9835867294409293e-06,
"loss": 0.3933,
"step": 80200
},
{
"epoch": 0.8,
"learning_rate": 1.973590827760618e-06,
"loss": 0.3763,
"step": 80300
},
{
"epoch": 0.8,
"learning_rate": 1.9635949260803074e-06,
"loss": 0.3776,
"step": 80400
},
{
"epoch": 0.8,
"learning_rate": 1.953599024399996e-06,
"loss": 0.3797,
"step": 80500
},
{
"epoch": 0.81,
"learning_rate": 1.943603122719685e-06,
"loss": 0.381,
"step": 80600
},
{
"epoch": 0.81,
"learning_rate": 1.933607221039374e-06,
"loss": 0.38,
"step": 80700
},
{
"epoch": 0.81,
"learning_rate": 1.923611319359063e-06,
"loss": 0.3772,
"step": 80800
},
{
"epoch": 0.81,
"learning_rate": 1.9136154176787518e-06,
"loss": 0.3724,
"step": 80900
},
{
"epoch": 0.81,
"learning_rate": 1.9036195159984408e-06,
"loss": 0.3808,
"step": 81000
},
{
"epoch": 0.81,
"learning_rate": 1.8936236143181298e-06,
"loss": 0.3799,
"step": 81100
},
{
"epoch": 0.81,
"learning_rate": 1.8836277126378186e-06,
"loss": 0.3779,
"step": 81200
},
{
"epoch": 0.81,
"learning_rate": 1.8736318109575074e-06,
"loss": 0.3841,
"step": 81300
},
{
"epoch": 0.81,
"learning_rate": 1.8636359092771966e-06,
"loss": 0.3795,
"step": 81400
},
{
"epoch": 0.81,
"learning_rate": 1.8536400075968856e-06,
"loss": 0.3698,
"step": 81500
},
{
"epoch": 0.82,
"learning_rate": 1.8436441059165744e-06,
"loss": 0.3682,
"step": 81600
},
{
"epoch": 0.82,
"learning_rate": 1.8336482042362632e-06,
"loss": 0.3751,
"step": 81700
},
{
"epoch": 0.82,
"learning_rate": 1.8236523025559522e-06,
"loss": 0.3708,
"step": 81800
},
{
"epoch": 0.82,
"learning_rate": 1.813656400875641e-06,
"loss": 0.3635,
"step": 81900
},
{
"epoch": 0.82,
"learning_rate": 1.80366049919533e-06,
"loss": 0.3749,
"step": 82000
},
{
"epoch": 0.82,
"learning_rate": 1.793664597515019e-06,
"loss": 0.36,
"step": 82100
},
{
"epoch": 0.82,
"learning_rate": 1.783668695834708e-06,
"loss": 0.3696,
"step": 82200
},
{
"epoch": 0.82,
"learning_rate": 1.7736727941543968e-06,
"loss": 0.3669,
"step": 82300
},
{
"epoch": 0.82,
"learning_rate": 1.7636768924740858e-06,
"loss": 0.3714,
"step": 82400
},
{
"epoch": 0.82,
"learning_rate": 1.7536809907937746e-06,
"loss": 0.3643,
"step": 82500
},
{
"epoch": 0.83,
"learning_rate": 1.7436850891134636e-06,
"loss": 0.3672,
"step": 82600
},
{
"epoch": 0.83,
"learning_rate": 1.7336891874331526e-06,
"loss": 0.3738,
"step": 82700
},
{
"epoch": 0.83,
"learning_rate": 1.7236932857528416e-06,
"loss": 0.3739,
"step": 82800
},
{
"epoch": 0.83,
"learning_rate": 1.7136973840725304e-06,
"loss": 0.3593,
"step": 82900
},
{
"epoch": 0.83,
"learning_rate": 1.7037014823922194e-06,
"loss": 0.3703,
"step": 83000
},
{
"epoch": 0.83,
"learning_rate": 1.6937055807119082e-06,
"loss": 0.3653,
"step": 83100
},
{
"epoch": 0.83,
"learning_rate": 1.683709679031597e-06,
"loss": 0.3647,
"step": 83200
},
{
"epoch": 0.83,
"learning_rate": 1.673713777351286e-06,
"loss": 0.3707,
"step": 83300
},
{
"epoch": 0.83,
"learning_rate": 1.6637178756709752e-06,
"loss": 0.3667,
"step": 83400
},
{
"epoch": 0.83,
"learning_rate": 1.653721973990664e-06,
"loss": 0.3677,
"step": 83500
},
{
"epoch": 0.84,
"learning_rate": 1.6437260723103528e-06,
"loss": 0.3628,
"step": 83600
},
{
"epoch": 0.84,
"learning_rate": 1.6337301706300418e-06,
"loss": 0.3608,
"step": 83700
},
{
"epoch": 0.84,
"learning_rate": 1.6237342689497306e-06,
"loss": 0.363,
"step": 83800
},
{
"epoch": 0.84,
"learning_rate": 1.6137383672694196e-06,
"loss": 0.3646,
"step": 83900
},
{
"epoch": 0.84,
"learning_rate": 1.6037424655891086e-06,
"loss": 0.3572,
"step": 84000
},
{
"epoch": 0.84,
"learning_rate": 1.5937465639087977e-06,
"loss": 0.3531,
"step": 84100
},
{
"epoch": 0.84,
"learning_rate": 1.5837506622284864e-06,
"loss": 0.3645,
"step": 84200
},
{
"epoch": 0.84,
"learning_rate": 1.5737547605481755e-06,
"loss": 0.3635,
"step": 84300
},
{
"epoch": 0.84,
"learning_rate": 1.5637588588678642e-06,
"loss": 0.3623,
"step": 84400
},
{
"epoch": 0.84,
"learning_rate": 1.5537629571875533e-06,
"loss": 0.3583,
"step": 84500
},
{
"epoch": 0.85,
"learning_rate": 1.543767055507242e-06,
"loss": 0.362,
"step": 84600
},
{
"epoch": 0.85,
"learning_rate": 1.5337711538269313e-06,
"loss": 0.3616,
"step": 84700
},
{
"epoch": 0.85,
"learning_rate": 1.52377525214662e-06,
"loss": 0.3517,
"step": 84800
},
{
"epoch": 0.85,
"learning_rate": 1.513779350466309e-06,
"loss": 0.3615,
"step": 84900
},
{
"epoch": 0.85,
"learning_rate": 1.5037834487859979e-06,
"loss": 0.3553,
"step": 85000
},
{
"epoch": 0.85,
"learning_rate": 1.4937875471056867e-06,
"loss": 0.3582,
"step": 85100
},
{
"epoch": 0.85,
"learning_rate": 1.4837916454253757e-06,
"loss": 0.3581,
"step": 85200
},
{
"epoch": 0.85,
"learning_rate": 1.4737957437450645e-06,
"loss": 0.3648,
"step": 85300
},
{
"epoch": 0.85,
"learning_rate": 1.4637998420647537e-06,
"loss": 0.3561,
"step": 85400
},
{
"epoch": 0.85,
"learning_rate": 1.4538039403844425e-06,
"loss": 0.3541,
"step": 85500
},
{
"epoch": 0.86,
"learning_rate": 1.4438080387041315e-06,
"loss": 0.3578,
"step": 85600
},
{
"epoch": 0.86,
"learning_rate": 1.4338121370238203e-06,
"loss": 0.3656,
"step": 85700
},
{
"epoch": 0.86,
"learning_rate": 1.4238162353435093e-06,
"loss": 0.3484,
"step": 85800
},
{
"epoch": 0.86,
"learning_rate": 1.413820333663198e-06,
"loss": 0.3539,
"step": 85900
},
{
"epoch": 0.86,
"learning_rate": 1.4038244319828873e-06,
"loss": 0.3559,
"step": 86000
},
{
"epoch": 0.86,
"learning_rate": 1.393828530302576e-06,
"loss": 0.3499,
"step": 86100
},
{
"epoch": 0.86,
"learning_rate": 1.3838326286222651e-06,
"loss": 0.3553,
"step": 86200
},
{
"epoch": 0.86,
"learning_rate": 1.373836726941954e-06,
"loss": 0.3655,
"step": 86300
},
{
"epoch": 0.86,
"learning_rate": 1.3638408252616427e-06,
"loss": 0.3574,
"step": 86400
},
{
"epoch": 0.86,
"learning_rate": 1.3538449235813317e-06,
"loss": 0.3513,
"step": 86500
},
{
"epoch": 0.87,
"learning_rate": 1.3438490219010205e-06,
"loss": 0.3615,
"step": 86600
},
{
"epoch": 0.87,
"learning_rate": 1.3338531202207097e-06,
"loss": 0.3558,
"step": 86700
},
{
"epoch": 0.87,
"learning_rate": 1.3238572185403985e-06,
"loss": 0.3517,
"step": 86800
},
{
"epoch": 0.87,
"learning_rate": 1.3138613168600875e-06,
"loss": 0.3538,
"step": 86900
},
{
"epoch": 0.87,
"learning_rate": 1.3038654151797763e-06,
"loss": 0.3583,
"step": 87000
},
{
"epoch": 0.87,
"learning_rate": 1.2938695134994653e-06,
"loss": 0.3598,
"step": 87100
},
{
"epoch": 0.87,
"learning_rate": 1.2838736118191541e-06,
"loss": 0.3459,
"step": 87200
},
{
"epoch": 0.87,
"learning_rate": 1.2738777101388433e-06,
"loss": 0.3526,
"step": 87300
},
{
"epoch": 0.87,
"learning_rate": 1.2638818084585321e-06,
"loss": 0.3477,
"step": 87400
},
{
"epoch": 0.87,
"learning_rate": 1.2538859067782211e-06,
"loss": 0.343,
"step": 87500
},
{
"epoch": 0.88,
"learning_rate": 1.24389000509791e-06,
"loss": 0.3425,
"step": 87600
},
{
"epoch": 0.88,
"learning_rate": 1.233894103417599e-06,
"loss": 0.3503,
"step": 87700
},
{
"epoch": 0.88,
"learning_rate": 1.223898201737288e-06,
"loss": 0.3471,
"step": 87800
},
{
"epoch": 0.88,
"learning_rate": 1.2139023000569767e-06,
"loss": 0.3512,
"step": 87900
},
{
"epoch": 0.88,
"learning_rate": 1.2039063983766655e-06,
"loss": 0.347,
"step": 88000
},
{
"epoch": 0.88,
"learning_rate": 1.1939104966963545e-06,
"loss": 0.3472,
"step": 88100
},
{
"epoch": 0.88,
"learning_rate": 1.1839145950160436e-06,
"loss": 0.3523,
"step": 88200
},
{
"epoch": 0.88,
"learning_rate": 1.1739186933357324e-06,
"loss": 0.3402,
"step": 88300
},
{
"epoch": 0.88,
"learning_rate": 1.1639227916554214e-06,
"loss": 0.3494,
"step": 88400
},
{
"epoch": 0.88,
"learning_rate": 1.1539268899751104e-06,
"loss": 0.3389,
"step": 88500
},
{
"epoch": 0.89,
"learning_rate": 1.1439309882947992e-06,
"loss": 0.3484,
"step": 88600
},
{
"epoch": 0.89,
"learning_rate": 1.1339350866144882e-06,
"loss": 0.3446,
"step": 88700
},
{
"epoch": 0.89,
"learning_rate": 1.1239391849341772e-06,
"loss": 0.348,
"step": 88800
},
{
"epoch": 0.89,
"learning_rate": 1.113943283253866e-06,
"loss": 0.3432,
"step": 88900
},
{
"epoch": 0.89,
"learning_rate": 1.103947381573555e-06,
"loss": 0.3412,
"step": 89000
},
{
"epoch": 0.89,
"learning_rate": 1.0939514798932438e-06,
"loss": 0.3523,
"step": 89100
},
{
"epoch": 0.89,
"learning_rate": 1.0839555782129328e-06,
"loss": 0.3461,
"step": 89200
},
{
"epoch": 0.89,
"learning_rate": 1.0739596765326218e-06,
"loss": 0.339,
"step": 89300
},
{
"epoch": 0.89,
"learning_rate": 1.0639637748523106e-06,
"loss": 0.3418,
"step": 89400
},
{
"epoch": 0.89,
"learning_rate": 1.0539678731719996e-06,
"loss": 0.3502,
"step": 89500
},
{
"epoch": 0.9,
"learning_rate": 1.0439719714916886e-06,
"loss": 0.3451,
"step": 89600
},
{
"epoch": 0.9,
"learning_rate": 1.0339760698113774e-06,
"loss": 0.3388,
"step": 89700
},
{
"epoch": 0.9,
"learning_rate": 1.0239801681310664e-06,
"loss": 0.3398,
"step": 89800
},
{
"epoch": 0.9,
"learning_rate": 1.0139842664507552e-06,
"loss": 0.3428,
"step": 89900
},
{
"epoch": 0.9,
"learning_rate": 1.0039883647704442e-06,
"loss": 0.3447,
"step": 90000
},
{
"epoch": 0.9,
"learning_rate": 9.93992463090133e-07,
"loss": 0.347,
"step": 90100
},
{
"epoch": 0.9,
"learning_rate": 9.83996561409822e-07,
"loss": 0.3359,
"step": 90200
},
{
"epoch": 0.9,
"learning_rate": 9.74000659729511e-07,
"loss": 0.3342,
"step": 90300
},
{
"epoch": 0.9,
"learning_rate": 9.640047580491998e-07,
"loss": 0.3371,
"step": 90400
},
{
"epoch": 0.9,
"learning_rate": 9.540088563688888e-07,
"loss": 0.3436,
"step": 90500
},
{
"epoch": 0.91,
"learning_rate": 9.440129546885777e-07,
"loss": 0.3484,
"step": 90600
},
{
"epoch": 0.91,
"learning_rate": 9.340170530082666e-07,
"loss": 0.3445,
"step": 90700
},
{
"epoch": 0.91,
"learning_rate": 9.240211513279556e-07,
"loss": 0.3412,
"step": 90800
},
{
"epoch": 0.91,
"learning_rate": 9.140252496476445e-07,
"loss": 0.3401,
"step": 90900
},
{
"epoch": 0.91,
"learning_rate": 9.040293479673334e-07,
"loss": 0.3436,
"step": 91000
},
{
"epoch": 0.91,
"learning_rate": 8.940334462870224e-07,
"loss": 0.3415,
"step": 91100
},
{
"epoch": 0.91,
"learning_rate": 8.840375446067113e-07,
"loss": 0.3397,
"step": 91200
},
{
"epoch": 0.91,
"learning_rate": 8.740416429264002e-07,
"loss": 0.3355,
"step": 91300
},
{
"epoch": 0.91,
"learning_rate": 8.640457412460891e-07,
"loss": 0.3378,
"step": 91400
},
{
"epoch": 0.91,
"learning_rate": 8.540498395657781e-07,
"loss": 0.3424,
"step": 91500
},
{
"epoch": 0.92,
"learning_rate": 8.44053937885467e-07,
"loss": 0.3361,
"step": 91600
},
{
"epoch": 0.92,
"learning_rate": 8.34058036205156e-07,
"loss": 0.333,
"step": 91700
},
{
"epoch": 0.92,
"learning_rate": 8.24062134524845e-07,
"loss": 0.3361,
"step": 91800
},
{
"epoch": 0.92,
"learning_rate": 8.140662328445339e-07,
"loss": 0.3312,
"step": 91900
},
{
"epoch": 0.92,
"learning_rate": 8.040703311642227e-07,
"loss": 0.3375,
"step": 92000
},
{
"epoch": 0.92,
"learning_rate": 7.940744294839118e-07,
"loss": 0.3342,
"step": 92100
},
{
"epoch": 0.92,
"learning_rate": 7.840785278036006e-07,
"loss": 0.3373,
"step": 92200
},
{
"epoch": 0.92,
"learning_rate": 7.740826261232895e-07,
"loss": 0.3317,
"step": 92300
},
{
"epoch": 0.92,
"learning_rate": 7.640867244429784e-07,
"loss": 0.3309,
"step": 92400
},
{
"epoch": 0.92,
"learning_rate": 7.540908227626674e-07,
"loss": 0.3331,
"step": 92500
},
{
"epoch": 0.93,
"learning_rate": 7.440949210823563e-07,
"loss": 0.3329,
"step": 92600
},
{
"epoch": 0.93,
"learning_rate": 7.340990194020452e-07,
"loss": 0.3318,
"step": 92700
},
{
"epoch": 0.93,
"learning_rate": 7.241031177217342e-07,
"loss": 0.3377,
"step": 92800
},
{
"epoch": 0.93,
"learning_rate": 7.141072160414231e-07,
"loss": 0.3346,
"step": 92900
},
{
"epoch": 0.93,
"learning_rate": 7.04111314361112e-07,
"loss": 0.3381,
"step": 93000
},
{
"epoch": 0.93,
"learning_rate": 6.94115412680801e-07,
"loss": 0.333,
"step": 93100
},
{
"epoch": 0.93,
"learning_rate": 6.841195110004899e-07,
"loss": 0.3322,
"step": 93200
},
{
"epoch": 0.93,
"learning_rate": 6.741236093201788e-07,
"loss": 0.3288,
"step": 93300
},
{
"epoch": 0.93,
"learning_rate": 6.641277076398677e-07,
"loss": 0.3337,
"step": 93400
},
{
"epoch": 0.93,
"learning_rate": 6.541318059595567e-07,
"loss": 0.3327,
"step": 93500
},
{
"epoch": 0.94,
"learning_rate": 6.441359042792456e-07,
"loss": 0.3358,
"step": 93600
},
{
"epoch": 0.94,
"learning_rate": 6.341400025989344e-07,
"loss": 0.3337,
"step": 93700
},
{
"epoch": 0.94,
"learning_rate": 6.241441009186234e-07,
"loss": 0.3312,
"step": 93800
},
{
"epoch": 0.94,
"learning_rate": 6.141481992383123e-07,
"loss": 0.3299,
"step": 93900
},
{
"epoch": 0.94,
"learning_rate": 6.041522975580013e-07,
"loss": 0.3324,
"step": 94000
},
{
"epoch": 0.94,
"learning_rate": 5.941563958776902e-07,
"loss": 0.3343,
"step": 94100
},
{
"epoch": 0.94,
"learning_rate": 5.841604941973791e-07,
"loss": 0.3296,
"step": 94200
},
{
"epoch": 0.94,
"learning_rate": 5.74164592517068e-07,
"loss": 0.3346,
"step": 94300
},
{
"epoch": 0.94,
"learning_rate": 5.64168690836757e-07,
"loss": 0.3376,
"step": 94400
},
{
"epoch": 0.94,
"learning_rate": 5.541727891564459e-07,
"loss": 0.3314,
"step": 94500
},
{
"epoch": 0.95,
"learning_rate": 5.441768874761348e-07,
"loss": 0.3316,
"step": 94600
},
{
"epoch": 0.95,
"learning_rate": 5.341809857958237e-07,
"loss": 0.3231,
"step": 94700
},
{
"epoch": 0.95,
"learning_rate": 5.241850841155126e-07,
"loss": 0.3315,
"step": 94800
},
{
"epoch": 0.95,
"learning_rate": 5.141891824352016e-07,
"loss": 0.3288,
"step": 94900
},
{
"epoch": 0.95,
"learning_rate": 5.041932807548905e-07,
"loss": 0.3297,
"step": 95000
},
{
"epoch": 0.95,
"learning_rate": 4.941973790745794e-07,
"loss": 0.3308,
"step": 95100
},
{
"epoch": 0.95,
"learning_rate": 4.842014773942684e-07,
"loss": 0.326,
"step": 95200
},
{
"epoch": 0.95,
"learning_rate": 4.742055757139573e-07,
"loss": 0.3281,
"step": 95300
},
{
"epoch": 0.95,
"learning_rate": 4.6420967403364625e-07,
"loss": 0.3285,
"step": 95400
},
{
"epoch": 0.95,
"learning_rate": 4.542137723533352e-07,
"loss": 0.3269,
"step": 95500
},
{
"epoch": 0.96,
"learning_rate": 4.442178706730241e-07,
"loss": 0.3311,
"step": 95600
},
{
"epoch": 0.96,
"learning_rate": 4.34221968992713e-07,
"loss": 0.3251,
"step": 95700
},
{
"epoch": 0.96,
"learning_rate": 4.2422606731240196e-07,
"loss": 0.3262,
"step": 95800
},
{
"epoch": 0.96,
"learning_rate": 4.1423016563209086e-07,
"loss": 0.3305,
"step": 95900
},
{
"epoch": 0.96,
"learning_rate": 4.042342639517798e-07,
"loss": 0.3242,
"step": 96000
},
{
"epoch": 0.96,
"learning_rate": 3.942383622714687e-07,
"loss": 0.3282,
"step": 96100
},
{
"epoch": 0.96,
"learning_rate": 3.8424246059115767e-07,
"loss": 0.3285,
"step": 96200
},
{
"epoch": 0.96,
"learning_rate": 3.742465589108466e-07,
"loss": 0.3266,
"step": 96300
},
{
"epoch": 0.96,
"learning_rate": 3.642506572305355e-07,
"loss": 0.3262,
"step": 96400
},
{
"epoch": 0.96,
"learning_rate": 3.542547555502245e-07,
"loss": 0.3276,
"step": 96500
},
{
"epoch": 0.97,
"learning_rate": 3.4425885386991333e-07,
"loss": 0.3281,
"step": 96600
},
{
"epoch": 0.97,
"learning_rate": 3.342629521896023e-07,
"loss": 0.3238,
"step": 96700
},
{
"epoch": 0.97,
"learning_rate": 3.2426705050929124e-07,
"loss": 0.3222,
"step": 96800
},
{
"epoch": 0.97,
"learning_rate": 3.1427114882898014e-07,
"loss": 0.3299,
"step": 96900
},
{
"epoch": 0.97,
"learning_rate": 3.042752471486691e-07,
"loss": 0.3224,
"step": 97000
},
{
"epoch": 0.97,
"learning_rate": 2.94279345468358e-07,
"loss": 0.329,
"step": 97100
},
{
"epoch": 0.97,
"learning_rate": 2.8428344378804695e-07,
"loss": 0.3213,
"step": 97200
},
{
"epoch": 0.97,
"learning_rate": 2.7428754210773585e-07,
"loss": 0.3283,
"step": 97300
},
{
"epoch": 0.97,
"learning_rate": 2.6429164042742475e-07,
"loss": 0.3227,
"step": 97400
},
{
"epoch": 0.97,
"learning_rate": 2.542957387471137e-07,
"loss": 0.3243,
"step": 97500
},
{
"epoch": 0.98,
"learning_rate": 2.4429983706680266e-07,
"loss": 0.3229,
"step": 97600
},
{
"epoch": 0.98,
"learning_rate": 2.3430393538649156e-07,
"loss": 0.3247,
"step": 97700
},
{
"epoch": 0.98,
"learning_rate": 2.243080337061805e-07,
"loss": 0.3163,
"step": 97800
},
{
"epoch": 0.98,
"learning_rate": 2.143121320258694e-07,
"loss": 0.3288,
"step": 97900
},
{
"epoch": 0.98,
"learning_rate": 2.0431623034555834e-07,
"loss": 0.3295,
"step": 98000
},
{
"epoch": 0.98,
"learning_rate": 1.9432032866524727e-07,
"loss": 0.321,
"step": 98100
},
{
"epoch": 0.98,
"learning_rate": 1.843244269849362e-07,
"loss": 0.3158,
"step": 98200
},
{
"epoch": 0.98,
"learning_rate": 1.743285253046251e-07,
"loss": 0.3238,
"step": 98300
},
{
"epoch": 0.98,
"learning_rate": 1.6433262362431403e-07,
"loss": 0.3251,
"step": 98400
},
{
"epoch": 0.98,
"learning_rate": 1.5433672194400295e-07,
"loss": 0.3278,
"step": 98500
},
{
"epoch": 0.99,
"learning_rate": 1.443408202636919e-07,
"loss": 0.3232,
"step": 98600
},
{
"epoch": 0.99,
"learning_rate": 1.343449185833808e-07,
"loss": 0.3259,
"step": 98700
},
{
"epoch": 0.99,
"learning_rate": 1.2434901690306976e-07,
"loss": 0.3223,
"step": 98800
},
{
"epoch": 0.99,
"learning_rate": 1.1435311522275868e-07,
"loss": 0.3285,
"step": 98900
},
{
"epoch": 0.99,
"learning_rate": 1.0435721354244762e-07,
"loss": 0.3249,
"step": 99000
},
{
"epoch": 0.99,
"learning_rate": 9.436131186213653e-08,
"loss": 0.3269,
"step": 99100
},
{
"epoch": 0.99,
"learning_rate": 8.436541018182545e-08,
"loss": 0.3265,
"step": 99200
},
{
"epoch": 0.99,
"learning_rate": 7.436950850151439e-08,
"loss": 0.322,
"step": 99300
},
{
"epoch": 0.99,
"learning_rate": 6.43736068212033e-08,
"loss": 0.3186,
"step": 99400
},
{
"epoch": 0.99,
"learning_rate": 5.437770514089224e-08,
"loss": 0.324,
"step": 99500
},
{
"epoch": 1.0,
"learning_rate": 4.4381803460581166e-08,
"loss": 0.3219,
"step": 99600
},
{
"epoch": 1.0,
"learning_rate": 3.4385901780270094e-08,
"loss": 0.3218,
"step": 99700
},
{
"epoch": 1.0,
"learning_rate": 2.4390000099959022e-08,
"loss": 0.3188,
"step": 99800
},
{
"epoch": 1.0,
"learning_rate": 1.4394098419647945e-08,
"loss": 0.3218,
"step": 99900
},
{
"epoch": 1.0,
"learning_rate": 4.3981967393368716e-09,
"loss": 0.3233,
"step": 100000
}
],
"logging_steps": 100,
"max_steps": 100043,
"num_train_epochs": 1,
"save_steps": 10000,
"total_flos": 5668611072983040.0,
"trial_name": null,
"trial_params": null
}