{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9995701848205272, "eval_steps": 200000, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.990303975370099e-06, "loss": 1.6732, "step": 100 }, { "epoch": 0.0, "learning_rate": 9.980308073689788e-06, "loss": 1.3978, "step": 200 }, { "epoch": 0.0, "learning_rate": 9.970312172009477e-06, "loss": 1.3189, "step": 300 }, { "epoch": 0.0, "learning_rate": 9.960316270329166e-06, "loss": 1.294, "step": 400 }, { "epoch": 0.0, "learning_rate": 9.950320368648854e-06, "loss": 1.269, "step": 500 }, { "epoch": 0.01, "learning_rate": 9.940324466968543e-06, "loss": 1.2399, "step": 600 }, { "epoch": 0.01, "learning_rate": 9.930328565288232e-06, "loss": 1.2272, "step": 700 }, { "epoch": 0.01, "learning_rate": 9.920332663607922e-06, "loss": 1.2275, "step": 800 }, { "epoch": 0.01, "learning_rate": 9.91033676192761e-06, "loss": 1.204, "step": 900 }, { "epoch": 0.01, "learning_rate": 9.9003408602473e-06, "loss": 1.2009, "step": 1000 }, { "epoch": 0.01, "learning_rate": 9.89034495856699e-06, "loss": 1.1841, "step": 1100 }, { "epoch": 0.01, "learning_rate": 9.880349056886678e-06, "loss": 1.1711, "step": 1200 }, { "epoch": 0.01, "learning_rate": 9.870353155206366e-06, "loss": 1.1601, "step": 1300 }, { "epoch": 0.01, "learning_rate": 9.860357253526055e-06, "loss": 1.1601, "step": 1400 }, { "epoch": 0.01, "learning_rate": 9.850361351845744e-06, "loss": 1.1313, "step": 1500 }, { "epoch": 0.02, "learning_rate": 9.840365450165434e-06, "loss": 1.1443, "step": 1600 }, { "epoch": 0.02, "learning_rate": 9.830369548485121e-06, "loss": 1.144, "step": 1700 }, { "epoch": 0.02, "learning_rate": 9.82037364680481e-06, "loss": 1.1347, "step": 1800 }, { "epoch": 0.02, "learning_rate": 9.8103777451245e-06, "loss": 1.1323, "step": 1900 }, { "epoch": 0.02, "learning_rate": 9.800381843444189e-06, "loss": 1.1229, "step": 2000 }, { "epoch": 0.02, "learning_rate": 9.790385941763878e-06, "loss": 1.1027, "step": 2100 }, { "epoch": 0.02, "learning_rate": 9.780390040083566e-06, "loss": 1.1113, "step": 2200 }, { "epoch": 0.02, "learning_rate": 9.770394138403255e-06, "loss": 1.1093, "step": 2300 }, { "epoch": 0.02, "learning_rate": 9.760398236722944e-06, "loss": 1.0941, "step": 2400 }, { "epoch": 0.02, "learning_rate": 9.750402335042633e-06, "loss": 1.0985, "step": 2500 }, { "epoch": 0.03, "learning_rate": 9.740406433362322e-06, "loss": 1.0881, "step": 2600 }, { "epoch": 0.03, "learning_rate": 9.73041053168201e-06, "loss": 1.106, "step": 2700 }, { "epoch": 0.03, "learning_rate": 9.7204146300017e-06, "loss": 1.0959, "step": 2800 }, { "epoch": 0.03, "learning_rate": 9.710418728321388e-06, "loss": 1.0974, "step": 2900 }, { "epoch": 0.03, "learning_rate": 9.700422826641078e-06, "loss": 1.0783, "step": 3000 }, { "epoch": 0.03, "learning_rate": 9.690426924960767e-06, "loss": 1.0669, "step": 3100 }, { "epoch": 0.03, "learning_rate": 9.680431023280456e-06, "loss": 1.081, "step": 3200 }, { "epoch": 0.03, "learning_rate": 9.670435121600145e-06, "loss": 1.0611, "step": 3300 }, { "epoch": 0.03, "learning_rate": 9.660439219919835e-06, "loss": 1.0572, "step": 3400 }, { "epoch": 0.03, "learning_rate": 9.650443318239524e-06, "loss": 1.0654, "step": 3500 }, { "epoch": 0.04, "learning_rate": 9.640447416559211e-06, "loss": 1.0671, "step": 3600 }, { "epoch": 0.04, "learning_rate": 9.6304515148789e-06, "loss": 1.0655, "step": 3700 }, { "epoch": 0.04, "learning_rate": 9.62045561319859e-06, "loss": 1.0507, "step": 3800 }, { "epoch": 0.04, "learning_rate": 9.610459711518279e-06, "loss": 1.0482, "step": 3900 }, { "epoch": 0.04, "learning_rate": 9.600463809837968e-06, "loss": 1.0543, "step": 4000 }, { "epoch": 0.04, "learning_rate": 9.590467908157656e-06, "loss": 1.054, "step": 4100 }, { "epoch": 0.04, "learning_rate": 9.580472006477345e-06, "loss": 1.0551, "step": 4200 }, { "epoch": 0.04, "learning_rate": 9.570476104797034e-06, "loss": 1.0425, "step": 4300 }, { "epoch": 0.04, "learning_rate": 9.560480203116723e-06, "loss": 1.0417, "step": 4400 }, { "epoch": 0.04, "learning_rate": 9.550484301436413e-06, "loss": 1.0477, "step": 4500 }, { "epoch": 0.05, "learning_rate": 9.5404883997561e-06, "loss": 1.0298, "step": 4600 }, { "epoch": 0.05, "learning_rate": 9.53049249807579e-06, "loss": 1.0305, "step": 4700 }, { "epoch": 0.05, "learning_rate": 9.520496596395479e-06, "loss": 1.0279, "step": 4800 }, { "epoch": 0.05, "learning_rate": 9.510500694715168e-06, "loss": 1.0308, "step": 4900 }, { "epoch": 0.05, "learning_rate": 9.500504793034855e-06, "loss": 1.0328, "step": 5000 }, { "epoch": 0.05, "learning_rate": 9.490508891354544e-06, "loss": 1.027, "step": 5100 }, { "epoch": 0.05, "learning_rate": 9.480512989674234e-06, "loss": 1.0251, "step": 5200 }, { "epoch": 0.05, "learning_rate": 9.470517087993923e-06, "loss": 1.0273, "step": 5300 }, { "epoch": 0.05, "learning_rate": 9.460521186313612e-06, "loss": 1.0258, "step": 5400 }, { "epoch": 0.05, "learning_rate": 9.450525284633301e-06, "loss": 1.0239, "step": 5500 }, { "epoch": 0.06, "learning_rate": 9.44052938295299e-06, "loss": 1.0263, "step": 5600 }, { "epoch": 0.06, "learning_rate": 9.43053348127268e-06, "loss": 1.009, "step": 5700 }, { "epoch": 0.06, "learning_rate": 9.420537579592367e-06, "loss": 1.005, "step": 5800 }, { "epoch": 0.06, "learning_rate": 9.410541677912057e-06, "loss": 1.0067, "step": 5900 }, { "epoch": 0.06, "learning_rate": 9.400545776231746e-06, "loss": 1.0088, "step": 6000 }, { "epoch": 0.06, "learning_rate": 9.390549874551435e-06, "loss": 1.0019, "step": 6100 }, { "epoch": 0.06, "learning_rate": 9.380553972871124e-06, "loss": 0.9987, "step": 6200 }, { "epoch": 0.06, "learning_rate": 9.370558071190813e-06, "loss": 1.0008, "step": 6300 }, { "epoch": 0.06, "learning_rate": 9.360562169510503e-06, "loss": 0.9994, "step": 6400 }, { "epoch": 0.06, "learning_rate": 9.35056626783019e-06, "loss": 0.9955, "step": 6500 }, { "epoch": 0.07, "learning_rate": 9.34057036614988e-06, "loss": 0.9904, "step": 6600 }, { "epoch": 0.07, "learning_rate": 9.330574464469569e-06, "loss": 0.9859, "step": 6700 }, { "epoch": 0.07, "learning_rate": 9.320578562789258e-06, "loss": 0.9792, "step": 6800 }, { "epoch": 0.07, "learning_rate": 9.310582661108945e-06, "loss": 0.995, "step": 6900 }, { "epoch": 0.07, "learning_rate": 9.300586759428635e-06, "loss": 0.9833, "step": 7000 }, { "epoch": 0.07, "learning_rate": 9.290590857748324e-06, "loss": 0.9749, "step": 7100 }, { "epoch": 0.07, "learning_rate": 9.280594956068013e-06, "loss": 0.9891, "step": 7200 }, { "epoch": 0.07, "learning_rate": 9.270599054387702e-06, "loss": 0.9881, "step": 7300 }, { "epoch": 0.07, "learning_rate": 9.26060315270739e-06, "loss": 0.9832, "step": 7400 }, { "epoch": 0.07, "learning_rate": 9.250607251027079e-06, "loss": 0.9816, "step": 7500 }, { "epoch": 0.08, "learning_rate": 9.240611349346768e-06, "loss": 0.9743, "step": 7600 }, { "epoch": 0.08, "learning_rate": 9.230615447666457e-06, "loss": 0.9834, "step": 7700 }, { "epoch": 0.08, "learning_rate": 9.220619545986147e-06, "loss": 0.9774, "step": 7800 }, { "epoch": 0.08, "learning_rate": 9.210623644305836e-06, "loss": 0.9845, "step": 7900 }, { "epoch": 0.08, "learning_rate": 9.200627742625523e-06, "loss": 0.9652, "step": 8000 }, { "epoch": 0.08, "learning_rate": 9.190631840945213e-06, "loss": 0.969, "step": 8100 }, { "epoch": 0.08, "learning_rate": 9.180635939264902e-06, "loss": 0.9679, "step": 8200 }, { "epoch": 0.08, "learning_rate": 9.170640037584591e-06, "loss": 0.9624, "step": 8300 }, { "epoch": 0.08, "learning_rate": 9.16064413590428e-06, "loss": 0.9813, "step": 8400 }, { "epoch": 0.08, "learning_rate": 9.15064823422397e-06, "loss": 0.9702, "step": 8500 }, { "epoch": 0.09, "learning_rate": 9.140652332543659e-06, "loss": 0.9634, "step": 8600 }, { "epoch": 0.09, "learning_rate": 9.130656430863348e-06, "loss": 0.9598, "step": 8700 }, { "epoch": 0.09, "learning_rate": 9.120660529183035e-06, "loss": 0.9662, "step": 8800 }, { "epoch": 0.09, "learning_rate": 9.110664627502725e-06, "loss": 0.9588, "step": 8900 }, { "epoch": 0.09, "learning_rate": 9.100668725822414e-06, "loss": 0.9451, "step": 9000 }, { "epoch": 0.09, "learning_rate": 9.090672824142103e-06, "loss": 0.967, "step": 9100 }, { "epoch": 0.09, "learning_rate": 9.080676922461792e-06, "loss": 0.9545, "step": 9200 }, { "epoch": 0.09, "learning_rate": 9.07068102078148e-06, "loss": 0.9568, "step": 9300 }, { "epoch": 0.09, "learning_rate": 9.060685119101169e-06, "loss": 0.9483, "step": 9400 }, { "epoch": 0.09, "learning_rate": 9.050689217420858e-06, "loss": 0.9444, "step": 9500 }, { "epoch": 0.1, "learning_rate": 9.040693315740547e-06, "loss": 0.9379, "step": 9600 }, { "epoch": 0.1, "learning_rate": 9.030697414060237e-06, "loss": 0.9445, "step": 9700 }, { "epoch": 0.1, "learning_rate": 9.020701512379924e-06, "loss": 0.939, "step": 9800 }, { "epoch": 0.1, "learning_rate": 9.010705610699613e-06, "loss": 0.9506, "step": 9900 }, { "epoch": 0.1, "learning_rate": 9.000709709019303e-06, "loss": 0.941, "step": 10000 }, { "epoch": 0.1, "learning_rate": 8.990713807338992e-06, "loss": 0.9444, "step": 10100 }, { "epoch": 0.1, "learning_rate": 8.98071790565868e-06, "loss": 0.9526, "step": 10200 }, { "epoch": 0.1, "learning_rate": 8.970722003978369e-06, "loss": 0.9294, "step": 10300 }, { "epoch": 0.1, "learning_rate": 8.960726102298058e-06, "loss": 0.9332, "step": 10400 }, { "epoch": 0.1, "learning_rate": 8.950730200617747e-06, "loss": 0.9357, "step": 10500 }, { "epoch": 0.11, "learning_rate": 8.940734298937436e-06, "loss": 0.9408, "step": 10600 }, { "epoch": 0.11, "learning_rate": 8.930738397257125e-06, "loss": 0.9301, "step": 10700 }, { "epoch": 0.11, "learning_rate": 8.920742495576815e-06, "loss": 0.9462, "step": 10800 }, { "epoch": 0.11, "learning_rate": 8.910746593896504e-06, "loss": 0.9295, "step": 10900 }, { "epoch": 0.11, "learning_rate": 8.900750692216193e-06, "loss": 0.9418, "step": 11000 }, { "epoch": 0.11, "learning_rate": 8.890754790535882e-06, "loss": 0.9315, "step": 11100 }, { "epoch": 0.11, "learning_rate": 8.88075888885557e-06, "loss": 0.9356, "step": 11200 }, { "epoch": 0.11, "learning_rate": 8.870762987175259e-06, "loss": 0.9216, "step": 11300 }, { "epoch": 0.11, "learning_rate": 8.860767085494948e-06, "loss": 0.9298, "step": 11400 }, { "epoch": 0.11, "learning_rate": 8.850771183814638e-06, "loss": 0.9207, "step": 11500 }, { "epoch": 0.12, "learning_rate": 8.840775282134325e-06, "loss": 0.923, "step": 11600 }, { "epoch": 0.12, "learning_rate": 8.830779380454014e-06, "loss": 0.91, "step": 11700 }, { "epoch": 0.12, "learning_rate": 8.820783478773703e-06, "loss": 0.9171, "step": 11800 }, { "epoch": 0.12, "learning_rate": 8.810787577093393e-06, "loss": 0.9179, "step": 11900 }, { "epoch": 0.12, "learning_rate": 8.800791675413082e-06, "loss": 0.9185, "step": 12000 }, { "epoch": 0.12, "learning_rate": 8.79079577373277e-06, "loss": 0.9071, "step": 12100 }, { "epoch": 0.12, "learning_rate": 8.780799872052459e-06, "loss": 0.9142, "step": 12200 }, { "epoch": 0.12, "learning_rate": 8.770803970372148e-06, "loss": 0.9079, "step": 12300 }, { "epoch": 0.12, "learning_rate": 8.760808068691837e-06, "loss": 0.9105, "step": 12400 }, { "epoch": 0.12, "learning_rate": 8.750812167011526e-06, "loss": 0.9027, "step": 12500 }, { "epoch": 0.13, "learning_rate": 8.740816265331214e-06, "loss": 0.9074, "step": 12600 }, { "epoch": 0.13, "learning_rate": 8.730820363650903e-06, "loss": 0.9064, "step": 12700 }, { "epoch": 0.13, "learning_rate": 8.720824461970592e-06, "loss": 0.8988, "step": 12800 }, { "epoch": 0.13, "learning_rate": 8.710828560290282e-06, "loss": 0.9, "step": 12900 }, { "epoch": 0.13, "learning_rate": 8.70083265860997e-06, "loss": 0.888, "step": 13000 }, { "epoch": 0.13, "learning_rate": 8.69083675692966e-06, "loss": 0.9012, "step": 13100 }, { "epoch": 0.13, "learning_rate": 8.68084085524935e-06, "loss": 0.9036, "step": 13200 }, { "epoch": 0.13, "learning_rate": 8.670844953569038e-06, "loss": 0.9023, "step": 13300 }, { "epoch": 0.13, "learning_rate": 8.660849051888726e-06, "loss": 0.9017, "step": 13400 }, { "epoch": 0.13, "learning_rate": 8.650853150208415e-06, "loss": 0.8999, "step": 13500 }, { "epoch": 0.14, "learning_rate": 8.640857248528104e-06, "loss": 0.895, "step": 13600 }, { "epoch": 0.14, "learning_rate": 8.630861346847794e-06, "loss": 0.9, "step": 13700 }, { "epoch": 0.14, "learning_rate": 8.620865445167483e-06, "loss": 0.907, "step": 13800 }, { "epoch": 0.14, "learning_rate": 8.610869543487172e-06, "loss": 0.8963, "step": 13900 }, { "epoch": 0.14, "learning_rate": 8.60087364180686e-06, "loss": 0.8953, "step": 14000 }, { "epoch": 0.14, "learning_rate": 8.590877740126549e-06, "loss": 0.9034, "step": 14100 }, { "epoch": 0.14, "learning_rate": 8.580881838446238e-06, "loss": 0.8916, "step": 14200 }, { "epoch": 0.14, "learning_rate": 8.570885936765927e-06, "loss": 0.8964, "step": 14300 }, { "epoch": 0.14, "learning_rate": 8.560890035085616e-06, "loss": 0.8894, "step": 14400 }, { "epoch": 0.14, "learning_rate": 8.550894133405304e-06, "loss": 0.8894, "step": 14500 }, { "epoch": 0.15, "learning_rate": 8.540898231724993e-06, "loss": 0.8941, "step": 14600 }, { "epoch": 0.15, "learning_rate": 8.530902330044682e-06, "loss": 0.8888, "step": 14700 }, { "epoch": 0.15, "learning_rate": 8.520906428364372e-06, "loss": 0.8898, "step": 14800 }, { "epoch": 0.15, "learning_rate": 8.510910526684059e-06, "loss": 0.8812, "step": 14900 }, { "epoch": 0.15, "learning_rate": 8.500914625003748e-06, "loss": 0.8833, "step": 15000 }, { "epoch": 0.15, "learning_rate": 8.490918723323438e-06, "loss": 0.8907, "step": 15100 }, { "epoch": 0.15, "learning_rate": 8.480922821643127e-06, "loss": 0.8732, "step": 15200 }, { "epoch": 0.15, "learning_rate": 8.470926919962816e-06, "loss": 0.8894, "step": 15300 }, { "epoch": 0.15, "learning_rate": 8.460931018282505e-06, "loss": 0.8731, "step": 15400 }, { "epoch": 0.15, "learning_rate": 8.450935116602193e-06, "loss": 0.8811, "step": 15500 }, { "epoch": 0.16, "learning_rate": 8.440939214921882e-06, "loss": 0.8649, "step": 15600 }, { "epoch": 0.16, "learning_rate": 8.430943313241571e-06, "loss": 0.8654, "step": 15700 }, { "epoch": 0.16, "learning_rate": 8.42094741156126e-06, "loss": 0.8688, "step": 15800 }, { "epoch": 0.16, "learning_rate": 8.41095150988095e-06, "loss": 0.8713, "step": 15900 }, { "epoch": 0.16, "learning_rate": 8.400955608200639e-06, "loss": 0.8694, "step": 16000 }, { "epoch": 0.16, "learning_rate": 8.390959706520328e-06, "loss": 0.8617, "step": 16100 }, { "epoch": 0.16, "learning_rate": 8.380963804840017e-06, "loss": 0.8764, "step": 16200 }, { "epoch": 0.16, "learning_rate": 8.370967903159706e-06, "loss": 0.8627, "step": 16300 }, { "epoch": 0.16, "learning_rate": 8.360972001479394e-06, "loss": 0.8749, "step": 16400 }, { "epoch": 0.16, "learning_rate": 8.350976099799083e-06, "loss": 0.8702, "step": 16500 }, { "epoch": 0.17, "learning_rate": 8.340980198118772e-06, "loss": 0.868, "step": 16600 }, { "epoch": 0.17, "learning_rate": 8.330984296438462e-06, "loss": 0.8545, "step": 16700 }, { "epoch": 0.17, "learning_rate": 8.32098839475815e-06, "loss": 0.865, "step": 16800 }, { "epoch": 0.17, "learning_rate": 8.310992493077838e-06, "loss": 0.864, "step": 16900 }, { "epoch": 0.17, "learning_rate": 8.300996591397528e-06, "loss": 0.8676, "step": 17000 }, { "epoch": 0.17, "learning_rate": 8.291000689717217e-06, "loss": 0.8636, "step": 17100 }, { "epoch": 0.17, "learning_rate": 8.281004788036906e-06, "loss": 0.8555, "step": 17200 }, { "epoch": 0.17, "learning_rate": 8.271008886356594e-06, "loss": 0.8548, "step": 17300 }, { "epoch": 0.17, "learning_rate": 8.261012984676283e-06, "loss": 0.8546, "step": 17400 }, { "epoch": 0.17, "learning_rate": 8.251017082995972e-06, "loss": 0.8606, "step": 17500 }, { "epoch": 0.18, "learning_rate": 8.241021181315661e-06, "loss": 0.8485, "step": 17600 }, { "epoch": 0.18, "learning_rate": 8.23102527963535e-06, "loss": 0.8421, "step": 17700 }, { "epoch": 0.18, "learning_rate": 8.221029377955038e-06, "loss": 0.8468, "step": 17800 }, { "epoch": 0.18, "learning_rate": 8.211033476274727e-06, "loss": 0.8493, "step": 17900 }, { "epoch": 0.18, "learning_rate": 8.201037574594416e-06, "loss": 0.8508, "step": 18000 }, { "epoch": 0.18, "learning_rate": 8.191041672914106e-06, "loss": 0.8441, "step": 18100 }, { "epoch": 0.18, "learning_rate": 8.181045771233795e-06, "loss": 0.8554, "step": 18200 }, { "epoch": 0.18, "learning_rate": 8.171049869553484e-06, "loss": 0.8458, "step": 18300 }, { "epoch": 0.18, "learning_rate": 8.161053967873173e-06, "loss": 0.8433, "step": 18400 }, { "epoch": 0.18, "learning_rate": 8.151058066192863e-06, "loss": 0.8494, "step": 18500 }, { "epoch": 0.19, "learning_rate": 8.141062164512552e-06, "loss": 0.842, "step": 18600 }, { "epoch": 0.19, "learning_rate": 8.13106626283224e-06, "loss": 0.849, "step": 18700 }, { "epoch": 0.19, "learning_rate": 8.121070361151928e-06, "loss": 0.8382, "step": 18800 }, { "epoch": 0.19, "learning_rate": 8.111074459471618e-06, "loss": 0.8385, "step": 18900 }, { "epoch": 0.19, "learning_rate": 8.101078557791307e-06, "loss": 0.8397, "step": 19000 }, { "epoch": 0.19, "learning_rate": 8.091082656110996e-06, "loss": 0.8538, "step": 19100 }, { "epoch": 0.19, "learning_rate": 8.081086754430684e-06, "loss": 0.8392, "step": 19200 }, { "epoch": 0.19, "learning_rate": 8.071090852750373e-06, "loss": 0.8379, "step": 19300 }, { "epoch": 0.19, "learning_rate": 8.061094951070062e-06, "loss": 0.8409, "step": 19400 }, { "epoch": 0.19, "learning_rate": 8.051099049389751e-06, "loss": 0.8332, "step": 19500 }, { "epoch": 0.2, "learning_rate": 8.04110314770944e-06, "loss": 0.8292, "step": 19600 }, { "epoch": 0.2, "learning_rate": 8.031107246029128e-06, "loss": 0.8382, "step": 19700 }, { "epoch": 0.2, "learning_rate": 8.021111344348817e-06, "loss": 0.8329, "step": 19800 }, { "epoch": 0.2, "learning_rate": 8.011115442668507e-06, "loss": 0.825, "step": 19900 }, { "epoch": 0.2, "learning_rate": 8.001119540988196e-06, "loss": 0.8274, "step": 20000 }, { "epoch": 0.2, "learning_rate": 7.991123639307883e-06, "loss": 0.8341, "step": 20100 }, { "epoch": 0.2, "learning_rate": 7.981127737627572e-06, "loss": 0.8302, "step": 20200 }, { "epoch": 0.2, "learning_rate": 7.971131835947262e-06, "loss": 0.8247, "step": 20300 }, { "epoch": 0.2, "learning_rate": 7.961135934266951e-06, "loss": 0.8355, "step": 20400 }, { "epoch": 0.2, "learning_rate": 7.95114003258664e-06, "loss": 0.8298, "step": 20500 }, { "epoch": 0.21, "learning_rate": 7.94114413090633e-06, "loss": 0.8282, "step": 20600 }, { "epoch": 0.21, "learning_rate": 7.931148229226019e-06, "loss": 0.8202, "step": 20700 }, { "epoch": 0.21, "learning_rate": 7.921152327545708e-06, "loss": 0.8203, "step": 20800 }, { "epoch": 0.21, "learning_rate": 7.911156425865395e-06, "loss": 0.8295, "step": 20900 }, { "epoch": 0.21, "learning_rate": 7.901160524185085e-06, "loss": 0.8229, "step": 21000 }, { "epoch": 0.21, "learning_rate": 7.891164622504774e-06, "loss": 0.818, "step": 21100 }, { "epoch": 0.21, "learning_rate": 7.881168720824463e-06, "loss": 0.8154, "step": 21200 }, { "epoch": 0.21, "learning_rate": 7.871172819144152e-06, "loss": 0.8166, "step": 21300 }, { "epoch": 0.21, "learning_rate": 7.861176917463841e-06, "loss": 0.8104, "step": 21400 }, { "epoch": 0.21, "learning_rate": 7.851181015783529e-06, "loss": 0.8178, "step": 21500 }, { "epoch": 0.22, "learning_rate": 7.841185114103218e-06, "loss": 0.8025, "step": 21600 }, { "epoch": 0.22, "learning_rate": 7.831189212422907e-06, "loss": 0.8099, "step": 21700 }, { "epoch": 0.22, "learning_rate": 7.821193310742597e-06, "loss": 0.8119, "step": 21800 }, { "epoch": 0.22, "learning_rate": 7.811197409062286e-06, "loss": 0.8049, "step": 21900 }, { "epoch": 0.22, "learning_rate": 7.801201507381973e-06, "loss": 0.8157, "step": 22000 }, { "epoch": 0.22, "learning_rate": 7.791205605701663e-06, "loss": 0.8031, "step": 22100 }, { "epoch": 0.22, "learning_rate": 7.781209704021352e-06, "loss": 0.8061, "step": 22200 }, { "epoch": 0.22, "learning_rate": 7.771213802341041e-06, "loss": 0.8175, "step": 22300 }, { "epoch": 0.22, "learning_rate": 7.76121790066073e-06, "loss": 0.8002, "step": 22400 }, { "epoch": 0.22, "learning_rate": 7.751221998980418e-06, "loss": 0.8115, "step": 22500 }, { "epoch": 0.23, "learning_rate": 7.741226097300107e-06, "loss": 0.8095, "step": 22600 }, { "epoch": 0.23, "learning_rate": 7.731230195619796e-06, "loss": 0.7981, "step": 22700 }, { "epoch": 0.23, "learning_rate": 7.721234293939485e-06, "loss": 0.802, "step": 22800 }, { "epoch": 0.23, "learning_rate": 7.711238392259175e-06, "loss": 0.7983, "step": 22900 }, { "epoch": 0.23, "learning_rate": 7.701242490578862e-06, "loss": 0.7987, "step": 23000 }, { "epoch": 0.23, "learning_rate": 7.691246588898551e-06, "loss": 0.8006, "step": 23100 }, { "epoch": 0.23, "learning_rate": 7.68125068721824e-06, "loss": 0.8005, "step": 23200 }, { "epoch": 0.23, "learning_rate": 7.67125478553793e-06, "loss": 0.7952, "step": 23300 }, { "epoch": 0.23, "learning_rate": 7.661258883857619e-06, "loss": 0.7929, "step": 23400 }, { "epoch": 0.23, "learning_rate": 7.651262982177308e-06, "loss": 0.7874, "step": 23500 }, { "epoch": 0.24, "learning_rate": 7.641267080496997e-06, "loss": 0.7971, "step": 23600 }, { "epoch": 0.24, "learning_rate": 7.631271178816687e-06, "loss": 0.7972, "step": 23700 }, { "epoch": 0.24, "learning_rate": 7.621275277136375e-06, "loss": 0.7909, "step": 23800 }, { "epoch": 0.24, "learning_rate": 7.611279375456063e-06, "loss": 0.8056, "step": 23900 }, { "epoch": 0.24, "learning_rate": 7.601283473775753e-06, "loss": 0.7985, "step": 24000 }, { "epoch": 0.24, "learning_rate": 7.591287572095442e-06, "loss": 0.7947, "step": 24100 }, { "epoch": 0.24, "learning_rate": 7.581291670415131e-06, "loss": 0.7843, "step": 24200 }, { "epoch": 0.24, "learning_rate": 7.5712957687348194e-06, "loss": 0.8008, "step": 24300 }, { "epoch": 0.24, "learning_rate": 7.561299867054508e-06, "loss": 0.786, "step": 24400 }, { "epoch": 0.24, "learning_rate": 7.551303965374197e-06, "loss": 0.7837, "step": 24500 }, { "epoch": 0.25, "learning_rate": 7.541308063693886e-06, "loss": 0.7978, "step": 24600 }, { "epoch": 0.25, "learning_rate": 7.5313121620135755e-06, "loss": 0.7869, "step": 24700 }, { "epoch": 0.25, "learning_rate": 7.521316260333264e-06, "loss": 0.7944, "step": 24800 }, { "epoch": 0.25, "learning_rate": 7.511320358652952e-06, "loss": 0.7884, "step": 24900 }, { "epoch": 0.25, "learning_rate": 7.5013244569726414e-06, "loss": 0.7783, "step": 25000 }, { "epoch": 0.25, "learning_rate": 7.491328555292331e-06, "loss": 0.7859, "step": 25100 }, { "epoch": 0.25, "learning_rate": 7.48133265361202e-06, "loss": 0.7741, "step": 25200 }, { "epoch": 0.25, "learning_rate": 7.471336751931708e-06, "loss": 0.7788, "step": 25300 }, { "epoch": 0.25, "learning_rate": 7.4613408502513975e-06, "loss": 0.7745, "step": 25400 }, { "epoch": 0.25, "learning_rate": 7.451344948571087e-06, "loss": 0.7833, "step": 25500 }, { "epoch": 0.26, "learning_rate": 7.441349046890776e-06, "loss": 0.7831, "step": 25600 }, { "epoch": 0.26, "learning_rate": 7.431353145210465e-06, "loss": 0.781, "step": 25700 }, { "epoch": 0.26, "learning_rate": 7.421357243530153e-06, "loss": 0.7754, "step": 25800 }, { "epoch": 0.26, "learning_rate": 7.411361341849842e-06, "loss": 0.7647, "step": 25900 }, { "epoch": 0.26, "learning_rate": 7.401365440169531e-06, "loss": 0.7641, "step": 26000 }, { "epoch": 0.26, "learning_rate": 7.39136953848922e-06, "loss": 0.7744, "step": 26100 }, { "epoch": 0.26, "learning_rate": 7.3813736368089095e-06, "loss": 0.774, "step": 26200 }, { "epoch": 0.26, "learning_rate": 7.371377735128598e-06, "loss": 0.7651, "step": 26300 }, { "epoch": 0.26, "learning_rate": 7.361381833448287e-06, "loss": 0.7605, "step": 26400 }, { "epoch": 0.26, "learning_rate": 7.3513859317679755e-06, "loss": 0.7753, "step": 26500 }, { "epoch": 0.27, "learning_rate": 7.341390030087665e-06, "loss": 0.7682, "step": 26600 }, { "epoch": 0.27, "learning_rate": 7.331394128407353e-06, "loss": 0.7663, "step": 26700 }, { "epoch": 0.27, "learning_rate": 7.321398226727042e-06, "loss": 0.7652, "step": 26800 }, { "epoch": 0.27, "learning_rate": 7.3114023250467315e-06, "loss": 0.7623, "step": 26900 }, { "epoch": 0.27, "learning_rate": 7.301406423366421e-06, "loss": 0.768, "step": 27000 }, { "epoch": 0.27, "learning_rate": 7.29141052168611e-06, "loss": 0.7627, "step": 27100 }, { "epoch": 0.27, "learning_rate": 7.2814146200057975e-06, "loss": 0.7555, "step": 27200 }, { "epoch": 0.27, "learning_rate": 7.271418718325487e-06, "loss": 0.7667, "step": 27300 }, { "epoch": 0.27, "learning_rate": 7.261422816645176e-06, "loss": 0.7544, "step": 27400 }, { "epoch": 0.27, "learning_rate": 7.251426914964865e-06, "loss": 0.7584, "step": 27500 }, { "epoch": 0.28, "learning_rate": 7.241431013284554e-06, "loss": 0.764, "step": 27600 }, { "epoch": 0.28, "learning_rate": 7.231435111604243e-06, "loss": 0.766, "step": 27700 }, { "epoch": 0.28, "learning_rate": 7.221439209923932e-06, "loss": 0.7607, "step": 27800 }, { "epoch": 0.28, "learning_rate": 7.211443308243621e-06, "loss": 0.748, "step": 27900 }, { "epoch": 0.28, "learning_rate": 7.20144740656331e-06, "loss": 0.7565, "step": 28000 }, { "epoch": 0.28, "learning_rate": 7.191451504882998e-06, "loss": 0.7553, "step": 28100 }, { "epoch": 0.28, "learning_rate": 7.181455603202687e-06, "loss": 0.7592, "step": 28200 }, { "epoch": 0.28, "learning_rate": 7.171459701522376e-06, "loss": 0.7463, "step": 28300 }, { "epoch": 0.28, "learning_rate": 7.1614637998420656e-06, "loss": 0.7468, "step": 28400 }, { "epoch": 0.28, "learning_rate": 7.151467898161755e-06, "loss": 0.7543, "step": 28500 }, { "epoch": 0.29, "learning_rate": 7.141471996481443e-06, "loss": 0.759, "step": 28600 }, { "epoch": 0.29, "learning_rate": 7.1314760948011315e-06, "loss": 0.7443, "step": 28700 }, { "epoch": 0.29, "learning_rate": 7.121480193120821e-06, "loss": 0.7498, "step": 28800 }, { "epoch": 0.29, "learning_rate": 7.11148429144051e-06, "loss": 0.7458, "step": 28900 }, { "epoch": 0.29, "learning_rate": 7.101488389760199e-06, "loss": 0.7495, "step": 29000 }, { "epoch": 0.29, "learning_rate": 7.0914924880798875e-06, "loss": 0.7317, "step": 29100 }, { "epoch": 0.29, "learning_rate": 7.081496586399577e-06, "loss": 0.7429, "step": 29200 }, { "epoch": 0.29, "learning_rate": 7.071500684719266e-06, "loss": 0.7284, "step": 29300 }, { "epoch": 0.29, "learning_rate": 7.061504783038955e-06, "loss": 0.7388, "step": 29400 }, { "epoch": 0.29, "learning_rate": 7.0515088813586444e-06, "loss": 0.7364, "step": 29500 }, { "epoch": 0.3, "learning_rate": 7.041512979678332e-06, "loss": 0.7364, "step": 29600 }, { "epoch": 0.3, "learning_rate": 7.031517077998021e-06, "loss": 0.7358, "step": 29700 }, { "epoch": 0.3, "learning_rate": 7.02152117631771e-06, "loss": 0.7347, "step": 29800 }, { "epoch": 0.3, "learning_rate": 7.0115252746374e-06, "loss": 0.7338, "step": 29900 }, { "epoch": 0.3, "learning_rate": 7.001529372957088e-06, "loss": 0.7403, "step": 30000 }, { "epoch": 0.3, "learning_rate": 6.991533471276777e-06, "loss": 0.7319, "step": 30100 }, { "epoch": 0.3, "learning_rate": 6.981537569596466e-06, "loss": 0.7356, "step": 30200 }, { "epoch": 0.3, "learning_rate": 6.971541667916155e-06, "loss": 0.725, "step": 30300 }, { "epoch": 0.3, "learning_rate": 6.961545766235844e-06, "loss": 0.7262, "step": 30400 }, { "epoch": 0.3, "learning_rate": 6.951549864555532e-06, "loss": 0.7348, "step": 30500 }, { "epoch": 0.31, "learning_rate": 6.941553962875222e-06, "loss": 0.7276, "step": 30600 }, { "epoch": 0.31, "learning_rate": 6.931558061194911e-06, "loss": 0.7252, "step": 30700 }, { "epoch": 0.31, "learning_rate": 6.9215621595146e-06, "loss": 0.7324, "step": 30800 }, { "epoch": 0.31, "learning_rate": 6.911566257834289e-06, "loss": 0.7426, "step": 30900 }, { "epoch": 0.31, "learning_rate": 6.901570356153977e-06, "loss": 0.7332, "step": 31000 }, { "epoch": 0.31, "learning_rate": 6.891574454473666e-06, "loss": 0.7355, "step": 31100 }, { "epoch": 0.31, "learning_rate": 6.881578552793355e-06, "loss": 0.7296, "step": 31200 }, { "epoch": 0.31, "learning_rate": 6.8715826511130444e-06, "loss": 0.7284, "step": 31300 }, { "epoch": 0.31, "learning_rate": 6.861586749432733e-06, "loss": 0.7217, "step": 31400 }, { "epoch": 0.31, "learning_rate": 6.851590847752422e-06, "loss": 0.7216, "step": 31500 }, { "epoch": 0.32, "learning_rate": 6.841594946072111e-06, "loss": 0.7139, "step": 31600 }, { "epoch": 0.32, "learning_rate": 6.8315990443918005e-06, "loss": 0.7227, "step": 31700 }, { "epoch": 0.32, "learning_rate": 6.82160314271149e-06, "loss": 0.7221, "step": 31800 }, { "epoch": 0.32, "learning_rate": 6.811607241031177e-06, "loss": 0.7082, "step": 31900 }, { "epoch": 0.32, "learning_rate": 6.8016113393508664e-06, "loss": 0.72, "step": 32000 }, { "epoch": 0.32, "learning_rate": 6.791615437670556e-06, "loss": 0.7134, "step": 32100 }, { "epoch": 0.32, "learning_rate": 6.781619535990245e-06, "loss": 0.719, "step": 32200 }, { "epoch": 0.32, "learning_rate": 6.771623634309934e-06, "loss": 0.7083, "step": 32300 }, { "epoch": 0.32, "learning_rate": 6.761627732629622e-06, "loss": 0.7148, "step": 32400 }, { "epoch": 0.32, "learning_rate": 6.751631830949311e-06, "loss": 0.7229, "step": 32500 }, { "epoch": 0.33, "learning_rate": 6.741635929269e-06, "loss": 0.7173, "step": 32600 }, { "epoch": 0.33, "learning_rate": 6.731640027588689e-06, "loss": 0.7118, "step": 32700 }, { "epoch": 0.33, "learning_rate": 6.7216441259083785e-06, "loss": 0.7072, "step": 32800 }, { "epoch": 0.33, "learning_rate": 6.711648224228067e-06, "loss": 0.7174, "step": 32900 }, { "epoch": 0.33, "learning_rate": 6.701652322547756e-06, "loss": 0.7125, "step": 33000 }, { "epoch": 0.33, "learning_rate": 6.691656420867445e-06, "loss": 0.705, "step": 33100 }, { "epoch": 0.33, "learning_rate": 6.6816605191871345e-06, "loss": 0.7023, "step": 33200 }, { "epoch": 0.33, "learning_rate": 6.671664617506822e-06, "loss": 0.7114, "step": 33300 }, { "epoch": 0.33, "learning_rate": 6.661668715826511e-06, "loss": 0.7038, "step": 33400 }, { "epoch": 0.33, "learning_rate": 6.6516728141462005e-06, "loss": 0.7051, "step": 33500 }, { "epoch": 0.34, "learning_rate": 6.64167691246589e-06, "loss": 0.6988, "step": 33600 }, { "epoch": 0.34, "learning_rate": 6.631681010785579e-06, "loss": 0.7131, "step": 33700 }, { "epoch": 0.34, "learning_rate": 6.621685109105267e-06, "loss": 0.6983, "step": 33800 }, { "epoch": 0.34, "learning_rate": 6.6116892074249565e-06, "loss": 0.6978, "step": 33900 }, { "epoch": 0.34, "learning_rate": 6.601693305744645e-06, "loss": 0.6941, "step": 34000 }, { "epoch": 0.34, "learning_rate": 6.591697404064334e-06, "loss": 0.7028, "step": 34100 }, { "epoch": 0.34, "learning_rate": 6.581701502384023e-06, "loss": 0.6925, "step": 34200 }, { "epoch": 0.34, "learning_rate": 6.571705600703712e-06, "loss": 0.6993, "step": 34300 }, { "epoch": 0.34, "learning_rate": 6.561709699023401e-06, "loss": 0.6894, "step": 34400 }, { "epoch": 0.34, "learning_rate": 6.55171379734309e-06, "loss": 0.7054, "step": 34500 }, { "epoch": 0.35, "learning_rate": 6.541717895662779e-06, "loss": 0.708, "step": 34600 }, { "epoch": 0.35, "learning_rate": 6.531721993982467e-06, "loss": 0.6977, "step": 34700 }, { "epoch": 0.35, "learning_rate": 6.521726092302156e-06, "loss": 0.6997, "step": 34800 }, { "epoch": 0.35, "learning_rate": 6.511730190621845e-06, "loss": 0.6984, "step": 34900 }, { "epoch": 0.35, "learning_rate": 6.5017342889415345e-06, "loss": 0.6955, "step": 35000 }, { "epoch": 0.35, "learning_rate": 6.491738387261224e-06, "loss": 0.6885, "step": 35100 }, { "epoch": 0.35, "learning_rate": 6.481742485580912e-06, "loss": 0.6926, "step": 35200 }, { "epoch": 0.35, "learning_rate": 6.471746583900601e-06, "loss": 0.6927, "step": 35300 }, { "epoch": 0.35, "learning_rate": 6.4617506822202906e-06, "loss": 0.6877, "step": 35400 }, { "epoch": 0.35, "learning_rate": 6.45175478053998e-06, "loss": 0.6911, "step": 35500 }, { "epoch": 0.36, "learning_rate": 6.441758878859668e-06, "loss": 0.6959, "step": 35600 }, { "epoch": 0.36, "learning_rate": 6.4317629771793565e-06, "loss": 0.6741, "step": 35700 }, { "epoch": 0.36, "learning_rate": 6.421767075499046e-06, "loss": 0.6853, "step": 35800 }, { "epoch": 0.36, "learning_rate": 6.411771173818735e-06, "loss": 0.6892, "step": 35900 }, { "epoch": 0.36, "learning_rate": 6.401775272138424e-06, "loss": 0.6864, "step": 36000 }, { "epoch": 0.36, "learning_rate": 6.3917793704581125e-06, "loss": 0.6843, "step": 36100 }, { "epoch": 0.36, "learning_rate": 6.381783468777801e-06, "loss": 0.6959, "step": 36200 }, { "epoch": 0.36, "learning_rate": 6.37178756709749e-06, "loss": 0.6783, "step": 36300 }, { "epoch": 0.36, "learning_rate": 6.361791665417179e-06, "loss": 0.6741, "step": 36400 }, { "epoch": 0.36, "learning_rate": 6.3517957637368686e-06, "loss": 0.6771, "step": 36500 }, { "epoch": 0.37, "learning_rate": 6.341799862056557e-06, "loss": 0.6699, "step": 36600 }, { "epoch": 0.37, "learning_rate": 6.331803960376246e-06, "loss": 0.684, "step": 36700 }, { "epoch": 0.37, "learning_rate": 6.321808058695935e-06, "loss": 0.675, "step": 36800 }, { "epoch": 0.37, "learning_rate": 6.311812157015625e-06, "loss": 0.6857, "step": 36900 }, { "epoch": 0.37, "learning_rate": 6.301816255335314e-06, "loss": 0.6782, "step": 37000 }, { "epoch": 0.37, "learning_rate": 6.291820353655001e-06, "loss": 0.6692, "step": 37100 }, { "epoch": 0.37, "learning_rate": 6.2818244519746906e-06, "loss": 0.6789, "step": 37200 }, { "epoch": 0.37, "learning_rate": 6.27182855029438e-06, "loss": 0.6692, "step": 37300 }, { "epoch": 0.37, "learning_rate": 6.261832648614069e-06, "loss": 0.668, "step": 37400 }, { "epoch": 0.37, "learning_rate": 6.251836746933758e-06, "loss": 0.6657, "step": 37500 }, { "epoch": 0.38, "learning_rate": 6.241840845253447e-06, "loss": 0.6699, "step": 37600 }, { "epoch": 0.38, "learning_rate": 6.231844943573136e-06, "loss": 0.6695, "step": 37700 }, { "epoch": 0.38, "learning_rate": 6.221849041892824e-06, "loss": 0.6613, "step": 37800 }, { "epoch": 0.38, "learning_rate": 6.211853140212513e-06, "loss": 0.6711, "step": 37900 }, { "epoch": 0.38, "learning_rate": 6.201857238532202e-06, "loss": 0.6647, "step": 38000 }, { "epoch": 0.38, "learning_rate": 6.191861336851891e-06, "loss": 0.6625, "step": 38100 }, { "epoch": 0.38, "learning_rate": 6.18186543517158e-06, "loss": 0.6593, "step": 38200 }, { "epoch": 0.38, "learning_rate": 6.1718695334912694e-06, "loss": 0.6718, "step": 38300 }, { "epoch": 0.38, "learning_rate": 6.161873631810959e-06, "loss": 0.6642, "step": 38400 }, { "epoch": 0.38, "learning_rate": 6.151877730130646e-06, "loss": 0.6564, "step": 38500 }, { "epoch": 0.39, "learning_rate": 6.141881828450335e-06, "loss": 0.6584, "step": 38600 }, { "epoch": 0.39, "learning_rate": 6.131885926770025e-06, "loss": 0.6569, "step": 38700 }, { "epoch": 0.39, "learning_rate": 6.121890025089714e-06, "loss": 0.6643, "step": 38800 }, { "epoch": 0.39, "learning_rate": 6.111894123409403e-06, "loss": 0.6603, "step": 38900 }, { "epoch": 0.39, "learning_rate": 6.101898221729091e-06, "loss": 0.6656, "step": 39000 }, { "epoch": 0.39, "learning_rate": 6.091902320048781e-06, "loss": 0.664, "step": 39100 }, { "epoch": 0.39, "learning_rate": 6.08190641836847e-06, "loss": 0.6555, "step": 39200 }, { "epoch": 0.39, "learning_rate": 6.071910516688159e-06, "loss": 0.6534, "step": 39300 }, { "epoch": 0.39, "learning_rate": 6.061914615007847e-06, "loss": 0.6555, "step": 39400 }, { "epoch": 0.39, "learning_rate": 6.051918713327536e-06, "loss": 0.6525, "step": 39500 }, { "epoch": 0.4, "learning_rate": 6.041922811647225e-06, "loss": 0.6548, "step": 39600 }, { "epoch": 0.4, "learning_rate": 6.031926909966914e-06, "loss": 0.6439, "step": 39700 }, { "epoch": 0.4, "learning_rate": 6.0219310082866035e-06, "loss": 0.6604, "step": 39800 }, { "epoch": 0.4, "learning_rate": 6.011935106606291e-06, "loss": 0.6473, "step": 39900 }, { "epoch": 0.4, "learning_rate": 6.00193920492598e-06, "loss": 0.6473, "step": 40000 }, { "epoch": 0.4, "learning_rate": 5.9919433032456694e-06, "loss": 0.6405, "step": 40100 }, { "epoch": 0.4, "learning_rate": 5.981947401565359e-06, "loss": 0.6438, "step": 40200 }, { "epoch": 0.4, "learning_rate": 5.971951499885048e-06, "loss": 0.6455, "step": 40300 }, { "epoch": 0.4, "learning_rate": 5.961955598204736e-06, "loss": 0.6465, "step": 40400 }, { "epoch": 0.4, "learning_rate": 5.9519596965244255e-06, "loss": 0.6487, "step": 40500 }, { "epoch": 0.41, "learning_rate": 5.941963794844115e-06, "loss": 0.6455, "step": 40600 }, { "epoch": 0.41, "learning_rate": 5.931967893163804e-06, "loss": 0.6373, "step": 40700 }, { "epoch": 0.41, "learning_rate": 5.921971991483493e-06, "loss": 0.6366, "step": 40800 }, { "epoch": 0.41, "learning_rate": 5.911976089803181e-06, "loss": 0.6443, "step": 40900 }, { "epoch": 0.41, "learning_rate": 5.90198018812287e-06, "loss": 0.6314, "step": 41000 }, { "epoch": 0.41, "learning_rate": 5.891984286442559e-06, "loss": 0.6372, "step": 41100 }, { "epoch": 0.41, "learning_rate": 5.881988384762248e-06, "loss": 0.6376, "step": 41200 }, { "epoch": 0.41, "learning_rate": 5.871992483081937e-06, "loss": 0.6455, "step": 41300 }, { "epoch": 0.41, "learning_rate": 5.861996581401626e-06, "loss": 0.6482, "step": 41400 }, { "epoch": 0.41, "learning_rate": 5.852000679721314e-06, "loss": 0.6345, "step": 41500 }, { "epoch": 0.42, "learning_rate": 5.8420047780410035e-06, "loss": 0.6328, "step": 41600 }, { "epoch": 0.42, "learning_rate": 5.832008876360693e-06, "loss": 0.627, "step": 41700 }, { "epoch": 0.42, "learning_rate": 5.822012974680381e-06, "loss": 0.6328, "step": 41800 }, { "epoch": 0.42, "learning_rate": 5.81201707300007e-06, "loss": 0.6312, "step": 41900 }, { "epoch": 0.42, "learning_rate": 5.8020211713197595e-06, "loss": 0.6348, "step": 42000 }, { "epoch": 0.42, "learning_rate": 5.792025269639449e-06, "loss": 0.6356, "step": 42100 }, { "epoch": 0.42, "learning_rate": 5.782029367959138e-06, "loss": 0.6383, "step": 42200 }, { "epoch": 0.42, "learning_rate": 5.7720334662788255e-06, "loss": 0.6286, "step": 42300 }, { "epoch": 0.42, "learning_rate": 5.762037564598515e-06, "loss": 0.6327, "step": 42400 }, { "epoch": 0.42, "learning_rate": 5.752041662918204e-06, "loss": 0.6379, "step": 42500 }, { "epoch": 0.43, "learning_rate": 5.742045761237893e-06, "loss": 0.6377, "step": 42600 }, { "epoch": 0.43, "learning_rate": 5.7320498595575815e-06, "loss": 0.6294, "step": 42700 }, { "epoch": 0.43, "learning_rate": 5.722053957877271e-06, "loss": 0.614, "step": 42800 }, { "epoch": 0.43, "learning_rate": 5.71205805619696e-06, "loss": 0.6243, "step": 42900 }, { "epoch": 0.43, "learning_rate": 5.702062154516649e-06, "loss": 0.6283, "step": 43000 }, { "epoch": 0.43, "learning_rate": 5.6920662528363375e-06, "loss": 0.6191, "step": 43100 }, { "epoch": 0.43, "learning_rate": 5.682070351156026e-06, "loss": 0.6263, "step": 43200 }, { "epoch": 0.43, "learning_rate": 5.672074449475715e-06, "loss": 0.6143, "step": 43300 }, { "epoch": 0.43, "learning_rate": 5.662078547795404e-06, "loss": 0.6165, "step": 43400 }, { "epoch": 0.43, "learning_rate": 5.6520826461150936e-06, "loss": 0.6137, "step": 43500 }, { "epoch": 0.44, "learning_rate": 5.642086744434783e-06, "loss": 0.6217, "step": 43600 }, { "epoch": 0.44, "learning_rate": 5.63209084275447e-06, "loss": 0.6211, "step": 43700 }, { "epoch": 0.44, "learning_rate": 5.6220949410741595e-06, "loss": 0.6216, "step": 43800 }, { "epoch": 0.44, "learning_rate": 5.612099039393849e-06, "loss": 0.6126, "step": 43900 }, { "epoch": 0.44, "learning_rate": 5.602103137713538e-06, "loss": 0.6142, "step": 44000 }, { "epoch": 0.44, "learning_rate": 5.592107236033227e-06, "loss": 0.6099, "step": 44100 }, { "epoch": 0.44, "learning_rate": 5.5821113343529156e-06, "loss": 0.6162, "step": 44200 }, { "epoch": 0.44, "learning_rate": 5.572115432672605e-06, "loss": 0.6153, "step": 44300 }, { "epoch": 0.44, "learning_rate": 5.562119530992294e-06, "loss": 0.6074, "step": 44400 }, { "epoch": 0.44, "learning_rate": 5.552123629311983e-06, "loss": 0.6148, "step": 44500 }, { "epoch": 0.45, "learning_rate": 5.542127727631671e-06, "loss": 0.6129, "step": 44600 }, { "epoch": 0.45, "learning_rate": 5.53213182595136e-06, "loss": 0.6148, "step": 44700 }, { "epoch": 0.45, "learning_rate": 5.522135924271049e-06, "loss": 0.6004, "step": 44800 }, { "epoch": 0.45, "learning_rate": 5.512140022590738e-06, "loss": 0.6047, "step": 44900 }, { "epoch": 0.45, "learning_rate": 5.502144120910428e-06, "loss": 0.6136, "step": 45000 }, { "epoch": 0.45, "learning_rate": 5.492148219230116e-06, "loss": 0.6016, "step": 45100 }, { "epoch": 0.45, "learning_rate": 5.482152317549805e-06, "loss": 0.6083, "step": 45200 }, { "epoch": 0.45, "learning_rate": 5.472156415869494e-06, "loss": 0.6135, "step": 45300 }, { "epoch": 0.45, "learning_rate": 5.462160514189183e-06, "loss": 0.6077, "step": 45400 }, { "epoch": 0.45, "learning_rate": 5.452164612508872e-06, "loss": 0.6039, "step": 45500 }, { "epoch": 0.46, "learning_rate": 5.44216871082856e-06, "loss": 0.6045, "step": 45600 }, { "epoch": 0.46, "learning_rate": 5.43217280914825e-06, "loss": 0.6093, "step": 45700 }, { "epoch": 0.46, "learning_rate": 5.422176907467939e-06, "loss": 0.6107, "step": 45800 }, { "epoch": 0.46, "learning_rate": 5.412181005787628e-06, "loss": 0.6076, "step": 45900 }, { "epoch": 0.46, "learning_rate": 5.4021851041073156e-06, "loss": 0.5965, "step": 46000 }, { "epoch": 0.46, "learning_rate": 5.392189202427005e-06, "loss": 0.6015, "step": 46100 }, { "epoch": 0.46, "learning_rate": 5.382193300746694e-06, "loss": 0.6067, "step": 46200 }, { "epoch": 0.46, "learning_rate": 5.372197399066383e-06, "loss": 0.5977, "step": 46300 }, { "epoch": 0.46, "learning_rate": 5.3622014973860724e-06, "loss": 0.5941, "step": 46400 }, { "epoch": 0.46, "learning_rate": 5.352205595705761e-06, "loss": 0.6005, "step": 46500 }, { "epoch": 0.47, "learning_rate": 5.34220969402545e-06, "loss": 0.5917, "step": 46600 }, { "epoch": 0.47, "learning_rate": 5.332213792345139e-06, "loss": 0.5976, "step": 46700 }, { "epoch": 0.47, "learning_rate": 5.3222178906648285e-06, "loss": 0.6022, "step": 46800 }, { "epoch": 0.47, "learning_rate": 5.312221988984517e-06, "loss": 0.5941, "step": 46900 }, { "epoch": 0.47, "learning_rate": 5.302226087304205e-06, "loss": 0.5894, "step": 47000 }, { "epoch": 0.47, "learning_rate": 5.2922301856238944e-06, "loss": 0.5959, "step": 47100 }, { "epoch": 0.47, "learning_rate": 5.282234283943584e-06, "loss": 0.6019, "step": 47200 }, { "epoch": 0.47, "learning_rate": 5.272238382263273e-06, "loss": 0.5858, "step": 47300 }, { "epoch": 0.47, "learning_rate": 5.262242480582962e-06, "loss": 0.5898, "step": 47400 }, { "epoch": 0.47, "learning_rate": 5.25224657890265e-06, "loss": 0.5963, "step": 47500 }, { "epoch": 0.48, "learning_rate": 5.242250677222339e-06, "loss": 0.5891, "step": 47600 }, { "epoch": 0.48, "learning_rate": 5.232254775542028e-06, "loss": 0.5905, "step": 47700 }, { "epoch": 0.48, "learning_rate": 5.222258873861717e-06, "loss": 0.5944, "step": 47800 }, { "epoch": 0.48, "learning_rate": 5.212262972181406e-06, "loss": 0.585, "step": 47900 }, { "epoch": 0.48, "learning_rate": 5.202267070501095e-06, "loss": 0.5844, "step": 48000 }, { "epoch": 0.48, "learning_rate": 5.192271168820784e-06, "loss": 0.586, "step": 48100 }, { "epoch": 0.48, "learning_rate": 5.182275267140473e-06, "loss": 0.5918, "step": 48200 }, { "epoch": 0.48, "learning_rate": 5.1722793654601625e-06, "loss": 0.5756, "step": 48300 }, { "epoch": 0.48, "learning_rate": 5.16228346377985e-06, "loss": 0.5863, "step": 48400 }, { "epoch": 0.48, "learning_rate": 5.152287562099539e-06, "loss": 0.5903, "step": 48500 }, { "epoch": 0.49, "learning_rate": 5.1422916604192285e-06, "loss": 0.5843, "step": 48600 }, { "epoch": 0.49, "learning_rate": 5.132295758738918e-06, "loss": 0.5743, "step": 48700 }, { "epoch": 0.49, "learning_rate": 5.122299857058607e-06, "loss": 0.5789, "step": 48800 }, { "epoch": 0.49, "learning_rate": 5.112303955378295e-06, "loss": 0.5777, "step": 48900 }, { "epoch": 0.49, "learning_rate": 5.1023080536979845e-06, "loss": 0.5762, "step": 49000 }, { "epoch": 0.49, "learning_rate": 5.092312152017673e-06, "loss": 0.5751, "step": 49100 }, { "epoch": 0.49, "learning_rate": 5.082316250337362e-06, "loss": 0.5698, "step": 49200 }, { "epoch": 0.49, "learning_rate": 5.0723203486570505e-06, "loss": 0.5774, "step": 49300 }, { "epoch": 0.49, "learning_rate": 5.06232444697674e-06, "loss": 0.5868, "step": 49400 }, { "epoch": 0.49, "learning_rate": 5.052328545296429e-06, "loss": 0.5693, "step": 49500 }, { "epoch": 0.5, "learning_rate": 5.042332643616118e-06, "loss": 0.5721, "step": 49600 }, { "epoch": 0.5, "learning_rate": 5.032336741935807e-06, "loss": 0.5667, "step": 49700 }, { "epoch": 0.5, "learning_rate": 5.022340840255495e-06, "loss": 0.5655, "step": 49800 }, { "epoch": 0.5, "learning_rate": 5.012344938575184e-06, "loss": 0.572, "step": 49900 }, { "epoch": 0.5, "learning_rate": 5.002349036894873e-06, "loss": 0.5827, "step": 50000 }, { "epoch": 0.5, "learning_rate": 4.9923531352145625e-06, "loss": 0.5666, "step": 50100 }, { "epoch": 0.5, "learning_rate": 4.982357233534251e-06, "loss": 0.569, "step": 50200 }, { "epoch": 0.5, "learning_rate": 4.97236133185394e-06, "loss": 0.5689, "step": 50300 }, { "epoch": 0.5, "learning_rate": 4.962365430173629e-06, "loss": 0.5698, "step": 50400 }, { "epoch": 0.5, "learning_rate": 4.9523695284933186e-06, "loss": 0.5726, "step": 50500 }, { "epoch": 0.51, "learning_rate": 4.942373626813007e-06, "loss": 0.575, "step": 50600 }, { "epoch": 0.51, "learning_rate": 4.932377725132696e-06, "loss": 0.5653, "step": 50700 }, { "epoch": 0.51, "learning_rate": 4.922381823452385e-06, "loss": 0.571, "step": 50800 }, { "epoch": 0.51, "learning_rate": 4.912385921772074e-06, "loss": 0.5665, "step": 50900 }, { "epoch": 0.51, "learning_rate": 4.902390020091763e-06, "loss": 0.5631, "step": 51000 }, { "epoch": 0.51, "learning_rate": 4.892394118411451e-06, "loss": 0.5666, "step": 51100 }, { "epoch": 0.51, "learning_rate": 4.8823982167311406e-06, "loss": 0.5605, "step": 51200 }, { "epoch": 0.51, "learning_rate": 4.87240231505083e-06, "loss": 0.5664, "step": 51300 }, { "epoch": 0.51, "learning_rate": 4.862406413370518e-06, "loss": 0.5583, "step": 51400 }, { "epoch": 0.51, "learning_rate": 4.852410511690207e-06, "loss": 0.5577, "step": 51500 }, { "epoch": 0.52, "learning_rate": 4.842414610009897e-06, "loss": 0.559, "step": 51600 }, { "epoch": 0.52, "learning_rate": 4.832418708329585e-06, "loss": 0.5554, "step": 51700 }, { "epoch": 0.52, "learning_rate": 4.822422806649274e-06, "loss": 0.5578, "step": 51800 }, { "epoch": 0.52, "learning_rate": 4.812426904968963e-06, "loss": 0.5585, "step": 51900 }, { "epoch": 0.52, "learning_rate": 4.802431003288653e-06, "loss": 0.5555, "step": 52000 }, { "epoch": 0.52, "learning_rate": 4.792435101608341e-06, "loss": 0.5559, "step": 52100 }, { "epoch": 0.52, "learning_rate": 4.78243919992803e-06, "loss": 0.5547, "step": 52200 }, { "epoch": 0.52, "learning_rate": 4.7724432982477186e-06, "loss": 0.5612, "step": 52300 }, { "epoch": 0.52, "learning_rate": 4.762447396567408e-06, "loss": 0.5531, "step": 52400 }, { "epoch": 0.52, "learning_rate": 4.752451494887096e-06, "loss": 0.5511, "step": 52500 }, { "epoch": 0.53, "learning_rate": 4.742455593206785e-06, "loss": 0.5535, "step": 52600 }, { "epoch": 0.53, "learning_rate": 4.732459691526475e-06, "loss": 0.5539, "step": 52700 }, { "epoch": 0.53, "learning_rate": 4.722463789846163e-06, "loss": 0.549, "step": 52800 }, { "epoch": 0.53, "learning_rate": 4.712467888165852e-06, "loss": 0.5489, "step": 52900 }, { "epoch": 0.53, "learning_rate": 4.702471986485541e-06, "loss": 0.5489, "step": 53000 }, { "epoch": 0.53, "learning_rate": 4.692476084805231e-06, "loss": 0.5561, "step": 53100 }, { "epoch": 0.53, "learning_rate": 4.682480183124919e-06, "loss": 0.5433, "step": 53200 }, { "epoch": 0.53, "learning_rate": 4.672484281444608e-06, "loss": 0.5429, "step": 53300 }, { "epoch": 0.53, "learning_rate": 4.6624883797642974e-06, "loss": 0.5369, "step": 53400 }, { "epoch": 0.53, "learning_rate": 4.652492478083986e-06, "loss": 0.5521, "step": 53500 }, { "epoch": 0.54, "learning_rate": 4.642496576403675e-06, "loss": 0.5414, "step": 53600 }, { "epoch": 0.54, "learning_rate": 4.632500674723363e-06, "loss": 0.5415, "step": 53700 }, { "epoch": 0.54, "learning_rate": 4.622504773043053e-06, "loss": 0.5442, "step": 53800 }, { "epoch": 0.54, "learning_rate": 4.612508871362741e-06, "loss": 0.5392, "step": 53900 }, { "epoch": 0.54, "learning_rate": 4.60251296968243e-06, "loss": 0.5385, "step": 54000 }, { "epoch": 0.54, "learning_rate": 4.5925170680021194e-06, "loss": 0.5385, "step": 54100 }, { "epoch": 0.54, "learning_rate": 4.582521166321809e-06, "loss": 0.5348, "step": 54200 }, { "epoch": 0.54, "learning_rate": 4.572525264641498e-06, "loss": 0.5387, "step": 54300 }, { "epoch": 0.54, "learning_rate": 4.562529362961186e-06, "loss": 0.536, "step": 54400 }, { "epoch": 0.54, "learning_rate": 4.5525334612808755e-06, "loss": 0.5332, "step": 54500 }, { "epoch": 0.55, "learning_rate": 4.542537559600565e-06, "loss": 0.5299, "step": 54600 }, { "epoch": 0.55, "learning_rate": 4.532541657920253e-06, "loss": 0.5403, "step": 54700 }, { "epoch": 0.55, "learning_rate": 4.522545756239942e-06, "loss": 0.5365, "step": 54800 }, { "epoch": 0.55, "learning_rate": 4.512549854559631e-06, "loss": 0.534, "step": 54900 }, { "epoch": 0.55, "learning_rate": 4.50255395287932e-06, "loss": 0.5358, "step": 55000 }, { "epoch": 0.55, "learning_rate": 4.492558051199008e-06, "loss": 0.5323, "step": 55100 }, { "epoch": 0.55, "learning_rate": 4.4825621495186975e-06, "loss": 0.5297, "step": 55200 }, { "epoch": 0.55, "learning_rate": 4.472566247838387e-06, "loss": 0.5333, "step": 55300 }, { "epoch": 0.55, "learning_rate": 4.462570346158076e-06, "loss": 0.5285, "step": 55400 }, { "epoch": 0.55, "learning_rate": 4.452574444477764e-06, "loss": 0.5328, "step": 55500 }, { "epoch": 0.56, "learning_rate": 4.4425785427974535e-06, "loss": 0.5395, "step": 55600 }, { "epoch": 0.56, "learning_rate": 4.432582641117143e-06, "loss": 0.5259, "step": 55700 }, { "epoch": 0.56, "learning_rate": 4.422586739436831e-06, "loss": 0.5231, "step": 55800 }, { "epoch": 0.56, "learning_rate": 4.41259083775652e-06, "loss": 0.5244, "step": 55900 }, { "epoch": 0.56, "learning_rate": 4.4025949360762095e-06, "loss": 0.5202, "step": 56000 }, { "epoch": 0.56, "learning_rate": 4.392599034395898e-06, "loss": 0.5236, "step": 56100 }, { "epoch": 0.56, "learning_rate": 4.382603132715587e-06, "loss": 0.5271, "step": 56200 }, { "epoch": 0.56, "learning_rate": 4.3726072310352755e-06, "loss": 0.518, "step": 56300 }, { "epoch": 0.56, "learning_rate": 4.362611329354965e-06, "loss": 0.5178, "step": 56400 }, { "epoch": 0.56, "learning_rate": 4.352615427674654e-06, "loss": 0.5288, "step": 56500 }, { "epoch": 0.57, "learning_rate": 4.342619525994342e-06, "loss": 0.5158, "step": 56600 }, { "epoch": 0.57, "learning_rate": 4.3326236243140315e-06, "loss": 0.5155, "step": 56700 }, { "epoch": 0.57, "learning_rate": 4.322627722633721e-06, "loss": 0.5211, "step": 56800 }, { "epoch": 0.57, "learning_rate": 4.31263182095341e-06, "loss": 0.5161, "step": 56900 }, { "epoch": 0.57, "learning_rate": 4.302635919273098e-06, "loss": 0.526, "step": 57000 }, { "epoch": 0.57, "learning_rate": 4.2926400175927875e-06, "loss": 0.5179, "step": 57100 }, { "epoch": 0.57, "learning_rate": 4.282644115912476e-06, "loss": 0.5239, "step": 57200 }, { "epoch": 0.57, "learning_rate": 4.272648214232165e-06, "loss": 0.5117, "step": 57300 }, { "epoch": 0.57, "learning_rate": 4.262652312551854e-06, "loss": 0.5185, "step": 57400 }, { "epoch": 0.57, "learning_rate": 4.252656410871543e-06, "loss": 0.5149, "step": 57500 }, { "epoch": 0.58, "learning_rate": 4.242660509191232e-06, "loss": 0.5135, "step": 57600 }, { "epoch": 0.58, "learning_rate": 4.23266460751092e-06, "loss": 0.5147, "step": 57700 }, { "epoch": 0.58, "learning_rate": 4.2226687058306095e-06, "loss": 0.5162, "step": 57800 }, { "epoch": 0.58, "learning_rate": 4.212672804150299e-06, "loss": 0.5109, "step": 57900 }, { "epoch": 0.58, "learning_rate": 4.202676902469988e-06, "loss": 0.5093, "step": 58000 }, { "epoch": 0.58, "learning_rate": 4.192681000789677e-06, "loss": 0.5135, "step": 58100 }, { "epoch": 0.58, "learning_rate": 4.1826850991093655e-06, "loss": 0.5058, "step": 58200 }, { "epoch": 0.58, "learning_rate": 4.172689197429055e-06, "loss": 0.5082, "step": 58300 }, { "epoch": 0.58, "learning_rate": 4.162693295748743e-06, "loss": 0.5122, "step": 58400 }, { "epoch": 0.58, "learning_rate": 4.152697394068432e-06, "loss": 0.5127, "step": 58500 }, { "epoch": 0.59, "learning_rate": 4.142701492388122e-06, "loss": 0.505, "step": 58600 }, { "epoch": 0.59, "learning_rate": 4.13270559070781e-06, "loss": 0.5058, "step": 58700 }, { "epoch": 0.59, "learning_rate": 4.122709689027499e-06, "loss": 0.5049, "step": 58800 }, { "epoch": 0.59, "learning_rate": 4.1127137873471875e-06, "loss": 0.5055, "step": 58900 }, { "epoch": 0.59, "learning_rate": 4.102717885666877e-06, "loss": 0.5061, "step": 59000 }, { "epoch": 0.59, "learning_rate": 4.092721983986566e-06, "loss": 0.5015, "step": 59100 }, { "epoch": 0.59, "learning_rate": 4.082726082306255e-06, "loss": 0.5052, "step": 59200 }, { "epoch": 0.59, "learning_rate": 4.0727301806259436e-06, "loss": 0.5028, "step": 59300 }, { "epoch": 0.59, "learning_rate": 4.062734278945633e-06, "loss": 0.504, "step": 59400 }, { "epoch": 0.59, "learning_rate": 4.052738377265322e-06, "loss": 0.5079, "step": 59500 }, { "epoch": 0.6, "learning_rate": 4.04274247558501e-06, "loss": 0.4977, "step": 59600 }, { "epoch": 0.6, "learning_rate": 4.0327465739047e-06, "loss": 0.4993, "step": 59700 }, { "epoch": 0.6, "learning_rate": 4.022750672224388e-06, "loss": 0.4933, "step": 59800 }, { "epoch": 0.6, "learning_rate": 4.012754770544077e-06, "loss": 0.4999, "step": 59900 }, { "epoch": 0.6, "learning_rate": 4.002758868863766e-06, "loss": 0.4964, "step": 60000 }, { "epoch": 0.6, "learning_rate": 3.992762967183455e-06, "loss": 0.5002, "step": 60100 }, { "epoch": 0.6, "learning_rate": 3.982767065503144e-06, "loss": 0.4923, "step": 60200 }, { "epoch": 0.6, "learning_rate": 3.972771163822832e-06, "loss": 0.4876, "step": 60300 }, { "epoch": 0.6, "learning_rate": 3.962775262142522e-06, "loss": 0.4963, "step": 60400 }, { "epoch": 0.6, "learning_rate": 3.952779360462211e-06, "loss": 0.5016, "step": 60500 }, { "epoch": 0.61, "learning_rate": 3.9427834587819e-06, "loss": 0.497, "step": 60600 }, { "epoch": 0.61, "learning_rate": 3.932787557101589e-06, "loss": 0.4883, "step": 60700 }, { "epoch": 0.61, "learning_rate": 3.922791655421278e-06, "loss": 0.4895, "step": 60800 }, { "epoch": 0.61, "learning_rate": 3.912795753740967e-06, "loss": 0.4914, "step": 60900 }, { "epoch": 0.61, "learning_rate": 3.902799852060655e-06, "loss": 0.4935, "step": 61000 }, { "epoch": 0.61, "learning_rate": 3.8928039503803444e-06, "loss": 0.4904, "step": 61100 }, { "epoch": 0.61, "learning_rate": 3.882808048700033e-06, "loss": 0.493, "step": 61200 }, { "epoch": 0.61, "learning_rate": 3.872812147019722e-06, "loss": 0.4843, "step": 61300 }, { "epoch": 0.61, "learning_rate": 3.862816245339411e-06, "loss": 0.4867, "step": 61400 }, { "epoch": 0.61, "learning_rate": 3.8528203436591e-06, "loss": 0.4933, "step": 61500 }, { "epoch": 0.62, "learning_rate": 3.842824441978789e-06, "loss": 0.4823, "step": 61600 }, { "epoch": 0.62, "learning_rate": 3.832828540298478e-06, "loss": 0.4873, "step": 61700 }, { "epoch": 0.62, "learning_rate": 3.822832638618167e-06, "loss": 0.4841, "step": 61800 }, { "epoch": 0.62, "learning_rate": 3.812836736937856e-06, "loss": 0.4867, "step": 61900 }, { "epoch": 0.62, "learning_rate": 3.802840835257545e-06, "loss": 0.4837, "step": 62000 }, { "epoch": 0.62, "learning_rate": 3.7928449335772337e-06, "loss": 0.4861, "step": 62100 }, { "epoch": 0.62, "learning_rate": 3.7828490318969224e-06, "loss": 0.4799, "step": 62200 }, { "epoch": 0.62, "learning_rate": 3.7728531302166117e-06, "loss": 0.4803, "step": 62300 }, { "epoch": 0.62, "learning_rate": 3.7628572285363e-06, "loss": 0.4808, "step": 62400 }, { "epoch": 0.62, "learning_rate": 3.7528613268559893e-06, "loss": 0.4829, "step": 62500 }, { "epoch": 0.63, "learning_rate": 3.7428654251756785e-06, "loss": 0.4802, "step": 62600 }, { "epoch": 0.63, "learning_rate": 3.7328695234953673e-06, "loss": 0.4816, "step": 62700 }, { "epoch": 0.63, "learning_rate": 3.7228736218150565e-06, "loss": 0.4847, "step": 62800 }, { "epoch": 0.63, "learning_rate": 3.712877720134745e-06, "loss": 0.4831, "step": 62900 }, { "epoch": 0.63, "learning_rate": 3.702881818454434e-06, "loss": 0.4816, "step": 63000 }, { "epoch": 0.63, "learning_rate": 3.692885916774123e-06, "loss": 0.474, "step": 63100 }, { "epoch": 0.63, "learning_rate": 3.6828900150938117e-06, "loss": 0.479, "step": 63200 }, { "epoch": 0.63, "learning_rate": 3.672894113413501e-06, "loss": 0.4773, "step": 63300 }, { "epoch": 0.63, "learning_rate": 3.6628982117331897e-06, "loss": 0.4765, "step": 63400 }, { "epoch": 0.63, "learning_rate": 3.652902310052879e-06, "loss": 0.4813, "step": 63500 }, { "epoch": 0.64, "learning_rate": 3.6429064083725673e-06, "loss": 0.4786, "step": 63600 }, { "epoch": 0.64, "learning_rate": 3.6329105066922565e-06, "loss": 0.4815, "step": 63700 }, { "epoch": 0.64, "learning_rate": 3.6229146050119453e-06, "loss": 0.4771, "step": 63800 }, { "epoch": 0.64, "learning_rate": 3.6129187033316345e-06, "loss": 0.4769, "step": 63900 }, { "epoch": 0.64, "learning_rate": 3.6029228016513233e-06, "loss": 0.4739, "step": 64000 }, { "epoch": 0.64, "learning_rate": 3.592926899971012e-06, "loss": 0.4776, "step": 64100 }, { "epoch": 0.64, "learning_rate": 3.5829309982907013e-06, "loss": 0.4695, "step": 64200 }, { "epoch": 0.64, "learning_rate": 3.5729350966103897e-06, "loss": 0.4606, "step": 64300 }, { "epoch": 0.64, "learning_rate": 3.562939194930079e-06, "loss": 0.4716, "step": 64400 }, { "epoch": 0.64, "learning_rate": 3.5529432932497677e-06, "loss": 0.4677, "step": 64500 }, { "epoch": 0.65, "learning_rate": 3.542947391569457e-06, "loss": 0.4712, "step": 64600 }, { "epoch": 0.65, "learning_rate": 3.532951489889146e-06, "loss": 0.4682, "step": 64700 }, { "epoch": 0.65, "learning_rate": 3.5229555882088345e-06, "loss": 0.4672, "step": 64800 }, { "epoch": 0.65, "learning_rate": 3.5129596865285237e-06, "loss": 0.4691, "step": 64900 }, { "epoch": 0.65, "learning_rate": 3.502963784848212e-06, "loss": 0.4656, "step": 65000 }, { "epoch": 0.65, "learning_rate": 3.4929678831679013e-06, "loss": 0.4619, "step": 65100 }, { "epoch": 0.65, "learning_rate": 3.4829719814875905e-06, "loss": 0.4595, "step": 65200 }, { "epoch": 0.65, "learning_rate": 3.4729760798072793e-06, "loss": 0.4688, "step": 65300 }, { "epoch": 0.65, "learning_rate": 3.4629801781269686e-06, "loss": 0.4658, "step": 65400 }, { "epoch": 0.65, "learning_rate": 3.452984276446657e-06, "loss": 0.4598, "step": 65500 }, { "epoch": 0.66, "learning_rate": 3.442988374766346e-06, "loss": 0.4623, "step": 65600 }, { "epoch": 0.66, "learning_rate": 3.432992473086035e-06, "loss": 0.4558, "step": 65700 }, { "epoch": 0.66, "learning_rate": 3.4229965714057237e-06, "loss": 0.4675, "step": 65800 }, { "epoch": 0.66, "learning_rate": 3.413000669725413e-06, "loss": 0.4548, "step": 65900 }, { "epoch": 0.66, "learning_rate": 3.4030047680451018e-06, "loss": 0.4635, "step": 66000 }, { "epoch": 0.66, "learning_rate": 3.393008866364791e-06, "loss": 0.4551, "step": 66100 }, { "epoch": 0.66, "learning_rate": 3.3830129646844793e-06, "loss": 0.4619, "step": 66200 }, { "epoch": 0.66, "learning_rate": 3.3730170630041686e-06, "loss": 0.4581, "step": 66300 }, { "epoch": 0.66, "learning_rate": 3.3630211613238574e-06, "loss": 0.4606, "step": 66400 }, { "epoch": 0.66, "learning_rate": 3.3530252596435466e-06, "loss": 0.4521, "step": 66500 }, { "epoch": 0.67, "learning_rate": 3.3430293579632354e-06, "loss": 0.4602, "step": 66600 }, { "epoch": 0.67, "learning_rate": 3.333033456282924e-06, "loss": 0.4598, "step": 66700 }, { "epoch": 0.67, "learning_rate": 3.3230375546026134e-06, "loss": 0.4567, "step": 66800 }, { "epoch": 0.67, "learning_rate": 3.3130416529223018e-06, "loss": 0.4515, "step": 66900 }, { "epoch": 0.67, "learning_rate": 3.303045751241991e-06, "loss": 0.449, "step": 67000 }, { "epoch": 0.67, "learning_rate": 3.2930498495616798e-06, "loss": 0.4539, "step": 67100 }, { "epoch": 0.67, "learning_rate": 3.283053947881369e-06, "loss": 0.4501, "step": 67200 }, { "epoch": 0.67, "learning_rate": 3.2730580462010582e-06, "loss": 0.4509, "step": 67300 }, { "epoch": 0.67, "learning_rate": 3.2630621445207466e-06, "loss": 0.4487, "step": 67400 }, { "epoch": 0.67, "learning_rate": 3.253066242840436e-06, "loss": 0.4492, "step": 67500 }, { "epoch": 0.68, "learning_rate": 3.2430703411601246e-06, "loss": 0.4477, "step": 67600 }, { "epoch": 0.68, "learning_rate": 3.2330744394798134e-06, "loss": 0.4444, "step": 67700 }, { "epoch": 0.68, "learning_rate": 3.223078537799502e-06, "loss": 0.4431, "step": 67800 }, { "epoch": 0.68, "learning_rate": 3.2130826361191914e-06, "loss": 0.4444, "step": 67900 }, { "epoch": 0.68, "learning_rate": 3.2030867344388806e-06, "loss": 0.4446, "step": 68000 }, { "epoch": 0.68, "learning_rate": 3.193090832758569e-06, "loss": 0.4443, "step": 68100 }, { "epoch": 0.68, "learning_rate": 3.1830949310782582e-06, "loss": 0.4503, "step": 68200 }, { "epoch": 0.68, "learning_rate": 3.173099029397947e-06, "loss": 0.4501, "step": 68300 }, { "epoch": 0.68, "learning_rate": 3.1631031277176362e-06, "loss": 0.4432, "step": 68400 }, { "epoch": 0.68, "learning_rate": 3.153107226037325e-06, "loss": 0.4398, "step": 68500 }, { "epoch": 0.69, "learning_rate": 3.143111324357014e-06, "loss": 0.446, "step": 68600 }, { "epoch": 0.69, "learning_rate": 3.133115422676703e-06, "loss": 0.4467, "step": 68700 }, { "epoch": 0.69, "learning_rate": 3.1231195209963914e-06, "loss": 0.4381, "step": 68800 }, { "epoch": 0.69, "learning_rate": 3.1131236193160806e-06, "loss": 0.4365, "step": 68900 }, { "epoch": 0.69, "learning_rate": 3.1031277176357694e-06, "loss": 0.4557, "step": 69000 }, { "epoch": 0.69, "learning_rate": 3.0931318159554586e-06, "loss": 0.4466, "step": 69100 }, { "epoch": 0.69, "learning_rate": 3.083135914275148e-06, "loss": 0.4401, "step": 69200 }, { "epoch": 0.69, "learning_rate": 3.0731400125948362e-06, "loss": 0.4382, "step": 69300 }, { "epoch": 0.69, "learning_rate": 3.0631441109145255e-06, "loss": 0.4375, "step": 69400 }, { "epoch": 0.69, "learning_rate": 3.0531482092342143e-06, "loss": 0.4404, "step": 69500 }, { "epoch": 0.7, "learning_rate": 3.043152307553903e-06, "loss": 0.434, "step": 69600 }, { "epoch": 0.7, "learning_rate": 3.033156405873592e-06, "loss": 0.4341, "step": 69700 }, { "epoch": 0.7, "learning_rate": 3.023160504193281e-06, "loss": 0.4348, "step": 69800 }, { "epoch": 0.7, "learning_rate": 3.0131646025129703e-06, "loss": 0.4451, "step": 69900 }, { "epoch": 0.7, "learning_rate": 3.0031687008326587e-06, "loss": 0.44, "step": 70000 }, { "epoch": 0.7, "learning_rate": 2.993172799152348e-06, "loss": 0.439, "step": 70100 }, { "epoch": 0.7, "learning_rate": 2.9831768974720367e-06, "loss": 0.4355, "step": 70200 }, { "epoch": 0.7, "learning_rate": 2.973180995791726e-06, "loss": 0.4304, "step": 70300 }, { "epoch": 0.7, "learning_rate": 2.9631850941114143e-06, "loss": 0.4335, "step": 70400 }, { "epoch": 0.7, "learning_rate": 2.9531891924311035e-06, "loss": 0.4355, "step": 70500 }, { "epoch": 0.71, "learning_rate": 2.9431932907507927e-06, "loss": 0.4289, "step": 70600 }, { "epoch": 0.71, "learning_rate": 2.933197389070481e-06, "loss": 0.426, "step": 70700 }, { "epoch": 0.71, "learning_rate": 2.9232014873901703e-06, "loss": 0.4282, "step": 70800 }, { "epoch": 0.71, "learning_rate": 2.913205585709859e-06, "loss": 0.4296, "step": 70900 }, { "epoch": 0.71, "learning_rate": 2.9032096840295483e-06, "loss": 0.4262, "step": 71000 }, { "epoch": 0.71, "learning_rate": 2.8932137823492367e-06, "loss": 0.4289, "step": 71100 }, { "epoch": 0.71, "learning_rate": 2.883217880668926e-06, "loss": 0.4217, "step": 71200 }, { "epoch": 0.71, "learning_rate": 2.873221978988615e-06, "loss": 0.4215, "step": 71300 }, { "epoch": 0.71, "learning_rate": 2.863226077308304e-06, "loss": 0.4342, "step": 71400 }, { "epoch": 0.71, "learning_rate": 2.8532301756279927e-06, "loss": 0.4274, "step": 71500 }, { "epoch": 0.72, "learning_rate": 2.8432342739476815e-06, "loss": 0.422, "step": 71600 }, { "epoch": 0.72, "learning_rate": 2.8332383722673707e-06, "loss": 0.4217, "step": 71700 }, { "epoch": 0.72, "learning_rate": 2.82324247058706e-06, "loss": 0.425, "step": 71800 }, { "epoch": 0.72, "learning_rate": 2.8132465689067483e-06, "loss": 0.4239, "step": 71900 }, { "epoch": 0.72, "learning_rate": 2.8032506672264375e-06, "loss": 0.4246, "step": 72000 }, { "epoch": 0.72, "learning_rate": 2.7932547655461263e-06, "loss": 0.4259, "step": 72100 }, { "epoch": 0.72, "learning_rate": 2.7832588638658155e-06, "loss": 0.422, "step": 72200 }, { "epoch": 0.72, "learning_rate": 2.773262962185504e-06, "loss": 0.4205, "step": 72300 }, { "epoch": 0.72, "learning_rate": 2.763267060505193e-06, "loss": 0.4225, "step": 72400 }, { "epoch": 0.72, "learning_rate": 2.7532711588248823e-06, "loss": 0.4144, "step": 72500 }, { "epoch": 0.73, "learning_rate": 2.7432752571445707e-06, "loss": 0.4194, "step": 72600 }, { "epoch": 0.73, "learning_rate": 2.73327935546426e-06, "loss": 0.4251, "step": 72700 }, { "epoch": 0.73, "learning_rate": 2.7232834537839487e-06, "loss": 0.4178, "step": 72800 }, { "epoch": 0.73, "learning_rate": 2.713287552103638e-06, "loss": 0.4189, "step": 72900 }, { "epoch": 0.73, "learning_rate": 2.7032916504233263e-06, "loss": 0.4227, "step": 73000 }, { "epoch": 0.73, "learning_rate": 2.6932957487430155e-06, "loss": 0.4183, "step": 73100 }, { "epoch": 0.73, "learning_rate": 2.6832998470627048e-06, "loss": 0.4157, "step": 73200 }, { "epoch": 0.73, "learning_rate": 2.6733039453823936e-06, "loss": 0.421, "step": 73300 }, { "epoch": 0.73, "learning_rate": 2.6633080437020824e-06, "loss": 0.4197, "step": 73400 }, { "epoch": 0.73, "learning_rate": 2.653312142021771e-06, "loss": 0.4119, "step": 73500 }, { "epoch": 0.74, "learning_rate": 2.6433162403414604e-06, "loss": 0.4087, "step": 73600 }, { "epoch": 0.74, "learning_rate": 2.6333203386611487e-06, "loss": 0.4137, "step": 73700 }, { "epoch": 0.74, "learning_rate": 2.623324436980838e-06, "loss": 0.4124, "step": 73800 }, { "epoch": 0.74, "learning_rate": 2.613328535300527e-06, "loss": 0.4092, "step": 73900 }, { "epoch": 0.74, "learning_rate": 2.603332633620216e-06, "loss": 0.4101, "step": 74000 }, { "epoch": 0.74, "learning_rate": 2.593336731939905e-06, "loss": 0.4115, "step": 74100 }, { "epoch": 0.74, "learning_rate": 2.5833408302595936e-06, "loss": 0.4104, "step": 74200 }, { "epoch": 0.74, "learning_rate": 2.5733449285792828e-06, "loss": 0.409, "step": 74300 }, { "epoch": 0.74, "learning_rate": 2.563349026898971e-06, "loss": 0.415, "step": 74400 }, { "epoch": 0.74, "learning_rate": 2.5533531252186604e-06, "loss": 0.4013, "step": 74500 }, { "epoch": 0.75, "learning_rate": 2.5433572235383496e-06, "loss": 0.4008, "step": 74600 }, { "epoch": 0.75, "learning_rate": 2.5333613218580384e-06, "loss": 0.4131, "step": 74700 }, { "epoch": 0.75, "learning_rate": 2.5233654201777276e-06, "loss": 0.4123, "step": 74800 }, { "epoch": 0.75, "learning_rate": 2.513369518497416e-06, "loss": 0.4132, "step": 74900 }, { "epoch": 0.75, "learning_rate": 2.503373616817105e-06, "loss": 0.4111, "step": 75000 }, { "epoch": 0.75, "learning_rate": 2.493377715136794e-06, "loss": 0.4043, "step": 75100 }, { "epoch": 0.75, "learning_rate": 2.483381813456483e-06, "loss": 0.4106, "step": 75200 }, { "epoch": 0.75, "learning_rate": 2.473385911776172e-06, "loss": 0.4051, "step": 75300 }, { "epoch": 0.75, "learning_rate": 2.463390010095861e-06, "loss": 0.4004, "step": 75400 }, { "epoch": 0.75, "learning_rate": 2.45339410841555e-06, "loss": 0.4012, "step": 75500 }, { "epoch": 0.76, "learning_rate": 2.443398206735239e-06, "loss": 0.4039, "step": 75600 }, { "epoch": 0.76, "learning_rate": 2.4334023050549276e-06, "loss": 0.403, "step": 75700 }, { "epoch": 0.76, "learning_rate": 2.4234064033746164e-06, "loss": 0.4111, "step": 75800 }, { "epoch": 0.76, "learning_rate": 2.4134105016943056e-06, "loss": 0.3985, "step": 75900 }, { "epoch": 0.76, "learning_rate": 2.4034146000139944e-06, "loss": 0.4118, "step": 76000 }, { "epoch": 0.76, "learning_rate": 2.3934186983336836e-06, "loss": 0.397, "step": 76100 }, { "epoch": 0.76, "learning_rate": 2.3834227966533724e-06, "loss": 0.3907, "step": 76200 }, { "epoch": 0.76, "learning_rate": 2.3734268949730612e-06, "loss": 0.3998, "step": 76300 }, { "epoch": 0.76, "learning_rate": 2.36343099329275e-06, "loss": 0.3988, "step": 76400 }, { "epoch": 0.76, "learning_rate": 2.3534350916124392e-06, "loss": 0.3976, "step": 76500 }, { "epoch": 0.77, "learning_rate": 2.343439189932128e-06, "loss": 0.402, "step": 76600 }, { "epoch": 0.77, "learning_rate": 2.333443288251817e-06, "loss": 0.3991, "step": 76700 }, { "epoch": 0.77, "learning_rate": 2.323447386571506e-06, "loss": 0.402, "step": 76800 }, { "epoch": 0.77, "learning_rate": 2.313451484891195e-06, "loss": 0.3953, "step": 76900 }, { "epoch": 0.77, "learning_rate": 2.3034555832108836e-06, "loss": 0.3934, "step": 77000 }, { "epoch": 0.77, "learning_rate": 2.2934596815305724e-06, "loss": 0.3893, "step": 77100 }, { "epoch": 0.77, "learning_rate": 2.2834637798502617e-06, "loss": 0.4021, "step": 77200 }, { "epoch": 0.77, "learning_rate": 2.2734678781699505e-06, "loss": 0.3921, "step": 77300 }, { "epoch": 0.77, "learning_rate": 2.2634719764896397e-06, "loss": 0.3945, "step": 77400 }, { "epoch": 0.77, "learning_rate": 2.2534760748093285e-06, "loss": 0.3959, "step": 77500 }, { "epoch": 0.78, "learning_rate": 2.2434801731290173e-06, "loss": 0.3946, "step": 77600 }, { "epoch": 0.78, "learning_rate": 2.233484271448706e-06, "loss": 0.3946, "step": 77700 }, { "epoch": 0.78, "learning_rate": 2.2234883697683953e-06, "loss": 0.386, "step": 77800 }, { "epoch": 0.78, "learning_rate": 2.213492468088084e-06, "loss": 0.3997, "step": 77900 }, { "epoch": 0.78, "learning_rate": 2.203496566407773e-06, "loss": 0.3903, "step": 78000 }, { "epoch": 0.78, "learning_rate": 2.193500664727462e-06, "loss": 0.3926, "step": 78100 }, { "epoch": 0.78, "learning_rate": 2.183504763047151e-06, "loss": 0.3835, "step": 78200 }, { "epoch": 0.78, "learning_rate": 2.1735088613668397e-06, "loss": 0.3882, "step": 78300 }, { "epoch": 0.78, "learning_rate": 2.163512959686529e-06, "loss": 0.3914, "step": 78400 }, { "epoch": 0.78, "learning_rate": 2.1535170580062177e-06, "loss": 0.3881, "step": 78500 }, { "epoch": 0.79, "learning_rate": 2.1435211563259065e-06, "loss": 0.3884, "step": 78600 }, { "epoch": 0.79, "learning_rate": 2.1335252546455953e-06, "loss": 0.3876, "step": 78700 }, { "epoch": 0.79, "learning_rate": 2.1235293529652845e-06, "loss": 0.3857, "step": 78800 }, { "epoch": 0.79, "learning_rate": 2.1135334512849733e-06, "loss": 0.3794, "step": 78900 }, { "epoch": 0.79, "learning_rate": 2.103537549604662e-06, "loss": 0.3873, "step": 79000 }, { "epoch": 0.79, "learning_rate": 2.0935416479243513e-06, "loss": 0.3831, "step": 79100 }, { "epoch": 0.79, "learning_rate": 2.08354574624404e-06, "loss": 0.3805, "step": 79200 }, { "epoch": 0.79, "learning_rate": 2.073549844563729e-06, "loss": 0.3866, "step": 79300 }, { "epoch": 0.79, "learning_rate": 2.063553942883418e-06, "loss": 0.3887, "step": 79400 }, { "epoch": 0.79, "learning_rate": 2.053558041203107e-06, "loss": 0.3812, "step": 79500 }, { "epoch": 0.8, "learning_rate": 2.0435621395227957e-06, "loss": 0.3807, "step": 79600 }, { "epoch": 0.8, "learning_rate": 2.033566237842485e-06, "loss": 0.3764, "step": 79700 }, { "epoch": 0.8, "learning_rate": 2.0235703361621737e-06, "loss": 0.3812, "step": 79800 }, { "epoch": 0.8, "learning_rate": 2.0135744344818625e-06, "loss": 0.3721, "step": 79900 }, { "epoch": 0.8, "learning_rate": 2.0035785328015513e-06, "loss": 0.3796, "step": 80000 }, { "epoch": 0.8, "learning_rate": 1.9935826311212405e-06, "loss": 0.3769, "step": 80100 }, { "epoch": 0.8, "learning_rate": 1.9835867294409293e-06, "loss": 0.3933, "step": 80200 }, { "epoch": 0.8, "learning_rate": 1.973590827760618e-06, "loss": 0.3763, "step": 80300 }, { "epoch": 0.8, "learning_rate": 1.9635949260803074e-06, "loss": 0.3776, "step": 80400 }, { "epoch": 0.8, "learning_rate": 1.953599024399996e-06, "loss": 0.3797, "step": 80500 }, { "epoch": 0.81, "learning_rate": 1.943603122719685e-06, "loss": 0.381, "step": 80600 }, { "epoch": 0.81, "learning_rate": 1.933607221039374e-06, "loss": 0.38, "step": 80700 }, { "epoch": 0.81, "learning_rate": 1.923611319359063e-06, "loss": 0.3772, "step": 80800 }, { "epoch": 0.81, "learning_rate": 1.9136154176787518e-06, "loss": 0.3724, "step": 80900 }, { "epoch": 0.81, "learning_rate": 1.9036195159984408e-06, "loss": 0.3808, "step": 81000 }, { "epoch": 0.81, "learning_rate": 1.8936236143181298e-06, "loss": 0.3799, "step": 81100 }, { "epoch": 0.81, "learning_rate": 1.8836277126378186e-06, "loss": 0.3779, "step": 81200 }, { "epoch": 0.81, "learning_rate": 1.8736318109575074e-06, "loss": 0.3841, "step": 81300 }, { "epoch": 0.81, "learning_rate": 1.8636359092771966e-06, "loss": 0.3795, "step": 81400 }, { "epoch": 0.81, "learning_rate": 1.8536400075968856e-06, "loss": 0.3698, "step": 81500 }, { "epoch": 0.82, "learning_rate": 1.8436441059165744e-06, "loss": 0.3682, "step": 81600 }, { "epoch": 0.82, "learning_rate": 1.8336482042362632e-06, "loss": 0.3751, "step": 81700 }, { "epoch": 0.82, "learning_rate": 1.8236523025559522e-06, "loss": 0.3708, "step": 81800 }, { "epoch": 0.82, "learning_rate": 1.813656400875641e-06, "loss": 0.3635, "step": 81900 }, { "epoch": 0.82, "learning_rate": 1.80366049919533e-06, "loss": 0.3749, "step": 82000 }, { "epoch": 0.82, "learning_rate": 1.793664597515019e-06, "loss": 0.36, "step": 82100 }, { "epoch": 0.82, "learning_rate": 1.783668695834708e-06, "loss": 0.3696, "step": 82200 }, { "epoch": 0.82, "learning_rate": 1.7736727941543968e-06, "loss": 0.3669, "step": 82300 }, { "epoch": 0.82, "learning_rate": 1.7636768924740858e-06, "loss": 0.3714, "step": 82400 }, { "epoch": 0.82, "learning_rate": 1.7536809907937746e-06, "loss": 0.3643, "step": 82500 }, { "epoch": 0.83, "learning_rate": 1.7436850891134636e-06, "loss": 0.3672, "step": 82600 }, { "epoch": 0.83, "learning_rate": 1.7336891874331526e-06, "loss": 0.3738, "step": 82700 }, { "epoch": 0.83, "learning_rate": 1.7236932857528416e-06, "loss": 0.3739, "step": 82800 }, { "epoch": 0.83, "learning_rate": 1.7136973840725304e-06, "loss": 0.3593, "step": 82900 }, { "epoch": 0.83, "learning_rate": 1.7037014823922194e-06, "loss": 0.3703, "step": 83000 }, { "epoch": 0.83, "learning_rate": 1.6937055807119082e-06, "loss": 0.3653, "step": 83100 }, { "epoch": 0.83, "learning_rate": 1.683709679031597e-06, "loss": 0.3647, "step": 83200 }, { "epoch": 0.83, "learning_rate": 1.673713777351286e-06, "loss": 0.3707, "step": 83300 }, { "epoch": 0.83, "learning_rate": 1.6637178756709752e-06, "loss": 0.3667, "step": 83400 }, { "epoch": 0.83, "learning_rate": 1.653721973990664e-06, "loss": 0.3677, "step": 83500 }, { "epoch": 0.84, "learning_rate": 1.6437260723103528e-06, "loss": 0.3628, "step": 83600 }, { "epoch": 0.84, "learning_rate": 1.6337301706300418e-06, "loss": 0.3608, "step": 83700 }, { "epoch": 0.84, "learning_rate": 1.6237342689497306e-06, "loss": 0.363, "step": 83800 }, { "epoch": 0.84, "learning_rate": 1.6137383672694196e-06, "loss": 0.3646, "step": 83900 }, { "epoch": 0.84, "learning_rate": 1.6037424655891086e-06, "loss": 0.3572, "step": 84000 }, { "epoch": 0.84, "learning_rate": 1.5937465639087977e-06, "loss": 0.3531, "step": 84100 }, { "epoch": 0.84, "learning_rate": 1.5837506622284864e-06, "loss": 0.3645, "step": 84200 }, { "epoch": 0.84, "learning_rate": 1.5737547605481755e-06, "loss": 0.3635, "step": 84300 }, { "epoch": 0.84, "learning_rate": 1.5637588588678642e-06, "loss": 0.3623, "step": 84400 }, { "epoch": 0.84, "learning_rate": 1.5537629571875533e-06, "loss": 0.3583, "step": 84500 }, { "epoch": 0.85, "learning_rate": 1.543767055507242e-06, "loss": 0.362, "step": 84600 }, { "epoch": 0.85, "learning_rate": 1.5337711538269313e-06, "loss": 0.3616, "step": 84700 }, { "epoch": 0.85, "learning_rate": 1.52377525214662e-06, "loss": 0.3517, "step": 84800 }, { "epoch": 0.85, "learning_rate": 1.513779350466309e-06, "loss": 0.3615, "step": 84900 }, { "epoch": 0.85, "learning_rate": 1.5037834487859979e-06, "loss": 0.3553, "step": 85000 }, { "epoch": 0.85, "learning_rate": 1.4937875471056867e-06, "loss": 0.3582, "step": 85100 }, { "epoch": 0.85, "learning_rate": 1.4837916454253757e-06, "loss": 0.3581, "step": 85200 }, { "epoch": 0.85, "learning_rate": 1.4737957437450645e-06, "loss": 0.3648, "step": 85300 }, { "epoch": 0.85, "learning_rate": 1.4637998420647537e-06, "loss": 0.3561, "step": 85400 }, { "epoch": 0.85, "learning_rate": 1.4538039403844425e-06, "loss": 0.3541, "step": 85500 }, { "epoch": 0.86, "learning_rate": 1.4438080387041315e-06, "loss": 0.3578, "step": 85600 }, { "epoch": 0.86, "learning_rate": 1.4338121370238203e-06, "loss": 0.3656, "step": 85700 }, { "epoch": 0.86, "learning_rate": 1.4238162353435093e-06, "loss": 0.3484, "step": 85800 }, { "epoch": 0.86, "learning_rate": 1.413820333663198e-06, "loss": 0.3539, "step": 85900 }, { "epoch": 0.86, "learning_rate": 1.4038244319828873e-06, "loss": 0.3559, "step": 86000 }, { "epoch": 0.86, "learning_rate": 1.393828530302576e-06, "loss": 0.3499, "step": 86100 }, { "epoch": 0.86, "learning_rate": 1.3838326286222651e-06, "loss": 0.3553, "step": 86200 }, { "epoch": 0.86, "learning_rate": 1.373836726941954e-06, "loss": 0.3655, "step": 86300 }, { "epoch": 0.86, "learning_rate": 1.3638408252616427e-06, "loss": 0.3574, "step": 86400 }, { "epoch": 0.86, "learning_rate": 1.3538449235813317e-06, "loss": 0.3513, "step": 86500 }, { "epoch": 0.87, "learning_rate": 1.3438490219010205e-06, "loss": 0.3615, "step": 86600 }, { "epoch": 0.87, "learning_rate": 1.3338531202207097e-06, "loss": 0.3558, "step": 86700 }, { "epoch": 0.87, "learning_rate": 1.3238572185403985e-06, "loss": 0.3517, "step": 86800 }, { "epoch": 0.87, "learning_rate": 1.3138613168600875e-06, "loss": 0.3538, "step": 86900 }, { "epoch": 0.87, "learning_rate": 1.3038654151797763e-06, "loss": 0.3583, "step": 87000 }, { "epoch": 0.87, "learning_rate": 1.2938695134994653e-06, "loss": 0.3598, "step": 87100 }, { "epoch": 0.87, "learning_rate": 1.2838736118191541e-06, "loss": 0.3459, "step": 87200 }, { "epoch": 0.87, "learning_rate": 1.2738777101388433e-06, "loss": 0.3526, "step": 87300 }, { "epoch": 0.87, "learning_rate": 1.2638818084585321e-06, "loss": 0.3477, "step": 87400 }, { "epoch": 0.87, "learning_rate": 1.2538859067782211e-06, "loss": 0.343, "step": 87500 }, { "epoch": 0.88, "learning_rate": 1.24389000509791e-06, "loss": 0.3425, "step": 87600 }, { "epoch": 0.88, "learning_rate": 1.233894103417599e-06, "loss": 0.3503, "step": 87700 }, { "epoch": 0.88, "learning_rate": 1.223898201737288e-06, "loss": 0.3471, "step": 87800 }, { "epoch": 0.88, "learning_rate": 1.2139023000569767e-06, "loss": 0.3512, "step": 87900 }, { "epoch": 0.88, "learning_rate": 1.2039063983766655e-06, "loss": 0.347, "step": 88000 }, { "epoch": 0.88, "learning_rate": 1.1939104966963545e-06, "loss": 0.3472, "step": 88100 }, { "epoch": 0.88, "learning_rate": 1.1839145950160436e-06, "loss": 0.3523, "step": 88200 }, { "epoch": 0.88, "learning_rate": 1.1739186933357324e-06, "loss": 0.3402, "step": 88300 }, { "epoch": 0.88, "learning_rate": 1.1639227916554214e-06, "loss": 0.3494, "step": 88400 }, { "epoch": 0.88, "learning_rate": 1.1539268899751104e-06, "loss": 0.3389, "step": 88500 }, { "epoch": 0.89, "learning_rate": 1.1439309882947992e-06, "loss": 0.3484, "step": 88600 }, { "epoch": 0.89, "learning_rate": 1.1339350866144882e-06, "loss": 0.3446, "step": 88700 }, { "epoch": 0.89, "learning_rate": 1.1239391849341772e-06, "loss": 0.348, "step": 88800 }, { "epoch": 0.89, "learning_rate": 1.113943283253866e-06, "loss": 0.3432, "step": 88900 }, { "epoch": 0.89, "learning_rate": 1.103947381573555e-06, "loss": 0.3412, "step": 89000 }, { "epoch": 0.89, "learning_rate": 1.0939514798932438e-06, "loss": 0.3523, "step": 89100 }, { "epoch": 0.89, "learning_rate": 1.0839555782129328e-06, "loss": 0.3461, "step": 89200 }, { "epoch": 0.89, "learning_rate": 1.0739596765326218e-06, "loss": 0.339, "step": 89300 }, { "epoch": 0.89, "learning_rate": 1.0639637748523106e-06, "loss": 0.3418, "step": 89400 }, { "epoch": 0.89, "learning_rate": 1.0539678731719996e-06, "loss": 0.3502, "step": 89500 }, { "epoch": 0.9, "learning_rate": 1.0439719714916886e-06, "loss": 0.3451, "step": 89600 }, { "epoch": 0.9, "learning_rate": 1.0339760698113774e-06, "loss": 0.3388, "step": 89700 }, { "epoch": 0.9, "learning_rate": 1.0239801681310664e-06, "loss": 0.3398, "step": 89800 }, { "epoch": 0.9, "learning_rate": 1.0139842664507552e-06, "loss": 0.3428, "step": 89900 }, { "epoch": 0.9, "learning_rate": 1.0039883647704442e-06, "loss": 0.3447, "step": 90000 }, { "epoch": 0.9, "learning_rate": 9.93992463090133e-07, "loss": 0.347, "step": 90100 }, { "epoch": 0.9, "learning_rate": 9.83996561409822e-07, "loss": 0.3359, "step": 90200 }, { "epoch": 0.9, "learning_rate": 9.74000659729511e-07, "loss": 0.3342, "step": 90300 }, { "epoch": 0.9, "learning_rate": 9.640047580491998e-07, "loss": 0.3371, "step": 90400 }, { "epoch": 0.9, "learning_rate": 9.540088563688888e-07, "loss": 0.3436, "step": 90500 }, { "epoch": 0.91, "learning_rate": 9.440129546885777e-07, "loss": 0.3484, "step": 90600 }, { "epoch": 0.91, "learning_rate": 9.340170530082666e-07, "loss": 0.3445, "step": 90700 }, { "epoch": 0.91, "learning_rate": 9.240211513279556e-07, "loss": 0.3412, "step": 90800 }, { "epoch": 0.91, "learning_rate": 9.140252496476445e-07, "loss": 0.3401, "step": 90900 }, { "epoch": 0.91, "learning_rate": 9.040293479673334e-07, "loss": 0.3436, "step": 91000 }, { "epoch": 0.91, "learning_rate": 8.940334462870224e-07, "loss": 0.3415, "step": 91100 }, { "epoch": 0.91, "learning_rate": 8.840375446067113e-07, "loss": 0.3397, "step": 91200 }, { "epoch": 0.91, "learning_rate": 8.740416429264002e-07, "loss": 0.3355, "step": 91300 }, { "epoch": 0.91, "learning_rate": 8.640457412460891e-07, "loss": 0.3378, "step": 91400 }, { "epoch": 0.91, "learning_rate": 8.540498395657781e-07, "loss": 0.3424, "step": 91500 }, { "epoch": 0.92, "learning_rate": 8.44053937885467e-07, "loss": 0.3361, "step": 91600 }, { "epoch": 0.92, "learning_rate": 8.34058036205156e-07, "loss": 0.333, "step": 91700 }, { "epoch": 0.92, "learning_rate": 8.24062134524845e-07, "loss": 0.3361, "step": 91800 }, { "epoch": 0.92, "learning_rate": 8.140662328445339e-07, "loss": 0.3312, "step": 91900 }, { "epoch": 0.92, "learning_rate": 8.040703311642227e-07, "loss": 0.3375, "step": 92000 }, { "epoch": 0.92, "learning_rate": 7.940744294839118e-07, "loss": 0.3342, "step": 92100 }, { "epoch": 0.92, "learning_rate": 7.840785278036006e-07, "loss": 0.3373, "step": 92200 }, { "epoch": 0.92, "learning_rate": 7.740826261232895e-07, "loss": 0.3317, "step": 92300 }, { "epoch": 0.92, "learning_rate": 7.640867244429784e-07, "loss": 0.3309, "step": 92400 }, { "epoch": 0.92, "learning_rate": 7.540908227626674e-07, "loss": 0.3331, "step": 92500 }, { "epoch": 0.93, "learning_rate": 7.440949210823563e-07, "loss": 0.3329, "step": 92600 }, { "epoch": 0.93, "learning_rate": 7.340990194020452e-07, "loss": 0.3318, "step": 92700 }, { "epoch": 0.93, "learning_rate": 7.241031177217342e-07, "loss": 0.3377, "step": 92800 }, { "epoch": 0.93, "learning_rate": 7.141072160414231e-07, "loss": 0.3346, "step": 92900 }, { "epoch": 0.93, "learning_rate": 7.04111314361112e-07, "loss": 0.3381, "step": 93000 }, { "epoch": 0.93, "learning_rate": 6.94115412680801e-07, "loss": 0.333, "step": 93100 }, { "epoch": 0.93, "learning_rate": 6.841195110004899e-07, "loss": 0.3322, "step": 93200 }, { "epoch": 0.93, "learning_rate": 6.741236093201788e-07, "loss": 0.3288, "step": 93300 }, { "epoch": 0.93, "learning_rate": 6.641277076398677e-07, "loss": 0.3337, "step": 93400 }, { "epoch": 0.93, "learning_rate": 6.541318059595567e-07, "loss": 0.3327, "step": 93500 }, { "epoch": 0.94, "learning_rate": 6.441359042792456e-07, "loss": 0.3358, "step": 93600 }, { "epoch": 0.94, "learning_rate": 6.341400025989344e-07, "loss": 0.3337, "step": 93700 }, { "epoch": 0.94, "learning_rate": 6.241441009186234e-07, "loss": 0.3312, "step": 93800 }, { "epoch": 0.94, "learning_rate": 6.141481992383123e-07, "loss": 0.3299, "step": 93900 }, { "epoch": 0.94, "learning_rate": 6.041522975580013e-07, "loss": 0.3324, "step": 94000 }, { "epoch": 0.94, "learning_rate": 5.941563958776902e-07, "loss": 0.3343, "step": 94100 }, { "epoch": 0.94, "learning_rate": 5.841604941973791e-07, "loss": 0.3296, "step": 94200 }, { "epoch": 0.94, "learning_rate": 5.74164592517068e-07, "loss": 0.3346, "step": 94300 }, { "epoch": 0.94, "learning_rate": 5.64168690836757e-07, "loss": 0.3376, "step": 94400 }, { "epoch": 0.94, "learning_rate": 5.541727891564459e-07, "loss": 0.3314, "step": 94500 }, { "epoch": 0.95, "learning_rate": 5.441768874761348e-07, "loss": 0.3316, "step": 94600 }, { "epoch": 0.95, "learning_rate": 5.341809857958237e-07, "loss": 0.3231, "step": 94700 }, { "epoch": 0.95, "learning_rate": 5.241850841155126e-07, "loss": 0.3315, "step": 94800 }, { "epoch": 0.95, "learning_rate": 5.141891824352016e-07, "loss": 0.3288, "step": 94900 }, { "epoch": 0.95, "learning_rate": 5.041932807548905e-07, "loss": 0.3297, "step": 95000 }, { "epoch": 0.95, "learning_rate": 4.941973790745794e-07, "loss": 0.3308, "step": 95100 }, { "epoch": 0.95, "learning_rate": 4.842014773942684e-07, "loss": 0.326, "step": 95200 }, { "epoch": 0.95, "learning_rate": 4.742055757139573e-07, "loss": 0.3281, "step": 95300 }, { "epoch": 0.95, "learning_rate": 4.6420967403364625e-07, "loss": 0.3285, "step": 95400 }, { "epoch": 0.95, "learning_rate": 4.542137723533352e-07, "loss": 0.3269, "step": 95500 }, { "epoch": 0.96, "learning_rate": 4.442178706730241e-07, "loss": 0.3311, "step": 95600 }, { "epoch": 0.96, "learning_rate": 4.34221968992713e-07, "loss": 0.3251, "step": 95700 }, { "epoch": 0.96, "learning_rate": 4.2422606731240196e-07, "loss": 0.3262, "step": 95800 }, { "epoch": 0.96, "learning_rate": 4.1423016563209086e-07, "loss": 0.3305, "step": 95900 }, { "epoch": 0.96, "learning_rate": 4.042342639517798e-07, "loss": 0.3242, "step": 96000 }, { "epoch": 0.96, "learning_rate": 3.942383622714687e-07, "loss": 0.3282, "step": 96100 }, { "epoch": 0.96, "learning_rate": 3.8424246059115767e-07, "loss": 0.3285, "step": 96200 }, { "epoch": 0.96, "learning_rate": 3.742465589108466e-07, "loss": 0.3266, "step": 96300 }, { "epoch": 0.96, "learning_rate": 3.642506572305355e-07, "loss": 0.3262, "step": 96400 }, { "epoch": 0.96, "learning_rate": 3.542547555502245e-07, "loss": 0.3276, "step": 96500 }, { "epoch": 0.97, "learning_rate": 3.4425885386991333e-07, "loss": 0.3281, "step": 96600 }, { "epoch": 0.97, "learning_rate": 3.342629521896023e-07, "loss": 0.3238, "step": 96700 }, { "epoch": 0.97, "learning_rate": 3.2426705050929124e-07, "loss": 0.3222, "step": 96800 }, { "epoch": 0.97, "learning_rate": 3.1427114882898014e-07, "loss": 0.3299, "step": 96900 }, { "epoch": 0.97, "learning_rate": 3.042752471486691e-07, "loss": 0.3224, "step": 97000 }, { "epoch": 0.97, "learning_rate": 2.94279345468358e-07, "loss": 0.329, "step": 97100 }, { "epoch": 0.97, "learning_rate": 2.8428344378804695e-07, "loss": 0.3213, "step": 97200 }, { "epoch": 0.97, "learning_rate": 2.7428754210773585e-07, "loss": 0.3283, "step": 97300 }, { "epoch": 0.97, "learning_rate": 2.6429164042742475e-07, "loss": 0.3227, "step": 97400 }, { "epoch": 0.97, "learning_rate": 2.542957387471137e-07, "loss": 0.3243, "step": 97500 }, { "epoch": 0.98, "learning_rate": 2.4429983706680266e-07, "loss": 0.3229, "step": 97600 }, { "epoch": 0.98, "learning_rate": 2.3430393538649156e-07, "loss": 0.3247, "step": 97700 }, { "epoch": 0.98, "learning_rate": 2.243080337061805e-07, "loss": 0.3163, "step": 97800 }, { "epoch": 0.98, "learning_rate": 2.143121320258694e-07, "loss": 0.3288, "step": 97900 }, { "epoch": 0.98, "learning_rate": 2.0431623034555834e-07, "loss": 0.3295, "step": 98000 }, { "epoch": 0.98, "learning_rate": 1.9432032866524727e-07, "loss": 0.321, "step": 98100 }, { "epoch": 0.98, "learning_rate": 1.843244269849362e-07, "loss": 0.3158, "step": 98200 }, { "epoch": 0.98, "learning_rate": 1.743285253046251e-07, "loss": 0.3238, "step": 98300 }, { "epoch": 0.98, "learning_rate": 1.6433262362431403e-07, "loss": 0.3251, "step": 98400 }, { "epoch": 0.98, "learning_rate": 1.5433672194400295e-07, "loss": 0.3278, "step": 98500 }, { "epoch": 0.99, "learning_rate": 1.443408202636919e-07, "loss": 0.3232, "step": 98600 }, { "epoch": 0.99, "learning_rate": 1.343449185833808e-07, "loss": 0.3259, "step": 98700 }, { "epoch": 0.99, "learning_rate": 1.2434901690306976e-07, "loss": 0.3223, "step": 98800 }, { "epoch": 0.99, "learning_rate": 1.1435311522275868e-07, "loss": 0.3285, "step": 98900 }, { "epoch": 0.99, "learning_rate": 1.0435721354244762e-07, "loss": 0.3249, "step": 99000 }, { "epoch": 0.99, "learning_rate": 9.436131186213653e-08, "loss": 0.3269, "step": 99100 }, { "epoch": 0.99, "learning_rate": 8.436541018182545e-08, "loss": 0.3265, "step": 99200 }, { "epoch": 0.99, "learning_rate": 7.436950850151439e-08, "loss": 0.322, "step": 99300 }, { "epoch": 0.99, "learning_rate": 6.43736068212033e-08, "loss": 0.3186, "step": 99400 }, { "epoch": 0.99, "learning_rate": 5.437770514089224e-08, "loss": 0.324, "step": 99500 }, { "epoch": 1.0, "learning_rate": 4.4381803460581166e-08, "loss": 0.3219, "step": 99600 }, { "epoch": 1.0, "learning_rate": 3.4385901780270094e-08, "loss": 0.3218, "step": 99700 }, { "epoch": 1.0, "learning_rate": 2.4390000099959022e-08, "loss": 0.3188, "step": 99800 }, { "epoch": 1.0, "learning_rate": 1.4394098419647945e-08, "loss": 0.3218, "step": 99900 }, { "epoch": 1.0, "learning_rate": 4.3981967393368716e-09, "loss": 0.3233, "step": 100000 } ], "logging_steps": 100, "max_steps": 100043, "num_train_epochs": 1, "save_steps": 10000, "total_flos": 5668611072983040.0, "trial_name": null, "trial_params": null }