|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 24.43494196701283, |
|
"global_step": 80000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.993891264508247e-05, |
|
"loss": 0.4281, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.987782529016494e-05, |
|
"loss": 0.3603, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9816737935247404e-05, |
|
"loss": 0.335, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9755650580329873e-05, |
|
"loss": 0.324, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.9694563225412342e-05, |
|
"loss": 0.3191, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.963347587049481e-05, |
|
"loss": 0.3027, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9572388515577276e-05, |
|
"loss": 0.2698, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.9511301160659744e-05, |
|
"loss": 0.2289, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.9450213805742213e-05, |
|
"loss": 0.2299, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.938912645082468e-05, |
|
"loss": 0.2369, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.932803909590715e-05, |
|
"loss": 0.241, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9266951740989616e-05, |
|
"loss": 0.2375, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.9205864386072085e-05, |
|
"loss": 0.2334, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.9144777031154553e-05, |
|
"loss": 0.1654, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.9083689676237022e-05, |
|
"loss": 0.1569, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.9022602321319487e-05, |
|
"loss": 0.1689, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.8961514966401956e-05, |
|
"loss": 0.1641, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.8900427611484425e-05, |
|
"loss": 0.1654, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.883934025656689e-05, |
|
"loss": 0.1684, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.8778252901649362e-05, |
|
"loss": 0.1484, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.8717165546731827e-05, |
|
"loss": 0.1118, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.8656078191814296e-05, |
|
"loss": 0.1181, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.8594990836896765e-05, |
|
"loss": 0.118, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.853390348197923e-05, |
|
"loss": 0.1204, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.84728161270617e-05, |
|
"loss": 0.1255, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.8411728772144168e-05, |
|
"loss": 0.1284, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.8350641417226636e-05, |
|
"loss": 0.0925, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.82895540623091e-05, |
|
"loss": 0.0896, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.8228466707391574e-05, |
|
"loss": 0.0897, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.816737935247404e-05, |
|
"loss": 0.0969, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 1.8106291997556508e-05, |
|
"loss": 0.0971, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.8045204642638976e-05, |
|
"loss": 0.0963, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 1.7984117287721442e-05, |
|
"loss": 0.0906, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.792302993280391e-05, |
|
"loss": 0.0717, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 1.786194257788638e-05, |
|
"loss": 0.0754, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 1.7800855222968848e-05, |
|
"loss": 0.0709, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 1.7739767868051313e-05, |
|
"loss": 0.0814, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 1.7678680513133785e-05, |
|
"loss": 0.0797, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 1.761759315821625e-05, |
|
"loss": 0.0914, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 1.755650580329872e-05, |
|
"loss": 0.0648, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.7495418448381188e-05, |
|
"loss": 0.0627, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 1.7434331093463653e-05, |
|
"loss": 0.061, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 1.7373243738546122e-05, |
|
"loss": 0.0678, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1.731215638362859e-05, |
|
"loss": 0.0656, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.725106902871106e-05, |
|
"loss": 0.0702, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.7189981673793525e-05, |
|
"loss": 0.0716, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 1.7128894318875993e-05, |
|
"loss": 0.0553, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 1.7067806963958462e-05, |
|
"loss": 0.0546, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 1.700671960904093e-05, |
|
"loss": 0.0575, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 1.69456322541234e-05, |
|
"loss": 0.0558, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.6884544899205865e-05, |
|
"loss": 0.0628, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 1.6823457544288334e-05, |
|
"loss": 0.06, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 1.6762370189370802e-05, |
|
"loss": 0.0501, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 1.670128283445327e-05, |
|
"loss": 0.0465, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 1.6640195479535736e-05, |
|
"loss": 0.0497, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 1.6579108124618205e-05, |
|
"loss": 0.0513, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 1.6518020769700674e-05, |
|
"loss": 0.0526, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 1.645693341478314e-05, |
|
"loss": 0.0503, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 1.639584605986561e-05, |
|
"loss": 0.05, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 1.6334758704948076e-05, |
|
"loss": 0.0371, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 1.6273671350030545e-05, |
|
"loss": 0.0396, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 1.6212583995113014e-05, |
|
"loss": 0.0417, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 1.6151496640195482e-05, |
|
"loss": 0.0453, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 1.6090409285277948e-05, |
|
"loss": 0.0402, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 1.6029321930360416e-05, |
|
"loss": 0.0421, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 1.5968234575442885e-05, |
|
"loss": 0.0389, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 1.590714722052535e-05, |
|
"loss": 0.0372, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.5846059865607823e-05, |
|
"loss": 0.0346, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 10.54, |
|
"learning_rate": 1.5784972510690288e-05, |
|
"loss": 0.0397, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 1.5723885155772757e-05, |
|
"loss": 0.0357, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 1.5662797800855225e-05, |
|
"loss": 0.032, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 1.560171044593769e-05, |
|
"loss": 0.0474, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 1.554062309102016e-05, |
|
"loss": 0.0298, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 1.5479535736102628e-05, |
|
"loss": 0.0355, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 1.5418448381185097e-05, |
|
"loss": 0.0368, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 1.5357361026267562e-05, |
|
"loss": 0.0335, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 1.5296273671350034e-05, |
|
"loss": 0.0328, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 1.52351863164325e-05, |
|
"loss": 0.0317, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 1.5174098961514966e-05, |
|
"loss": 0.0346, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 1.5113011606597437e-05, |
|
"loss": 0.0279, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 1.5051924251679904e-05, |
|
"loss": 0.0268, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 1.4990836896762371e-05, |
|
"loss": 0.0324, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 12.68, |
|
"learning_rate": 1.492974954184484e-05, |
|
"loss": 0.0309, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 1.4868662186927307e-05, |
|
"loss": 0.0316, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 1.4807574832009775e-05, |
|
"loss": 0.0365, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 1.4746487477092244e-05, |
|
"loss": 0.0241, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 13.29, |
|
"learning_rate": 1.4685400122174711e-05, |
|
"loss": 0.0276, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 1.4624312767257178e-05, |
|
"loss": 0.0249, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 13.59, |
|
"learning_rate": 1.4563225412339648e-05, |
|
"loss": 0.0284, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 13.74, |
|
"learning_rate": 1.4502138057422115e-05, |
|
"loss": 0.0277, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 1.4441050702504582e-05, |
|
"loss": 0.0296, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 1.4379963347587051e-05, |
|
"loss": 0.0245, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 1.4318875992669518e-05, |
|
"loss": 0.0227, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 14.36, |
|
"learning_rate": 1.4257788637751985e-05, |
|
"loss": 0.0273, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 1.4196701282834456e-05, |
|
"loss": 0.0263, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 14.66, |
|
"learning_rate": 1.4135613927916923e-05, |
|
"loss": 0.0243, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 1.407452657299939e-05, |
|
"loss": 0.0256, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 1.4013439218081858e-05, |
|
"loss": 0.0285, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 1.3952351863164327e-05, |
|
"loss": 0.0189, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 15.27, |
|
"learning_rate": 1.3891264508246794e-05, |
|
"loss": 0.0186, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 1.3830177153329263e-05, |
|
"loss": 0.0272, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 1.376908979841173e-05, |
|
"loss": 0.0264, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 1.3708002443494197e-05, |
|
"loss": 0.0239, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 1.3646915088576667e-05, |
|
"loss": 0.0255, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.3585827733659134e-05, |
|
"loss": 0.0261, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 16.19, |
|
"learning_rate": 1.3524740378741601e-05, |
|
"loss": 0.0239, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 1.346365302382407e-05, |
|
"loss": 0.0187, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 1.3402565668906537e-05, |
|
"loss": 0.023, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 16.65, |
|
"learning_rate": 1.3341478313989005e-05, |
|
"loss": 0.0215, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 1.3280390959071474e-05, |
|
"loss": 0.0264, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 1.3219303604153941e-05, |
|
"loss": 0.0198, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 1.3158216249236408e-05, |
|
"loss": 0.0191, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 1.3097128894318879e-05, |
|
"loss": 0.0174, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.3036041539401346e-05, |
|
"loss": 0.0149, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 1.2974954184483813e-05, |
|
"loss": 0.019, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 1.2913866829566281e-05, |
|
"loss": 0.0188, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 1.2852779474648748e-05, |
|
"loss": 0.022, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 1.2791692119731215e-05, |
|
"loss": 0.0248, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 18.17, |
|
"learning_rate": 1.2730604764813686e-05, |
|
"loss": 0.0162, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 1.2669517409896153e-05, |
|
"loss": 0.017, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 18.48, |
|
"learning_rate": 1.260843005497862e-05, |
|
"loss": 0.0149, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 1.2547342700061088e-05, |
|
"loss": 0.0194, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 18.78, |
|
"learning_rate": 1.2486255345143557e-05, |
|
"loss": 0.0185, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 1.2425167990226024e-05, |
|
"loss": 0.0161, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"learning_rate": 1.2364080635308493e-05, |
|
"loss": 0.0144, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 1.230299328039096e-05, |
|
"loss": 0.0141, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 1.2241905925473427e-05, |
|
"loss": 0.0149, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"learning_rate": 1.2180818570555897e-05, |
|
"loss": 0.0178, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 1.2119731215638364e-05, |
|
"loss": 0.0159, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 1.2058643860720831e-05, |
|
"loss": 0.0194, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 20.01, |
|
"learning_rate": 1.19975565058033e-05, |
|
"loss": 0.0196, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 1.1936469150885767e-05, |
|
"loss": 0.0123, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"learning_rate": 1.1875381795968236e-05, |
|
"loss": 0.0133, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 20.46, |
|
"learning_rate": 1.1814294441050704e-05, |
|
"loss": 0.0188, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 20.62, |
|
"learning_rate": 1.1753207086133171e-05, |
|
"loss": 0.0141, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 1.1692119731215638e-05, |
|
"loss": 0.0176, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 1.1631032376298109e-05, |
|
"loss": 0.0151, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 21.08, |
|
"learning_rate": 1.1569945021380576e-05, |
|
"loss": 0.0151, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"learning_rate": 1.1508857666463043e-05, |
|
"loss": 0.0085, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 1.1447770311545512e-05, |
|
"loss": 0.0165, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 21.53, |
|
"learning_rate": 1.1386682956627979e-05, |
|
"loss": 0.015, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"learning_rate": 1.1325595601710446e-05, |
|
"loss": 0.0165, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 21.84, |
|
"learning_rate": 1.1264508246792916e-05, |
|
"loss": 0.0139, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 1.1203420891875383e-05, |
|
"loss": 0.0152, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 22.14, |
|
"learning_rate": 1.114233353695785e-05, |
|
"loss": 0.0116, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 22.3, |
|
"learning_rate": 1.1081246182040319e-05, |
|
"loss": 0.0134, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 22.45, |
|
"learning_rate": 1.1020158827122787e-05, |
|
"loss": 0.0146, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 22.6, |
|
"learning_rate": 1.0959071472205254e-05, |
|
"loss": 0.0148, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 1.0897984117287723e-05, |
|
"loss": 0.0124, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 22.91, |
|
"learning_rate": 1.083689676237019e-05, |
|
"loss": 0.013, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 23.06, |
|
"learning_rate": 1.0775809407452657e-05, |
|
"loss": 0.0124, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 23.21, |
|
"learning_rate": 1.0714722052535128e-05, |
|
"loss": 0.0092, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 23.37, |
|
"learning_rate": 1.0653634697617595e-05, |
|
"loss": 0.014, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 23.52, |
|
"learning_rate": 1.0592547342700062e-05, |
|
"loss": 0.0124, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 1.053145998778253e-05, |
|
"loss": 0.0116, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 23.82, |
|
"learning_rate": 1.0470372632864997e-05, |
|
"loss": 0.01, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 23.98, |
|
"learning_rate": 1.0409285277947466e-05, |
|
"loss": 0.0146, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 24.13, |
|
"learning_rate": 1.0348197923029935e-05, |
|
"loss": 0.011, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 24.28, |
|
"learning_rate": 1.0287110568112402e-05, |
|
"loss": 0.0101, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 24.43, |
|
"learning_rate": 1.0226023213194869e-05, |
|
"loss": 0.009, |
|
"step": 80000 |
|
} |
|
], |
|
"max_steps": 163700, |
|
"num_train_epochs": 50, |
|
"total_flos": 2.152034161253376e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|