|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.6600798023940717, |
|
"global_step": 14000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.992019760592819e-06, |
|
"loss": 1.1564, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.982709481284439e-06, |
|
"loss": 1.132, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.97320919627589e-06, |
|
"loss": 1.1507, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.96370891126734e-06, |
|
"loss": 1.0846, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.954208626258789e-06, |
|
"loss": 1.0877, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 9.944708341250238e-06, |
|
"loss": 1.1153, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.935208056241688e-06, |
|
"loss": 1.0876, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.925707771233137e-06, |
|
"loss": 1.0729, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.916207486224587e-06, |
|
"loss": 1.126, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.906707201216036e-06, |
|
"loss": 1.1262, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 9.897206916207487e-06, |
|
"loss": 1.0883, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.887706631198937e-06, |
|
"loss": 1.0926, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 9.878206346190386e-06, |
|
"loss": 1.0916, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.868706061181836e-06, |
|
"loss": 1.0892, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.859205776173287e-06, |
|
"loss": 1.0653, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 9.849705491164736e-06, |
|
"loss": 1.0391, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 9.840205206156186e-06, |
|
"loss": 1.0866, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 9.830704921147635e-06, |
|
"loss": 1.1192, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 9.821204636139085e-06, |
|
"loss": 1.0898, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.811704351130534e-06, |
|
"loss": 1.0693, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.802204066121984e-06, |
|
"loss": 1.1243, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 9.792703781113433e-06, |
|
"loss": 1.071, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 9.783203496104884e-06, |
|
"loss": 1.0569, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 9.773703211096334e-06, |
|
"loss": 1.0612, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.764202926087783e-06, |
|
"loss": 1.0432, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 9.754702641079233e-06, |
|
"loss": 1.092, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 9.745202356070684e-06, |
|
"loss": 1.0602, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.735702071062133e-06, |
|
"loss": 1.0689, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.726201786053583e-06, |
|
"loss": 1.0897, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.716701501045032e-06, |
|
"loss": 1.0482, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 9.707201216036482e-06, |
|
"loss": 1.1167, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 9.697700931027931e-06, |
|
"loss": 1.0707, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 9.68820064601938e-06, |
|
"loss": 1.0613, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 9.67870036101083e-06, |
|
"loss": 1.0534, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.669200076002281e-06, |
|
"loss": 1.0719, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.659699790993731e-06, |
|
"loss": 1.0678, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 9.65019950598518e-06, |
|
"loss": 1.064, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 9.64069922097663e-06, |
|
"loss": 1.0846, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 9.631198935968081e-06, |
|
"loss": 1.0716, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.62169865095953e-06, |
|
"loss": 1.0847, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 9.612198365950978e-06, |
|
"loss": 1.0237, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.60269808094243e-06, |
|
"loss": 1.0382, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 9.593197795933879e-06, |
|
"loss": 1.055, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 9.583697510925328e-06, |
|
"loss": 1.0257, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.57438723161695e-06, |
|
"loss": 1.0425, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 9.5648869466084e-06, |
|
"loss": 1.0603, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 9.555386661599849e-06, |
|
"loss": 1.0195, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.545886376591299e-06, |
|
"loss": 1.0311, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.536386091582748e-06, |
|
"loss": 1.0307, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.526885806574197e-06, |
|
"loss": 1.0965, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 9.517385521565647e-06, |
|
"loss": 1.0271, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 9.507885236557098e-06, |
|
"loss": 1.0315, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.498384951548548e-06, |
|
"loss": 1.0245, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.488884666539997e-06, |
|
"loss": 1.0208, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.479384381531446e-06, |
|
"loss": 1.0621, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.469884096522896e-06, |
|
"loss": 1.0774, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.460383811514347e-06, |
|
"loss": 1.0727, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.450883526505797e-06, |
|
"loss": 1.0411, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.441383241497246e-06, |
|
"loss": 1.0088, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.431882956488696e-06, |
|
"loss": 1.0133, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 9.422382671480145e-06, |
|
"loss": 1.0655, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.412882386471594e-06, |
|
"loss": 1.0241, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 9.403382101463044e-06, |
|
"loss": 1.0708, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.393881816454495e-06, |
|
"loss": 1.0304, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.384381531445945e-06, |
|
"loss": 1.0155, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.374881246437394e-06, |
|
"loss": 1.0224, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.365380961428843e-06, |
|
"loss": 1.0665, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.355880676420293e-06, |
|
"loss": 1.0238, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.346380391411744e-06, |
|
"loss": 1.1006, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.336880106403194e-06, |
|
"loss": 0.9987, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.327379821394643e-06, |
|
"loss": 1.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.317879536386093e-06, |
|
"loss": 1.0634, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.308379251377542e-06, |
|
"loss": 1.0277, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.298878966368991e-06, |
|
"loss": 1.0311, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.289378681360441e-06, |
|
"loss": 1.0317, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.279878396351892e-06, |
|
"loss": 1.0787, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 9.270378111343342e-06, |
|
"loss": 1.0583, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.260877826334791e-06, |
|
"loss": 1.0326, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.25137754132624e-06, |
|
"loss": 0.989, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.24187725631769e-06, |
|
"loss": 0.9879, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 9.232376971309141e-06, |
|
"loss": 1.0399, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 9.22287668630059e-06, |
|
"loss": 1.0045, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 9.213376401292038e-06, |
|
"loss": 1.0079, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.20387611628349e-06, |
|
"loss": 0.999, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.194375831274939e-06, |
|
"loss": 0.9867, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.184875546266388e-06, |
|
"loss": 0.9878, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.175375261257838e-06, |
|
"loss": 1.0098, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.165874976249289e-06, |
|
"loss": 1.0325, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.156374691240739e-06, |
|
"loss": 1.0108, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.146874406232188e-06, |
|
"loss": 1.0167, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.137374121223637e-06, |
|
"loss": 0.9871, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 9.128063841915258e-06, |
|
"loss": 0.983, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.118563556906707e-06, |
|
"loss": 0.9807, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 9.109063271898158e-06, |
|
"loss": 0.9794, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.099562986889608e-06, |
|
"loss": 1.0505, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.090062701881057e-06, |
|
"loss": 1.0033, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 9.080562416872507e-06, |
|
"loss": 0.9822, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 9.071062131863958e-06, |
|
"loss": 1.0046, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.061561846855407e-06, |
|
"loss": 0.9944, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.052061561846857e-06, |
|
"loss": 1.0161, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.042561276838306e-06, |
|
"loss": 1.0239, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.033060991829756e-06, |
|
"loss": 0.978, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.023560706821205e-06, |
|
"loss": 1.0055, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.014060421812655e-06, |
|
"loss": 0.9623, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.004560136804104e-06, |
|
"loss": 0.9563, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 8.995059851795555e-06, |
|
"loss": 0.9714, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 8.985559566787005e-06, |
|
"loss": 0.9661, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 8.976059281778454e-06, |
|
"loss": 0.9727, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.966558996769904e-06, |
|
"loss": 0.9874, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.957058711761355e-06, |
|
"loss": 0.9978, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 8.947558426752804e-06, |
|
"loss": 0.9971, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 8.938058141744254e-06, |
|
"loss": 1.017, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 8.928557856735703e-06, |
|
"loss": 0.9685, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.919057571727153e-06, |
|
"loss": 0.9842, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.909557286718602e-06, |
|
"loss": 0.9858, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.900057001710052e-06, |
|
"loss": 1.0461, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.890556716701501e-06, |
|
"loss": 0.9706, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.881056431692952e-06, |
|
"loss": 0.9727, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.871556146684402e-06, |
|
"loss": 1.0025, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.862055861675851e-06, |
|
"loss": 1.0136, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.8525555766673e-06, |
|
"loss": 0.953, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.843055291658752e-06, |
|
"loss": 0.986, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.833555006650201e-06, |
|
"loss": 0.9878, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.824054721641649e-06, |
|
"loss": 0.9738, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.8145544366331e-06, |
|
"loss": 0.9894, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 8.80505415162455e-06, |
|
"loss": 0.9732, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.795553866615999e-06, |
|
"loss": 0.9799, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.786053581607449e-06, |
|
"loss": 1.0178, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.776553296598898e-06, |
|
"loss": 0.9576, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.767053011590349e-06, |
|
"loss": 1.0088, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 8.757552726581799e-06, |
|
"loss": 0.9657, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 8.748052441573248e-06, |
|
"loss": 0.9517, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.738552156564698e-06, |
|
"loss": 1.0624, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 8.729051871556149e-06, |
|
"loss": 1.0197, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.719551586547596e-06, |
|
"loss": 0.9647, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 8.710051301539046e-06, |
|
"loss": 1.0109, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.700741022230668e-06, |
|
"loss": 0.9923, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 8.691240737222117e-06, |
|
"loss": 0.9976, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.681740452213567e-06, |
|
"loss": 0.9929, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.672240167205018e-06, |
|
"loss": 0.9783, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 8.662739882196467e-06, |
|
"loss": 0.9503, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.653239597187917e-06, |
|
"loss": 0.9915, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 8.643739312179366e-06, |
|
"loss": 0.9502, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.634239027170816e-06, |
|
"loss": 0.9867, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 8.624738742162265e-06, |
|
"loss": 0.9929, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 8.615238457153715e-06, |
|
"loss": 1.0304, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 8.605738172145164e-06, |
|
"loss": 0.9841, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.596237887136615e-06, |
|
"loss": 0.9915, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.586737602128065e-06, |
|
"loss": 0.9691, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.577237317119514e-06, |
|
"loss": 0.9655, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 8.567737032110964e-06, |
|
"loss": 0.9884, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 8.558236747102415e-06, |
|
"loss": 0.9736, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.548736462093864e-06, |
|
"loss": 0.9462, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.539236177085312e-06, |
|
"loss": 1.0023, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.529735892076763e-06, |
|
"loss": 0.9696, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.520235607068213e-06, |
|
"loss": 0.9498, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 8.510735322059662e-06, |
|
"loss": 0.9689, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 8.501235037051112e-06, |
|
"loss": 1.0175, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.491734752042561e-06, |
|
"loss": 0.9783, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.482234467034012e-06, |
|
"loss": 0.9942, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 8.472734182025462e-06, |
|
"loss": 0.9631, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 8.463233897016911e-06, |
|
"loss": 0.9417, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 8.45373361200836e-06, |
|
"loss": 0.9449, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 8.444233326999812e-06, |
|
"loss": 0.986, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 8.434733041991261e-06, |
|
"loss": 0.9353, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 8.425232756982709e-06, |
|
"loss": 0.9693, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 8.41573247197416e-06, |
|
"loss": 0.966, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.40623218696561e-06, |
|
"loss": 0.968, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 8.396731901957059e-06, |
|
"loss": 0.9226, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.387231616948509e-06, |
|
"loss": 0.9361, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.377731331939958e-06, |
|
"loss": 0.9552, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 8.36823104693141e-06, |
|
"loss": 0.9482, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.358730761922859e-06, |
|
"loss": 0.968, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.349230476914308e-06, |
|
"loss": 0.9786, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.339730191905758e-06, |
|
"loss": 0.9615, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.330229906897209e-06, |
|
"loss": 0.964, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.320729621888657e-06, |
|
"loss": 0.9676, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.311229336880106e-06, |
|
"loss": 0.9532, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.301729051871557e-06, |
|
"loss": 0.9506, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.292418772563177e-06, |
|
"loss": 0.9513, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.282918487554627e-06, |
|
"loss": 0.9185, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.273418202546078e-06, |
|
"loss": 0.9706, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.263917917537527e-06, |
|
"loss": 0.9235, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.254417632528977e-06, |
|
"loss": 0.958, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.244917347520426e-06, |
|
"loss": 0.9587, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.235417062511876e-06, |
|
"loss": 0.9674, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.225916777503325e-06, |
|
"loss": 0.9496, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.216416492494775e-06, |
|
"loss": 0.9579, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.206916207486226e-06, |
|
"loss": 0.9654, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 8.197415922477675e-06, |
|
"loss": 0.9548, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 8.187915637469125e-06, |
|
"loss": 0.9631, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 8.178415352460574e-06, |
|
"loss": 0.9395, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 8.168915067452024e-06, |
|
"loss": 0.9646, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.159414782443475e-06, |
|
"loss": 0.9239, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 8.149914497434924e-06, |
|
"loss": 0.9699, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 8.140414212426372e-06, |
|
"loss": 0.9407, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 8.130913927417823e-06, |
|
"loss": 0.9519, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 8.121413642409273e-06, |
|
"loss": 0.9786, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 8.111913357400722e-06, |
|
"loss": 0.9691, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.102413072392172e-06, |
|
"loss": 0.9379, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 8.092912787383623e-06, |
|
"loss": 0.9676, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.083412502375072e-06, |
|
"loss": 0.9751, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 8.073912217366522e-06, |
|
"loss": 0.9705, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.064411932357971e-06, |
|
"loss": 0.9586, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.05491164734942e-06, |
|
"loss": 0.9573, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 8.045411362340872e-06, |
|
"loss": 0.9581, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 8.03591107733232e-06, |
|
"loss": 0.9454, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 8.026410792323769e-06, |
|
"loss": 0.948, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 8.01691050731522e-06, |
|
"loss": 0.9652, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 8.00741022230667e-06, |
|
"loss": 0.9722, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.99790993729812e-06, |
|
"loss": 0.9283, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 7.988409652289569e-06, |
|
"loss": 0.9244, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 7.97890936728102e-06, |
|
"loss": 0.9315, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 7.96940908227247e-06, |
|
"loss": 0.9376, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 7.959908797263919e-06, |
|
"loss": 0.9886, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 7.950408512255368e-06, |
|
"loss": 0.9419, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 7.940908227246818e-06, |
|
"loss": 0.9548, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 7.931407942238267e-06, |
|
"loss": 0.9429, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 7.921907657229717e-06, |
|
"loss": 0.9772, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 7.912597377921338e-06, |
|
"loss": 0.9443, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 7.903097092912788e-06, |
|
"loss": 0.9411, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 7.893596807904237e-06, |
|
"loss": 0.9222, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 7.884096522895689e-06, |
|
"loss": 0.9312, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.874596237887138e-06, |
|
"loss": 0.9408, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 7.865095952878587e-06, |
|
"loss": 0.9164, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 7.855595667870037e-06, |
|
"loss": 0.9823, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 7.846095382861486e-06, |
|
"loss": 0.9457, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 7.836595097852936e-06, |
|
"loss": 0.9605, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 7.827094812844385e-06, |
|
"loss": 0.9283, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 7.817594527835835e-06, |
|
"loss": 0.9115, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 7.808094242827286e-06, |
|
"loss": 0.9508, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 7.798593957818735e-06, |
|
"loss": 0.9419, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.789093672810185e-06, |
|
"loss": 0.9078, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.779593387801634e-06, |
|
"loss": 0.9313, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.770093102793086e-06, |
|
"loss": 0.9452, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.760592817784535e-06, |
|
"loss": 0.9401, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.751092532775983e-06, |
|
"loss": 0.9278, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.741592247767434e-06, |
|
"loss": 0.931, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.732091962758883e-06, |
|
"loss": 0.9436, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 7.722591677750333e-06, |
|
"loss": 0.9212, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.713091392741782e-06, |
|
"loss": 0.95, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 7.703591107733232e-06, |
|
"loss": 0.9701, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 7.694090822724683e-06, |
|
"loss": 0.9402, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.684590537716132e-06, |
|
"loss": 0.9419, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 7.675090252707582e-06, |
|
"loss": 0.9436, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 7.665589967699031e-06, |
|
"loss": 0.9464, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 7.656089682690483e-06, |
|
"loss": 0.939, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.64658939768193e-06, |
|
"loss": 0.9839, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 7.63708911267338e-06, |
|
"loss": 0.9176, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.62758882766483e-06, |
|
"loss": 0.8935, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 7.61808854265628e-06, |
|
"loss": 0.9215, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 7.60858825764773e-06, |
|
"loss": 0.9403, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 7.599087972639179e-06, |
|
"loss": 0.9272, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.58958768763063e-06, |
|
"loss": 0.9528, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 7.580087402622079e-06, |
|
"loss": 0.9039, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.570587117613529e-06, |
|
"loss": 0.9194, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 7.561086832604979e-06, |
|
"loss": 0.9306, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 7.551586547596429e-06, |
|
"loss": 0.9242, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.542086262587879e-06, |
|
"loss": 0.8961, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 7.532585977579327e-06, |
|
"loss": 0.936, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 7.523085692570778e-06, |
|
"loss": 0.9598, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.513585407562227e-06, |
|
"loss": 0.9114, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 7.504085122553677e-06, |
|
"loss": 0.9217, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 7.494584837545127e-06, |
|
"loss": 0.9041, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.485274558236748e-06, |
|
"loss": 0.9399, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.475774273228198e-06, |
|
"loss": 0.9176, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.466273988219647e-06, |
|
"loss": 0.9235, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.456773703211096e-06, |
|
"loss": 0.8947, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.4472734182025465e-06, |
|
"loss": 0.9205, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.437773133193996e-06, |
|
"loss": 0.9188, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.428272848185446e-06, |
|
"loss": 0.9285, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.418772563176896e-06, |
|
"loss": 0.93, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.409272278168346e-06, |
|
"loss": 0.9355, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.3997719931597955e-06, |
|
"loss": 0.9297, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 7.390271708151245e-06, |
|
"loss": 0.9037, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.380771423142695e-06, |
|
"loss": 0.9066, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.371271138134145e-06, |
|
"loss": 0.9081, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.361770853125595e-06, |
|
"loss": 0.9332, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.352270568117044e-06, |
|
"loss": 0.9515, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.342770283108493e-06, |
|
"loss": 0.9192, |
|
"step": 14000 |
|
} |
|
], |
|
"max_steps": 52630, |
|
"num_train_epochs": 10, |
|
"total_flos": 5.352601970064384e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|