Hans-Christian Bøge Pedersen
Training in progress, step 14000
48c9174
raw
history blame
No virus
34.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.6600798023940717,
"global_step": 14000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 9.992019760592819e-06,
"loss": 1.1564,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 9.982709481284439e-06,
"loss": 1.132,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 9.97320919627589e-06,
"loss": 1.1507,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 9.96370891126734e-06,
"loss": 1.0846,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 9.954208626258789e-06,
"loss": 1.0877,
"step": 250
},
{
"epoch": 0.06,
"learning_rate": 9.944708341250238e-06,
"loss": 1.1153,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 9.935208056241688e-06,
"loss": 1.0876,
"step": 350
},
{
"epoch": 0.08,
"learning_rate": 9.925707771233137e-06,
"loss": 1.0729,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 9.916207486224587e-06,
"loss": 1.126,
"step": 450
},
{
"epoch": 0.1,
"learning_rate": 9.906707201216036e-06,
"loss": 1.1262,
"step": 500
},
{
"epoch": 0.1,
"learning_rate": 9.897206916207487e-06,
"loss": 1.0883,
"step": 550
},
{
"epoch": 0.11,
"learning_rate": 9.887706631198937e-06,
"loss": 1.0926,
"step": 600
},
{
"epoch": 0.12,
"learning_rate": 9.878206346190386e-06,
"loss": 1.0916,
"step": 650
},
{
"epoch": 0.13,
"learning_rate": 9.868706061181836e-06,
"loss": 1.0892,
"step": 700
},
{
"epoch": 0.14,
"learning_rate": 9.859205776173287e-06,
"loss": 1.0653,
"step": 750
},
{
"epoch": 0.15,
"learning_rate": 9.849705491164736e-06,
"loss": 1.0391,
"step": 800
},
{
"epoch": 0.16,
"learning_rate": 9.840205206156186e-06,
"loss": 1.0866,
"step": 850
},
{
"epoch": 0.17,
"learning_rate": 9.830704921147635e-06,
"loss": 1.1192,
"step": 900
},
{
"epoch": 0.18,
"learning_rate": 9.821204636139085e-06,
"loss": 1.0898,
"step": 950
},
{
"epoch": 0.19,
"learning_rate": 9.811704351130534e-06,
"loss": 1.0693,
"step": 1000
},
{
"epoch": 0.2,
"learning_rate": 9.802204066121984e-06,
"loss": 1.1243,
"step": 1050
},
{
"epoch": 0.21,
"learning_rate": 9.792703781113433e-06,
"loss": 1.071,
"step": 1100
},
{
"epoch": 0.22,
"learning_rate": 9.783203496104884e-06,
"loss": 1.0569,
"step": 1150
},
{
"epoch": 0.23,
"learning_rate": 9.773703211096334e-06,
"loss": 1.0612,
"step": 1200
},
{
"epoch": 0.24,
"learning_rate": 9.764202926087783e-06,
"loss": 1.0432,
"step": 1250
},
{
"epoch": 0.25,
"learning_rate": 9.754702641079233e-06,
"loss": 1.092,
"step": 1300
},
{
"epoch": 0.26,
"learning_rate": 9.745202356070684e-06,
"loss": 1.0602,
"step": 1350
},
{
"epoch": 0.27,
"learning_rate": 9.735702071062133e-06,
"loss": 1.0689,
"step": 1400
},
{
"epoch": 0.28,
"learning_rate": 9.726201786053583e-06,
"loss": 1.0897,
"step": 1450
},
{
"epoch": 0.29,
"learning_rate": 9.716701501045032e-06,
"loss": 1.0482,
"step": 1500
},
{
"epoch": 0.29,
"learning_rate": 9.707201216036482e-06,
"loss": 1.1167,
"step": 1550
},
{
"epoch": 0.3,
"learning_rate": 9.697700931027931e-06,
"loss": 1.0707,
"step": 1600
},
{
"epoch": 0.31,
"learning_rate": 9.68820064601938e-06,
"loss": 1.0613,
"step": 1650
},
{
"epoch": 0.32,
"learning_rate": 9.67870036101083e-06,
"loss": 1.0534,
"step": 1700
},
{
"epoch": 0.33,
"learning_rate": 9.669200076002281e-06,
"loss": 1.0719,
"step": 1750
},
{
"epoch": 0.34,
"learning_rate": 9.659699790993731e-06,
"loss": 1.0678,
"step": 1800
},
{
"epoch": 0.35,
"learning_rate": 9.65019950598518e-06,
"loss": 1.064,
"step": 1850
},
{
"epoch": 0.36,
"learning_rate": 9.64069922097663e-06,
"loss": 1.0846,
"step": 1900
},
{
"epoch": 0.37,
"learning_rate": 9.631198935968081e-06,
"loss": 1.0716,
"step": 1950
},
{
"epoch": 0.38,
"learning_rate": 9.62169865095953e-06,
"loss": 1.0847,
"step": 2000
},
{
"epoch": 0.39,
"learning_rate": 9.612198365950978e-06,
"loss": 1.0237,
"step": 2050
},
{
"epoch": 0.4,
"learning_rate": 9.60269808094243e-06,
"loss": 1.0382,
"step": 2100
},
{
"epoch": 0.41,
"learning_rate": 9.593197795933879e-06,
"loss": 1.055,
"step": 2150
},
{
"epoch": 0.42,
"learning_rate": 9.583697510925328e-06,
"loss": 1.0257,
"step": 2200
},
{
"epoch": 0.43,
"learning_rate": 9.57438723161695e-06,
"loss": 1.0425,
"step": 2250
},
{
"epoch": 0.44,
"learning_rate": 9.5648869466084e-06,
"loss": 1.0603,
"step": 2300
},
{
"epoch": 0.45,
"learning_rate": 9.555386661599849e-06,
"loss": 1.0195,
"step": 2350
},
{
"epoch": 0.46,
"learning_rate": 9.545886376591299e-06,
"loss": 1.0311,
"step": 2400
},
{
"epoch": 0.47,
"learning_rate": 9.536386091582748e-06,
"loss": 1.0307,
"step": 2450
},
{
"epoch": 0.48,
"learning_rate": 9.526885806574197e-06,
"loss": 1.0965,
"step": 2500
},
{
"epoch": 0.48,
"learning_rate": 9.517385521565647e-06,
"loss": 1.0271,
"step": 2550
},
{
"epoch": 0.49,
"learning_rate": 9.507885236557098e-06,
"loss": 1.0315,
"step": 2600
},
{
"epoch": 0.5,
"learning_rate": 9.498384951548548e-06,
"loss": 1.0245,
"step": 2650
},
{
"epoch": 0.51,
"learning_rate": 9.488884666539997e-06,
"loss": 1.0208,
"step": 2700
},
{
"epoch": 0.52,
"learning_rate": 9.479384381531446e-06,
"loss": 1.0621,
"step": 2750
},
{
"epoch": 0.53,
"learning_rate": 9.469884096522896e-06,
"loss": 1.0774,
"step": 2800
},
{
"epoch": 0.54,
"learning_rate": 9.460383811514347e-06,
"loss": 1.0727,
"step": 2850
},
{
"epoch": 0.55,
"learning_rate": 9.450883526505797e-06,
"loss": 1.0411,
"step": 2900
},
{
"epoch": 0.56,
"learning_rate": 9.441383241497246e-06,
"loss": 1.0088,
"step": 2950
},
{
"epoch": 0.57,
"learning_rate": 9.431882956488696e-06,
"loss": 1.0133,
"step": 3000
},
{
"epoch": 0.58,
"learning_rate": 9.422382671480145e-06,
"loss": 1.0655,
"step": 3050
},
{
"epoch": 0.59,
"learning_rate": 9.412882386471594e-06,
"loss": 1.0241,
"step": 3100
},
{
"epoch": 0.6,
"learning_rate": 9.403382101463044e-06,
"loss": 1.0708,
"step": 3150
},
{
"epoch": 0.61,
"learning_rate": 9.393881816454495e-06,
"loss": 1.0304,
"step": 3200
},
{
"epoch": 0.62,
"learning_rate": 9.384381531445945e-06,
"loss": 1.0155,
"step": 3250
},
{
"epoch": 0.63,
"learning_rate": 9.374881246437394e-06,
"loss": 1.0224,
"step": 3300
},
{
"epoch": 0.64,
"learning_rate": 9.365380961428843e-06,
"loss": 1.0665,
"step": 3350
},
{
"epoch": 0.65,
"learning_rate": 9.355880676420293e-06,
"loss": 1.0238,
"step": 3400
},
{
"epoch": 0.66,
"learning_rate": 9.346380391411744e-06,
"loss": 1.1006,
"step": 3450
},
{
"epoch": 0.67,
"learning_rate": 9.336880106403194e-06,
"loss": 0.9987,
"step": 3500
},
{
"epoch": 0.67,
"learning_rate": 9.327379821394643e-06,
"loss": 1.0001,
"step": 3550
},
{
"epoch": 0.68,
"learning_rate": 9.317879536386093e-06,
"loss": 1.0634,
"step": 3600
},
{
"epoch": 0.69,
"learning_rate": 9.308379251377542e-06,
"loss": 1.0277,
"step": 3650
},
{
"epoch": 0.7,
"learning_rate": 9.298878966368991e-06,
"loss": 1.0311,
"step": 3700
},
{
"epoch": 0.71,
"learning_rate": 9.289378681360441e-06,
"loss": 1.0317,
"step": 3750
},
{
"epoch": 0.72,
"learning_rate": 9.279878396351892e-06,
"loss": 1.0787,
"step": 3800
},
{
"epoch": 0.73,
"learning_rate": 9.270378111343342e-06,
"loss": 1.0583,
"step": 3850
},
{
"epoch": 0.74,
"learning_rate": 9.260877826334791e-06,
"loss": 1.0326,
"step": 3900
},
{
"epoch": 0.75,
"learning_rate": 9.25137754132624e-06,
"loss": 0.989,
"step": 3950
},
{
"epoch": 0.76,
"learning_rate": 9.24187725631769e-06,
"loss": 0.9879,
"step": 4000
},
{
"epoch": 0.77,
"learning_rate": 9.232376971309141e-06,
"loss": 1.0399,
"step": 4050
},
{
"epoch": 0.78,
"learning_rate": 9.22287668630059e-06,
"loss": 1.0045,
"step": 4100
},
{
"epoch": 0.79,
"learning_rate": 9.213376401292038e-06,
"loss": 1.0079,
"step": 4150
},
{
"epoch": 0.8,
"learning_rate": 9.20387611628349e-06,
"loss": 0.999,
"step": 4200
},
{
"epoch": 0.81,
"learning_rate": 9.194375831274939e-06,
"loss": 0.9867,
"step": 4250
},
{
"epoch": 0.82,
"learning_rate": 9.184875546266388e-06,
"loss": 0.9878,
"step": 4300
},
{
"epoch": 0.83,
"learning_rate": 9.175375261257838e-06,
"loss": 1.0098,
"step": 4350
},
{
"epoch": 0.84,
"learning_rate": 9.165874976249289e-06,
"loss": 1.0325,
"step": 4400
},
{
"epoch": 0.85,
"learning_rate": 9.156374691240739e-06,
"loss": 1.0108,
"step": 4450
},
{
"epoch": 0.86,
"learning_rate": 9.146874406232188e-06,
"loss": 1.0167,
"step": 4500
},
{
"epoch": 0.86,
"learning_rate": 9.137374121223637e-06,
"loss": 0.9871,
"step": 4550
},
{
"epoch": 0.87,
"learning_rate": 9.128063841915258e-06,
"loss": 0.983,
"step": 4600
},
{
"epoch": 0.88,
"learning_rate": 9.118563556906707e-06,
"loss": 0.9807,
"step": 4650
},
{
"epoch": 0.89,
"learning_rate": 9.109063271898158e-06,
"loss": 0.9794,
"step": 4700
},
{
"epoch": 0.9,
"learning_rate": 9.099562986889608e-06,
"loss": 1.0505,
"step": 4750
},
{
"epoch": 0.91,
"learning_rate": 9.090062701881057e-06,
"loss": 1.0033,
"step": 4800
},
{
"epoch": 0.92,
"learning_rate": 9.080562416872507e-06,
"loss": 0.9822,
"step": 4850
},
{
"epoch": 0.93,
"learning_rate": 9.071062131863958e-06,
"loss": 1.0046,
"step": 4900
},
{
"epoch": 0.94,
"learning_rate": 9.061561846855407e-06,
"loss": 0.9944,
"step": 4950
},
{
"epoch": 0.95,
"learning_rate": 9.052061561846857e-06,
"loss": 1.0161,
"step": 5000
},
{
"epoch": 0.96,
"learning_rate": 9.042561276838306e-06,
"loss": 1.0239,
"step": 5050
},
{
"epoch": 0.97,
"learning_rate": 9.033060991829756e-06,
"loss": 0.978,
"step": 5100
},
{
"epoch": 0.98,
"learning_rate": 9.023560706821205e-06,
"loss": 1.0055,
"step": 5150
},
{
"epoch": 0.99,
"learning_rate": 9.014060421812655e-06,
"loss": 0.9623,
"step": 5200
},
{
"epoch": 1.0,
"learning_rate": 9.004560136804104e-06,
"loss": 0.9563,
"step": 5250
},
{
"epoch": 1.01,
"learning_rate": 8.995059851795555e-06,
"loss": 0.9714,
"step": 5300
},
{
"epoch": 1.02,
"learning_rate": 8.985559566787005e-06,
"loss": 0.9661,
"step": 5350
},
{
"epoch": 1.03,
"learning_rate": 8.976059281778454e-06,
"loss": 0.9727,
"step": 5400
},
{
"epoch": 1.04,
"learning_rate": 8.966558996769904e-06,
"loss": 0.9874,
"step": 5450
},
{
"epoch": 1.05,
"learning_rate": 8.957058711761355e-06,
"loss": 0.9978,
"step": 5500
},
{
"epoch": 1.05,
"learning_rate": 8.947558426752804e-06,
"loss": 0.9971,
"step": 5550
},
{
"epoch": 1.06,
"learning_rate": 8.938058141744254e-06,
"loss": 1.017,
"step": 5600
},
{
"epoch": 1.07,
"learning_rate": 8.928557856735703e-06,
"loss": 0.9685,
"step": 5650
},
{
"epoch": 1.08,
"learning_rate": 8.919057571727153e-06,
"loss": 0.9842,
"step": 5700
},
{
"epoch": 1.09,
"learning_rate": 8.909557286718602e-06,
"loss": 0.9858,
"step": 5750
},
{
"epoch": 1.1,
"learning_rate": 8.900057001710052e-06,
"loss": 1.0461,
"step": 5800
},
{
"epoch": 1.11,
"learning_rate": 8.890556716701501e-06,
"loss": 0.9706,
"step": 5850
},
{
"epoch": 1.12,
"learning_rate": 8.881056431692952e-06,
"loss": 0.9727,
"step": 5900
},
{
"epoch": 1.13,
"learning_rate": 8.871556146684402e-06,
"loss": 1.0025,
"step": 5950
},
{
"epoch": 1.14,
"learning_rate": 8.862055861675851e-06,
"loss": 1.0136,
"step": 6000
},
{
"epoch": 1.15,
"learning_rate": 8.8525555766673e-06,
"loss": 0.953,
"step": 6050
},
{
"epoch": 1.16,
"learning_rate": 8.843055291658752e-06,
"loss": 0.986,
"step": 6100
},
{
"epoch": 1.17,
"learning_rate": 8.833555006650201e-06,
"loss": 0.9878,
"step": 6150
},
{
"epoch": 1.18,
"learning_rate": 8.824054721641649e-06,
"loss": 0.9738,
"step": 6200
},
{
"epoch": 1.19,
"learning_rate": 8.8145544366331e-06,
"loss": 0.9894,
"step": 6250
},
{
"epoch": 1.2,
"learning_rate": 8.80505415162455e-06,
"loss": 0.9732,
"step": 6300
},
{
"epoch": 1.21,
"learning_rate": 8.795553866615999e-06,
"loss": 0.9799,
"step": 6350
},
{
"epoch": 1.22,
"learning_rate": 8.786053581607449e-06,
"loss": 1.0178,
"step": 6400
},
{
"epoch": 1.23,
"learning_rate": 8.776553296598898e-06,
"loss": 0.9576,
"step": 6450
},
{
"epoch": 1.24,
"learning_rate": 8.767053011590349e-06,
"loss": 1.0088,
"step": 6500
},
{
"epoch": 1.24,
"learning_rate": 8.757552726581799e-06,
"loss": 0.9657,
"step": 6550
},
{
"epoch": 1.25,
"learning_rate": 8.748052441573248e-06,
"loss": 0.9517,
"step": 6600
},
{
"epoch": 1.26,
"learning_rate": 8.738552156564698e-06,
"loss": 1.0624,
"step": 6650
},
{
"epoch": 1.27,
"learning_rate": 8.729051871556149e-06,
"loss": 1.0197,
"step": 6700
},
{
"epoch": 1.28,
"learning_rate": 8.719551586547596e-06,
"loss": 0.9647,
"step": 6750
},
{
"epoch": 1.29,
"learning_rate": 8.710051301539046e-06,
"loss": 1.0109,
"step": 6800
},
{
"epoch": 1.3,
"learning_rate": 8.700741022230668e-06,
"loss": 0.9923,
"step": 6850
},
{
"epoch": 1.31,
"learning_rate": 8.691240737222117e-06,
"loss": 0.9976,
"step": 6900
},
{
"epoch": 1.32,
"learning_rate": 8.681740452213567e-06,
"loss": 0.9929,
"step": 6950
},
{
"epoch": 1.33,
"learning_rate": 8.672240167205018e-06,
"loss": 0.9783,
"step": 7000
},
{
"epoch": 1.34,
"learning_rate": 8.662739882196467e-06,
"loss": 0.9503,
"step": 7050
},
{
"epoch": 1.35,
"learning_rate": 8.653239597187917e-06,
"loss": 0.9915,
"step": 7100
},
{
"epoch": 1.36,
"learning_rate": 8.643739312179366e-06,
"loss": 0.9502,
"step": 7150
},
{
"epoch": 1.37,
"learning_rate": 8.634239027170816e-06,
"loss": 0.9867,
"step": 7200
},
{
"epoch": 1.38,
"learning_rate": 8.624738742162265e-06,
"loss": 0.9929,
"step": 7250
},
{
"epoch": 1.39,
"learning_rate": 8.615238457153715e-06,
"loss": 1.0304,
"step": 7300
},
{
"epoch": 1.4,
"learning_rate": 8.605738172145164e-06,
"loss": 0.9841,
"step": 7350
},
{
"epoch": 1.41,
"learning_rate": 8.596237887136615e-06,
"loss": 0.9915,
"step": 7400
},
{
"epoch": 1.42,
"learning_rate": 8.586737602128065e-06,
"loss": 0.9691,
"step": 7450
},
{
"epoch": 1.43,
"learning_rate": 8.577237317119514e-06,
"loss": 0.9655,
"step": 7500
},
{
"epoch": 1.43,
"learning_rate": 8.567737032110964e-06,
"loss": 0.9884,
"step": 7550
},
{
"epoch": 1.44,
"learning_rate": 8.558236747102415e-06,
"loss": 0.9736,
"step": 7600
},
{
"epoch": 1.45,
"learning_rate": 8.548736462093864e-06,
"loss": 0.9462,
"step": 7650
},
{
"epoch": 1.46,
"learning_rate": 8.539236177085312e-06,
"loss": 1.0023,
"step": 7700
},
{
"epoch": 1.47,
"learning_rate": 8.529735892076763e-06,
"loss": 0.9696,
"step": 7750
},
{
"epoch": 1.48,
"learning_rate": 8.520235607068213e-06,
"loss": 0.9498,
"step": 7800
},
{
"epoch": 1.49,
"learning_rate": 8.510735322059662e-06,
"loss": 0.9689,
"step": 7850
},
{
"epoch": 1.5,
"learning_rate": 8.501235037051112e-06,
"loss": 1.0175,
"step": 7900
},
{
"epoch": 1.51,
"learning_rate": 8.491734752042561e-06,
"loss": 0.9783,
"step": 7950
},
{
"epoch": 1.52,
"learning_rate": 8.482234467034012e-06,
"loss": 0.9942,
"step": 8000
},
{
"epoch": 1.53,
"learning_rate": 8.472734182025462e-06,
"loss": 0.9631,
"step": 8050
},
{
"epoch": 1.54,
"learning_rate": 8.463233897016911e-06,
"loss": 0.9417,
"step": 8100
},
{
"epoch": 1.55,
"learning_rate": 8.45373361200836e-06,
"loss": 0.9449,
"step": 8150
},
{
"epoch": 1.56,
"learning_rate": 8.444233326999812e-06,
"loss": 0.986,
"step": 8200
},
{
"epoch": 1.57,
"learning_rate": 8.434733041991261e-06,
"loss": 0.9353,
"step": 8250
},
{
"epoch": 1.58,
"learning_rate": 8.425232756982709e-06,
"loss": 0.9693,
"step": 8300
},
{
"epoch": 1.59,
"learning_rate": 8.41573247197416e-06,
"loss": 0.966,
"step": 8350
},
{
"epoch": 1.6,
"learning_rate": 8.40623218696561e-06,
"loss": 0.968,
"step": 8400
},
{
"epoch": 1.61,
"learning_rate": 8.396731901957059e-06,
"loss": 0.9226,
"step": 8450
},
{
"epoch": 1.62,
"learning_rate": 8.387231616948509e-06,
"loss": 0.9361,
"step": 8500
},
{
"epoch": 1.62,
"learning_rate": 8.377731331939958e-06,
"loss": 0.9552,
"step": 8550
},
{
"epoch": 1.63,
"learning_rate": 8.36823104693141e-06,
"loss": 0.9482,
"step": 8600
},
{
"epoch": 1.64,
"learning_rate": 8.358730761922859e-06,
"loss": 0.968,
"step": 8650
},
{
"epoch": 1.65,
"learning_rate": 8.349230476914308e-06,
"loss": 0.9786,
"step": 8700
},
{
"epoch": 1.66,
"learning_rate": 8.339730191905758e-06,
"loss": 0.9615,
"step": 8750
},
{
"epoch": 1.67,
"learning_rate": 8.330229906897209e-06,
"loss": 0.964,
"step": 8800
},
{
"epoch": 1.68,
"learning_rate": 8.320729621888657e-06,
"loss": 0.9676,
"step": 8850
},
{
"epoch": 1.69,
"learning_rate": 8.311229336880106e-06,
"loss": 0.9532,
"step": 8900
},
{
"epoch": 1.7,
"learning_rate": 8.301729051871557e-06,
"loss": 0.9506,
"step": 8950
},
{
"epoch": 1.71,
"learning_rate": 8.292418772563177e-06,
"loss": 0.9513,
"step": 9000
},
{
"epoch": 1.72,
"learning_rate": 8.282918487554627e-06,
"loss": 0.9185,
"step": 9050
},
{
"epoch": 1.73,
"learning_rate": 8.273418202546078e-06,
"loss": 0.9706,
"step": 9100
},
{
"epoch": 1.74,
"learning_rate": 8.263917917537527e-06,
"loss": 0.9235,
"step": 9150
},
{
"epoch": 1.75,
"learning_rate": 8.254417632528977e-06,
"loss": 0.958,
"step": 9200
},
{
"epoch": 1.76,
"learning_rate": 8.244917347520426e-06,
"loss": 0.9587,
"step": 9250
},
{
"epoch": 1.77,
"learning_rate": 8.235417062511876e-06,
"loss": 0.9674,
"step": 9300
},
{
"epoch": 1.78,
"learning_rate": 8.225916777503325e-06,
"loss": 0.9496,
"step": 9350
},
{
"epoch": 1.79,
"learning_rate": 8.216416492494775e-06,
"loss": 0.9579,
"step": 9400
},
{
"epoch": 1.8,
"learning_rate": 8.206916207486226e-06,
"loss": 0.9654,
"step": 9450
},
{
"epoch": 1.81,
"learning_rate": 8.197415922477675e-06,
"loss": 0.9548,
"step": 9500
},
{
"epoch": 1.81,
"learning_rate": 8.187915637469125e-06,
"loss": 0.9631,
"step": 9550
},
{
"epoch": 1.82,
"learning_rate": 8.178415352460574e-06,
"loss": 0.9395,
"step": 9600
},
{
"epoch": 1.83,
"learning_rate": 8.168915067452024e-06,
"loss": 0.9646,
"step": 9650
},
{
"epoch": 1.84,
"learning_rate": 8.159414782443475e-06,
"loss": 0.9239,
"step": 9700
},
{
"epoch": 1.85,
"learning_rate": 8.149914497434924e-06,
"loss": 0.9699,
"step": 9750
},
{
"epoch": 1.86,
"learning_rate": 8.140414212426372e-06,
"loss": 0.9407,
"step": 9800
},
{
"epoch": 1.87,
"learning_rate": 8.130913927417823e-06,
"loss": 0.9519,
"step": 9850
},
{
"epoch": 1.88,
"learning_rate": 8.121413642409273e-06,
"loss": 0.9786,
"step": 9900
},
{
"epoch": 1.89,
"learning_rate": 8.111913357400722e-06,
"loss": 0.9691,
"step": 9950
},
{
"epoch": 1.9,
"learning_rate": 8.102413072392172e-06,
"loss": 0.9379,
"step": 10000
},
{
"epoch": 1.91,
"learning_rate": 8.092912787383623e-06,
"loss": 0.9676,
"step": 10050
},
{
"epoch": 1.92,
"learning_rate": 8.083412502375072e-06,
"loss": 0.9751,
"step": 10100
},
{
"epoch": 1.93,
"learning_rate": 8.073912217366522e-06,
"loss": 0.9705,
"step": 10150
},
{
"epoch": 1.94,
"learning_rate": 8.064411932357971e-06,
"loss": 0.9586,
"step": 10200
},
{
"epoch": 1.95,
"learning_rate": 8.05491164734942e-06,
"loss": 0.9573,
"step": 10250
},
{
"epoch": 1.96,
"learning_rate": 8.045411362340872e-06,
"loss": 0.9581,
"step": 10300
},
{
"epoch": 1.97,
"learning_rate": 8.03591107733232e-06,
"loss": 0.9454,
"step": 10350
},
{
"epoch": 1.98,
"learning_rate": 8.026410792323769e-06,
"loss": 0.948,
"step": 10400
},
{
"epoch": 1.99,
"learning_rate": 8.01691050731522e-06,
"loss": 0.9652,
"step": 10450
},
{
"epoch": 2.0,
"learning_rate": 8.00741022230667e-06,
"loss": 0.9722,
"step": 10500
},
{
"epoch": 2.0,
"learning_rate": 7.99790993729812e-06,
"loss": 0.9283,
"step": 10550
},
{
"epoch": 2.01,
"learning_rate": 7.988409652289569e-06,
"loss": 0.9244,
"step": 10600
},
{
"epoch": 2.02,
"learning_rate": 7.97890936728102e-06,
"loss": 0.9315,
"step": 10650
},
{
"epoch": 2.03,
"learning_rate": 7.96940908227247e-06,
"loss": 0.9376,
"step": 10700
},
{
"epoch": 2.04,
"learning_rate": 7.959908797263919e-06,
"loss": 0.9886,
"step": 10750
},
{
"epoch": 2.05,
"learning_rate": 7.950408512255368e-06,
"loss": 0.9419,
"step": 10800
},
{
"epoch": 2.06,
"learning_rate": 7.940908227246818e-06,
"loss": 0.9548,
"step": 10850
},
{
"epoch": 2.07,
"learning_rate": 7.931407942238267e-06,
"loss": 0.9429,
"step": 10900
},
{
"epoch": 2.08,
"learning_rate": 7.921907657229717e-06,
"loss": 0.9772,
"step": 10950
},
{
"epoch": 2.09,
"learning_rate": 7.912597377921338e-06,
"loss": 0.9443,
"step": 11000
},
{
"epoch": 2.1,
"learning_rate": 7.903097092912788e-06,
"loss": 0.9411,
"step": 11050
},
{
"epoch": 2.11,
"learning_rate": 7.893596807904237e-06,
"loss": 0.9222,
"step": 11100
},
{
"epoch": 2.12,
"learning_rate": 7.884096522895689e-06,
"loss": 0.9312,
"step": 11150
},
{
"epoch": 2.13,
"learning_rate": 7.874596237887138e-06,
"loss": 0.9408,
"step": 11200
},
{
"epoch": 2.14,
"learning_rate": 7.865095952878587e-06,
"loss": 0.9164,
"step": 11250
},
{
"epoch": 2.15,
"learning_rate": 7.855595667870037e-06,
"loss": 0.9823,
"step": 11300
},
{
"epoch": 2.16,
"learning_rate": 7.846095382861486e-06,
"loss": 0.9457,
"step": 11350
},
{
"epoch": 2.17,
"learning_rate": 7.836595097852936e-06,
"loss": 0.9605,
"step": 11400
},
{
"epoch": 2.18,
"learning_rate": 7.827094812844385e-06,
"loss": 0.9283,
"step": 11450
},
{
"epoch": 2.19,
"learning_rate": 7.817594527835835e-06,
"loss": 0.9115,
"step": 11500
},
{
"epoch": 2.19,
"learning_rate": 7.808094242827286e-06,
"loss": 0.9508,
"step": 11550
},
{
"epoch": 2.2,
"learning_rate": 7.798593957818735e-06,
"loss": 0.9419,
"step": 11600
},
{
"epoch": 2.21,
"learning_rate": 7.789093672810185e-06,
"loss": 0.9078,
"step": 11650
},
{
"epoch": 2.22,
"learning_rate": 7.779593387801634e-06,
"loss": 0.9313,
"step": 11700
},
{
"epoch": 2.23,
"learning_rate": 7.770093102793086e-06,
"loss": 0.9452,
"step": 11750
},
{
"epoch": 2.24,
"learning_rate": 7.760592817784535e-06,
"loss": 0.9401,
"step": 11800
},
{
"epoch": 2.25,
"learning_rate": 7.751092532775983e-06,
"loss": 0.9278,
"step": 11850
},
{
"epoch": 2.26,
"learning_rate": 7.741592247767434e-06,
"loss": 0.931,
"step": 11900
},
{
"epoch": 2.27,
"learning_rate": 7.732091962758883e-06,
"loss": 0.9436,
"step": 11950
},
{
"epoch": 2.28,
"learning_rate": 7.722591677750333e-06,
"loss": 0.9212,
"step": 12000
},
{
"epoch": 2.29,
"learning_rate": 7.713091392741782e-06,
"loss": 0.95,
"step": 12050
},
{
"epoch": 2.3,
"learning_rate": 7.703591107733232e-06,
"loss": 0.9701,
"step": 12100
},
{
"epoch": 2.31,
"learning_rate": 7.694090822724683e-06,
"loss": 0.9402,
"step": 12150
},
{
"epoch": 2.32,
"learning_rate": 7.684590537716132e-06,
"loss": 0.9419,
"step": 12200
},
{
"epoch": 2.33,
"learning_rate": 7.675090252707582e-06,
"loss": 0.9436,
"step": 12250
},
{
"epoch": 2.34,
"learning_rate": 7.665589967699031e-06,
"loss": 0.9464,
"step": 12300
},
{
"epoch": 2.35,
"learning_rate": 7.656089682690483e-06,
"loss": 0.939,
"step": 12350
},
{
"epoch": 2.36,
"learning_rate": 7.64658939768193e-06,
"loss": 0.9839,
"step": 12400
},
{
"epoch": 2.37,
"learning_rate": 7.63708911267338e-06,
"loss": 0.9176,
"step": 12450
},
{
"epoch": 2.38,
"learning_rate": 7.62758882766483e-06,
"loss": 0.8935,
"step": 12500
},
{
"epoch": 2.38,
"learning_rate": 7.61808854265628e-06,
"loss": 0.9215,
"step": 12550
},
{
"epoch": 2.39,
"learning_rate": 7.60858825764773e-06,
"loss": 0.9403,
"step": 12600
},
{
"epoch": 2.4,
"learning_rate": 7.599087972639179e-06,
"loss": 0.9272,
"step": 12650
},
{
"epoch": 2.41,
"learning_rate": 7.58958768763063e-06,
"loss": 0.9528,
"step": 12700
},
{
"epoch": 2.42,
"learning_rate": 7.580087402622079e-06,
"loss": 0.9039,
"step": 12750
},
{
"epoch": 2.43,
"learning_rate": 7.570587117613529e-06,
"loss": 0.9194,
"step": 12800
},
{
"epoch": 2.44,
"learning_rate": 7.561086832604979e-06,
"loss": 0.9306,
"step": 12850
},
{
"epoch": 2.45,
"learning_rate": 7.551586547596429e-06,
"loss": 0.9242,
"step": 12900
},
{
"epoch": 2.46,
"learning_rate": 7.542086262587879e-06,
"loss": 0.8961,
"step": 12950
},
{
"epoch": 2.47,
"learning_rate": 7.532585977579327e-06,
"loss": 0.936,
"step": 13000
},
{
"epoch": 2.48,
"learning_rate": 7.523085692570778e-06,
"loss": 0.9598,
"step": 13050
},
{
"epoch": 2.49,
"learning_rate": 7.513585407562227e-06,
"loss": 0.9114,
"step": 13100
},
{
"epoch": 2.5,
"learning_rate": 7.504085122553677e-06,
"loss": 0.9217,
"step": 13150
},
{
"epoch": 2.51,
"learning_rate": 7.494584837545127e-06,
"loss": 0.9041,
"step": 13200
},
{
"epoch": 2.52,
"learning_rate": 7.485274558236748e-06,
"loss": 0.9399,
"step": 13250
},
{
"epoch": 2.53,
"learning_rate": 7.475774273228198e-06,
"loss": 0.9176,
"step": 13300
},
{
"epoch": 2.54,
"learning_rate": 7.466273988219647e-06,
"loss": 0.9235,
"step": 13350
},
{
"epoch": 2.55,
"learning_rate": 7.456773703211096e-06,
"loss": 0.8947,
"step": 13400
},
{
"epoch": 2.56,
"learning_rate": 7.4472734182025465e-06,
"loss": 0.9205,
"step": 13450
},
{
"epoch": 2.57,
"learning_rate": 7.437773133193996e-06,
"loss": 0.9188,
"step": 13500
},
{
"epoch": 2.57,
"learning_rate": 7.428272848185446e-06,
"loss": 0.9285,
"step": 13550
},
{
"epoch": 2.58,
"learning_rate": 7.418772563176896e-06,
"loss": 0.93,
"step": 13600
},
{
"epoch": 2.59,
"learning_rate": 7.409272278168346e-06,
"loss": 0.9355,
"step": 13650
},
{
"epoch": 2.6,
"learning_rate": 7.3997719931597955e-06,
"loss": 0.9297,
"step": 13700
},
{
"epoch": 2.61,
"learning_rate": 7.390271708151245e-06,
"loss": 0.9037,
"step": 13750
},
{
"epoch": 2.62,
"learning_rate": 7.380771423142695e-06,
"loss": 0.9066,
"step": 13800
},
{
"epoch": 2.63,
"learning_rate": 7.371271138134145e-06,
"loss": 0.9081,
"step": 13850
},
{
"epoch": 2.64,
"learning_rate": 7.361770853125595e-06,
"loss": 0.9332,
"step": 13900
},
{
"epoch": 2.65,
"learning_rate": 7.352270568117044e-06,
"loss": 0.9515,
"step": 13950
},
{
"epoch": 2.66,
"learning_rate": 7.342770283108493e-06,
"loss": 0.9192,
"step": 14000
}
],
"max_steps": 52630,
"num_train_epochs": 10,
"total_flos": 5.352601970064384e+19,
"trial_name": null,
"trial_params": null
}