{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.6600798023940717, "global_step": 14000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 9.992019760592819e-06, "loss": 1.1564, "step": 50 }, { "epoch": 0.02, "learning_rate": 9.982709481284439e-06, "loss": 1.132, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.97320919627589e-06, "loss": 1.1507, "step": 150 }, { "epoch": 0.04, "learning_rate": 9.96370891126734e-06, "loss": 1.0846, "step": 200 }, { "epoch": 0.05, "learning_rate": 9.954208626258789e-06, "loss": 1.0877, "step": 250 }, { "epoch": 0.06, "learning_rate": 9.944708341250238e-06, "loss": 1.1153, "step": 300 }, { "epoch": 0.07, "learning_rate": 9.935208056241688e-06, "loss": 1.0876, "step": 350 }, { "epoch": 0.08, "learning_rate": 9.925707771233137e-06, "loss": 1.0729, "step": 400 }, { "epoch": 0.09, "learning_rate": 9.916207486224587e-06, "loss": 1.126, "step": 450 }, { "epoch": 0.1, "learning_rate": 9.906707201216036e-06, "loss": 1.1262, "step": 500 }, { "epoch": 0.1, "learning_rate": 9.897206916207487e-06, "loss": 1.0883, "step": 550 }, { "epoch": 0.11, "learning_rate": 9.887706631198937e-06, "loss": 1.0926, "step": 600 }, { "epoch": 0.12, "learning_rate": 9.878206346190386e-06, "loss": 1.0916, "step": 650 }, { "epoch": 0.13, "learning_rate": 9.868706061181836e-06, "loss": 1.0892, "step": 700 }, { "epoch": 0.14, "learning_rate": 9.859205776173287e-06, "loss": 1.0653, "step": 750 }, { "epoch": 0.15, "learning_rate": 9.849705491164736e-06, "loss": 1.0391, "step": 800 }, { "epoch": 0.16, "learning_rate": 9.840205206156186e-06, "loss": 1.0866, "step": 850 }, { "epoch": 0.17, "learning_rate": 9.830704921147635e-06, "loss": 1.1192, "step": 900 }, { "epoch": 0.18, "learning_rate": 9.821204636139085e-06, "loss": 1.0898, "step": 950 }, { "epoch": 0.19, "learning_rate": 9.811704351130534e-06, "loss": 1.0693, "step": 1000 }, { "epoch": 0.2, "learning_rate": 9.802204066121984e-06, "loss": 1.1243, "step": 1050 }, { "epoch": 0.21, "learning_rate": 9.792703781113433e-06, "loss": 1.071, "step": 1100 }, { "epoch": 0.22, "learning_rate": 9.783203496104884e-06, "loss": 1.0569, "step": 1150 }, { "epoch": 0.23, "learning_rate": 9.773703211096334e-06, "loss": 1.0612, "step": 1200 }, { "epoch": 0.24, "learning_rate": 9.764202926087783e-06, "loss": 1.0432, "step": 1250 }, { "epoch": 0.25, "learning_rate": 9.754702641079233e-06, "loss": 1.092, "step": 1300 }, { "epoch": 0.26, "learning_rate": 9.745202356070684e-06, "loss": 1.0602, "step": 1350 }, { "epoch": 0.27, "learning_rate": 9.735702071062133e-06, "loss": 1.0689, "step": 1400 }, { "epoch": 0.28, "learning_rate": 9.726201786053583e-06, "loss": 1.0897, "step": 1450 }, { "epoch": 0.29, "learning_rate": 9.716701501045032e-06, "loss": 1.0482, "step": 1500 }, { "epoch": 0.29, "learning_rate": 9.707201216036482e-06, "loss": 1.1167, "step": 1550 }, { "epoch": 0.3, "learning_rate": 9.697700931027931e-06, "loss": 1.0707, "step": 1600 }, { "epoch": 0.31, "learning_rate": 9.68820064601938e-06, "loss": 1.0613, "step": 1650 }, { "epoch": 0.32, "learning_rate": 9.67870036101083e-06, "loss": 1.0534, "step": 1700 }, { "epoch": 0.33, "learning_rate": 9.669200076002281e-06, "loss": 1.0719, "step": 1750 }, { "epoch": 0.34, "learning_rate": 9.659699790993731e-06, "loss": 1.0678, "step": 1800 }, { "epoch": 0.35, "learning_rate": 9.65019950598518e-06, "loss": 1.064, "step": 1850 }, { "epoch": 0.36, "learning_rate": 9.64069922097663e-06, "loss": 1.0846, "step": 1900 }, { "epoch": 0.37, "learning_rate": 9.631198935968081e-06, "loss": 1.0716, "step": 1950 }, { "epoch": 0.38, "learning_rate": 9.62169865095953e-06, "loss": 1.0847, "step": 2000 }, { "epoch": 0.39, "learning_rate": 9.612198365950978e-06, "loss": 1.0237, "step": 2050 }, { "epoch": 0.4, "learning_rate": 9.60269808094243e-06, "loss": 1.0382, "step": 2100 }, { "epoch": 0.41, "learning_rate": 9.593197795933879e-06, "loss": 1.055, "step": 2150 }, { "epoch": 0.42, "learning_rate": 9.583697510925328e-06, "loss": 1.0257, "step": 2200 }, { "epoch": 0.43, "learning_rate": 9.57438723161695e-06, "loss": 1.0425, "step": 2250 }, { "epoch": 0.44, "learning_rate": 9.5648869466084e-06, "loss": 1.0603, "step": 2300 }, { "epoch": 0.45, "learning_rate": 9.555386661599849e-06, "loss": 1.0195, "step": 2350 }, { "epoch": 0.46, "learning_rate": 9.545886376591299e-06, "loss": 1.0311, "step": 2400 }, { "epoch": 0.47, "learning_rate": 9.536386091582748e-06, "loss": 1.0307, "step": 2450 }, { "epoch": 0.48, "learning_rate": 9.526885806574197e-06, "loss": 1.0965, "step": 2500 }, { "epoch": 0.48, "learning_rate": 9.517385521565647e-06, "loss": 1.0271, "step": 2550 }, { "epoch": 0.49, "learning_rate": 9.507885236557098e-06, "loss": 1.0315, "step": 2600 }, { "epoch": 0.5, "learning_rate": 9.498384951548548e-06, "loss": 1.0245, "step": 2650 }, { "epoch": 0.51, "learning_rate": 9.488884666539997e-06, "loss": 1.0208, "step": 2700 }, { "epoch": 0.52, "learning_rate": 9.479384381531446e-06, "loss": 1.0621, "step": 2750 }, { "epoch": 0.53, "learning_rate": 9.469884096522896e-06, "loss": 1.0774, "step": 2800 }, { "epoch": 0.54, "learning_rate": 9.460383811514347e-06, "loss": 1.0727, "step": 2850 }, { "epoch": 0.55, "learning_rate": 9.450883526505797e-06, "loss": 1.0411, "step": 2900 }, { "epoch": 0.56, "learning_rate": 9.441383241497246e-06, "loss": 1.0088, "step": 2950 }, { "epoch": 0.57, "learning_rate": 9.431882956488696e-06, "loss": 1.0133, "step": 3000 }, { "epoch": 0.58, "learning_rate": 9.422382671480145e-06, "loss": 1.0655, "step": 3050 }, { "epoch": 0.59, "learning_rate": 9.412882386471594e-06, "loss": 1.0241, "step": 3100 }, { "epoch": 0.6, "learning_rate": 9.403382101463044e-06, "loss": 1.0708, "step": 3150 }, { "epoch": 0.61, "learning_rate": 9.393881816454495e-06, "loss": 1.0304, "step": 3200 }, { "epoch": 0.62, "learning_rate": 9.384381531445945e-06, "loss": 1.0155, "step": 3250 }, { "epoch": 0.63, "learning_rate": 9.374881246437394e-06, "loss": 1.0224, "step": 3300 }, { "epoch": 0.64, "learning_rate": 9.365380961428843e-06, "loss": 1.0665, "step": 3350 }, { "epoch": 0.65, "learning_rate": 9.355880676420293e-06, "loss": 1.0238, "step": 3400 }, { "epoch": 0.66, "learning_rate": 9.346380391411744e-06, "loss": 1.1006, "step": 3450 }, { "epoch": 0.67, "learning_rate": 9.336880106403194e-06, "loss": 0.9987, "step": 3500 }, { "epoch": 0.67, "learning_rate": 9.327379821394643e-06, "loss": 1.0001, "step": 3550 }, { "epoch": 0.68, "learning_rate": 9.317879536386093e-06, "loss": 1.0634, "step": 3600 }, { "epoch": 0.69, "learning_rate": 9.308379251377542e-06, "loss": 1.0277, "step": 3650 }, { "epoch": 0.7, "learning_rate": 9.298878966368991e-06, "loss": 1.0311, "step": 3700 }, { "epoch": 0.71, "learning_rate": 9.289378681360441e-06, "loss": 1.0317, "step": 3750 }, { "epoch": 0.72, "learning_rate": 9.279878396351892e-06, "loss": 1.0787, "step": 3800 }, { "epoch": 0.73, "learning_rate": 9.270378111343342e-06, "loss": 1.0583, "step": 3850 }, { "epoch": 0.74, "learning_rate": 9.260877826334791e-06, "loss": 1.0326, "step": 3900 }, { "epoch": 0.75, "learning_rate": 9.25137754132624e-06, "loss": 0.989, "step": 3950 }, { "epoch": 0.76, "learning_rate": 9.24187725631769e-06, "loss": 0.9879, "step": 4000 }, { "epoch": 0.77, "learning_rate": 9.232376971309141e-06, "loss": 1.0399, "step": 4050 }, { "epoch": 0.78, "learning_rate": 9.22287668630059e-06, "loss": 1.0045, "step": 4100 }, { "epoch": 0.79, "learning_rate": 9.213376401292038e-06, "loss": 1.0079, "step": 4150 }, { "epoch": 0.8, "learning_rate": 9.20387611628349e-06, "loss": 0.999, "step": 4200 }, { "epoch": 0.81, "learning_rate": 9.194375831274939e-06, "loss": 0.9867, "step": 4250 }, { "epoch": 0.82, "learning_rate": 9.184875546266388e-06, "loss": 0.9878, "step": 4300 }, { "epoch": 0.83, "learning_rate": 9.175375261257838e-06, "loss": 1.0098, "step": 4350 }, { "epoch": 0.84, "learning_rate": 9.165874976249289e-06, "loss": 1.0325, "step": 4400 }, { "epoch": 0.85, "learning_rate": 9.156374691240739e-06, "loss": 1.0108, "step": 4450 }, { "epoch": 0.86, "learning_rate": 9.146874406232188e-06, "loss": 1.0167, "step": 4500 }, { "epoch": 0.86, "learning_rate": 9.137374121223637e-06, "loss": 0.9871, "step": 4550 }, { "epoch": 0.87, "learning_rate": 9.128063841915258e-06, "loss": 0.983, "step": 4600 }, { "epoch": 0.88, "learning_rate": 9.118563556906707e-06, "loss": 0.9807, "step": 4650 }, { "epoch": 0.89, "learning_rate": 9.109063271898158e-06, "loss": 0.9794, "step": 4700 }, { "epoch": 0.9, "learning_rate": 9.099562986889608e-06, "loss": 1.0505, "step": 4750 }, { "epoch": 0.91, "learning_rate": 9.090062701881057e-06, "loss": 1.0033, "step": 4800 }, { "epoch": 0.92, "learning_rate": 9.080562416872507e-06, "loss": 0.9822, "step": 4850 }, { "epoch": 0.93, "learning_rate": 9.071062131863958e-06, "loss": 1.0046, "step": 4900 }, { "epoch": 0.94, "learning_rate": 9.061561846855407e-06, "loss": 0.9944, "step": 4950 }, { "epoch": 0.95, "learning_rate": 9.052061561846857e-06, "loss": 1.0161, "step": 5000 }, { "epoch": 0.96, "learning_rate": 9.042561276838306e-06, "loss": 1.0239, "step": 5050 }, { "epoch": 0.97, "learning_rate": 9.033060991829756e-06, "loss": 0.978, "step": 5100 }, { "epoch": 0.98, "learning_rate": 9.023560706821205e-06, "loss": 1.0055, "step": 5150 }, { "epoch": 0.99, "learning_rate": 9.014060421812655e-06, "loss": 0.9623, "step": 5200 }, { "epoch": 1.0, "learning_rate": 9.004560136804104e-06, "loss": 0.9563, "step": 5250 }, { "epoch": 1.01, "learning_rate": 8.995059851795555e-06, "loss": 0.9714, "step": 5300 }, { "epoch": 1.02, "learning_rate": 8.985559566787005e-06, "loss": 0.9661, "step": 5350 }, { "epoch": 1.03, "learning_rate": 8.976059281778454e-06, "loss": 0.9727, "step": 5400 }, { "epoch": 1.04, "learning_rate": 8.966558996769904e-06, "loss": 0.9874, "step": 5450 }, { "epoch": 1.05, "learning_rate": 8.957058711761355e-06, "loss": 0.9978, "step": 5500 }, { "epoch": 1.05, "learning_rate": 8.947558426752804e-06, "loss": 0.9971, "step": 5550 }, { "epoch": 1.06, "learning_rate": 8.938058141744254e-06, "loss": 1.017, "step": 5600 }, { "epoch": 1.07, "learning_rate": 8.928557856735703e-06, "loss": 0.9685, "step": 5650 }, { "epoch": 1.08, "learning_rate": 8.919057571727153e-06, "loss": 0.9842, "step": 5700 }, { "epoch": 1.09, "learning_rate": 8.909557286718602e-06, "loss": 0.9858, "step": 5750 }, { "epoch": 1.1, "learning_rate": 8.900057001710052e-06, "loss": 1.0461, "step": 5800 }, { "epoch": 1.11, "learning_rate": 8.890556716701501e-06, "loss": 0.9706, "step": 5850 }, { "epoch": 1.12, "learning_rate": 8.881056431692952e-06, "loss": 0.9727, "step": 5900 }, { "epoch": 1.13, "learning_rate": 8.871556146684402e-06, "loss": 1.0025, "step": 5950 }, { "epoch": 1.14, "learning_rate": 8.862055861675851e-06, "loss": 1.0136, "step": 6000 }, { "epoch": 1.15, "learning_rate": 8.8525555766673e-06, "loss": 0.953, "step": 6050 }, { "epoch": 1.16, "learning_rate": 8.843055291658752e-06, "loss": 0.986, "step": 6100 }, { "epoch": 1.17, "learning_rate": 8.833555006650201e-06, "loss": 0.9878, "step": 6150 }, { "epoch": 1.18, "learning_rate": 8.824054721641649e-06, "loss": 0.9738, "step": 6200 }, { "epoch": 1.19, "learning_rate": 8.8145544366331e-06, "loss": 0.9894, "step": 6250 }, { "epoch": 1.2, "learning_rate": 8.80505415162455e-06, "loss": 0.9732, "step": 6300 }, { "epoch": 1.21, "learning_rate": 8.795553866615999e-06, "loss": 0.9799, "step": 6350 }, { "epoch": 1.22, "learning_rate": 8.786053581607449e-06, "loss": 1.0178, "step": 6400 }, { "epoch": 1.23, "learning_rate": 8.776553296598898e-06, "loss": 0.9576, "step": 6450 }, { "epoch": 1.24, "learning_rate": 8.767053011590349e-06, "loss": 1.0088, "step": 6500 }, { "epoch": 1.24, "learning_rate": 8.757552726581799e-06, "loss": 0.9657, "step": 6550 }, { "epoch": 1.25, "learning_rate": 8.748052441573248e-06, "loss": 0.9517, "step": 6600 }, { "epoch": 1.26, "learning_rate": 8.738552156564698e-06, "loss": 1.0624, "step": 6650 }, { "epoch": 1.27, "learning_rate": 8.729051871556149e-06, "loss": 1.0197, "step": 6700 }, { "epoch": 1.28, "learning_rate": 8.719551586547596e-06, "loss": 0.9647, "step": 6750 }, { "epoch": 1.29, "learning_rate": 8.710051301539046e-06, "loss": 1.0109, "step": 6800 }, { "epoch": 1.3, "learning_rate": 8.700741022230668e-06, "loss": 0.9923, "step": 6850 }, { "epoch": 1.31, "learning_rate": 8.691240737222117e-06, "loss": 0.9976, "step": 6900 }, { "epoch": 1.32, "learning_rate": 8.681740452213567e-06, "loss": 0.9929, "step": 6950 }, { "epoch": 1.33, "learning_rate": 8.672240167205018e-06, "loss": 0.9783, "step": 7000 }, { "epoch": 1.34, "learning_rate": 8.662739882196467e-06, "loss": 0.9503, "step": 7050 }, { "epoch": 1.35, "learning_rate": 8.653239597187917e-06, "loss": 0.9915, "step": 7100 }, { "epoch": 1.36, "learning_rate": 8.643739312179366e-06, "loss": 0.9502, "step": 7150 }, { "epoch": 1.37, "learning_rate": 8.634239027170816e-06, "loss": 0.9867, "step": 7200 }, { "epoch": 1.38, "learning_rate": 8.624738742162265e-06, "loss": 0.9929, "step": 7250 }, { "epoch": 1.39, "learning_rate": 8.615238457153715e-06, "loss": 1.0304, "step": 7300 }, { "epoch": 1.4, "learning_rate": 8.605738172145164e-06, "loss": 0.9841, "step": 7350 }, { "epoch": 1.41, "learning_rate": 8.596237887136615e-06, "loss": 0.9915, "step": 7400 }, { "epoch": 1.42, "learning_rate": 8.586737602128065e-06, "loss": 0.9691, "step": 7450 }, { "epoch": 1.43, "learning_rate": 8.577237317119514e-06, "loss": 0.9655, "step": 7500 }, { "epoch": 1.43, "learning_rate": 8.567737032110964e-06, "loss": 0.9884, "step": 7550 }, { "epoch": 1.44, "learning_rate": 8.558236747102415e-06, "loss": 0.9736, "step": 7600 }, { "epoch": 1.45, "learning_rate": 8.548736462093864e-06, "loss": 0.9462, "step": 7650 }, { "epoch": 1.46, "learning_rate": 8.539236177085312e-06, "loss": 1.0023, "step": 7700 }, { "epoch": 1.47, "learning_rate": 8.529735892076763e-06, "loss": 0.9696, "step": 7750 }, { "epoch": 1.48, "learning_rate": 8.520235607068213e-06, "loss": 0.9498, "step": 7800 }, { "epoch": 1.49, "learning_rate": 8.510735322059662e-06, "loss": 0.9689, "step": 7850 }, { "epoch": 1.5, "learning_rate": 8.501235037051112e-06, "loss": 1.0175, "step": 7900 }, { "epoch": 1.51, "learning_rate": 8.491734752042561e-06, "loss": 0.9783, "step": 7950 }, { "epoch": 1.52, "learning_rate": 8.482234467034012e-06, "loss": 0.9942, "step": 8000 }, { "epoch": 1.53, "learning_rate": 8.472734182025462e-06, "loss": 0.9631, "step": 8050 }, { "epoch": 1.54, "learning_rate": 8.463233897016911e-06, "loss": 0.9417, "step": 8100 }, { "epoch": 1.55, "learning_rate": 8.45373361200836e-06, "loss": 0.9449, "step": 8150 }, { "epoch": 1.56, "learning_rate": 8.444233326999812e-06, "loss": 0.986, "step": 8200 }, { "epoch": 1.57, "learning_rate": 8.434733041991261e-06, "loss": 0.9353, "step": 8250 }, { "epoch": 1.58, "learning_rate": 8.425232756982709e-06, "loss": 0.9693, "step": 8300 }, { "epoch": 1.59, "learning_rate": 8.41573247197416e-06, "loss": 0.966, "step": 8350 }, { "epoch": 1.6, "learning_rate": 8.40623218696561e-06, "loss": 0.968, "step": 8400 }, { "epoch": 1.61, "learning_rate": 8.396731901957059e-06, "loss": 0.9226, "step": 8450 }, { "epoch": 1.62, "learning_rate": 8.387231616948509e-06, "loss": 0.9361, "step": 8500 }, { "epoch": 1.62, "learning_rate": 8.377731331939958e-06, "loss": 0.9552, "step": 8550 }, { "epoch": 1.63, "learning_rate": 8.36823104693141e-06, "loss": 0.9482, "step": 8600 }, { "epoch": 1.64, "learning_rate": 8.358730761922859e-06, "loss": 0.968, "step": 8650 }, { "epoch": 1.65, "learning_rate": 8.349230476914308e-06, "loss": 0.9786, "step": 8700 }, { "epoch": 1.66, "learning_rate": 8.339730191905758e-06, "loss": 0.9615, "step": 8750 }, { "epoch": 1.67, "learning_rate": 8.330229906897209e-06, "loss": 0.964, "step": 8800 }, { "epoch": 1.68, "learning_rate": 8.320729621888657e-06, "loss": 0.9676, "step": 8850 }, { "epoch": 1.69, "learning_rate": 8.311229336880106e-06, "loss": 0.9532, "step": 8900 }, { "epoch": 1.7, "learning_rate": 8.301729051871557e-06, "loss": 0.9506, "step": 8950 }, { "epoch": 1.71, "learning_rate": 8.292418772563177e-06, "loss": 0.9513, "step": 9000 }, { "epoch": 1.72, "learning_rate": 8.282918487554627e-06, "loss": 0.9185, "step": 9050 }, { "epoch": 1.73, "learning_rate": 8.273418202546078e-06, "loss": 0.9706, "step": 9100 }, { "epoch": 1.74, "learning_rate": 8.263917917537527e-06, "loss": 0.9235, "step": 9150 }, { "epoch": 1.75, "learning_rate": 8.254417632528977e-06, "loss": 0.958, "step": 9200 }, { "epoch": 1.76, "learning_rate": 8.244917347520426e-06, "loss": 0.9587, "step": 9250 }, { "epoch": 1.77, "learning_rate": 8.235417062511876e-06, "loss": 0.9674, "step": 9300 }, { "epoch": 1.78, "learning_rate": 8.225916777503325e-06, "loss": 0.9496, "step": 9350 }, { "epoch": 1.79, "learning_rate": 8.216416492494775e-06, "loss": 0.9579, "step": 9400 }, { "epoch": 1.8, "learning_rate": 8.206916207486226e-06, "loss": 0.9654, "step": 9450 }, { "epoch": 1.81, "learning_rate": 8.197415922477675e-06, "loss": 0.9548, "step": 9500 }, { "epoch": 1.81, "learning_rate": 8.187915637469125e-06, "loss": 0.9631, "step": 9550 }, { "epoch": 1.82, "learning_rate": 8.178415352460574e-06, "loss": 0.9395, "step": 9600 }, { "epoch": 1.83, "learning_rate": 8.168915067452024e-06, "loss": 0.9646, "step": 9650 }, { "epoch": 1.84, "learning_rate": 8.159414782443475e-06, "loss": 0.9239, "step": 9700 }, { "epoch": 1.85, "learning_rate": 8.149914497434924e-06, "loss": 0.9699, "step": 9750 }, { "epoch": 1.86, "learning_rate": 8.140414212426372e-06, "loss": 0.9407, "step": 9800 }, { "epoch": 1.87, "learning_rate": 8.130913927417823e-06, "loss": 0.9519, "step": 9850 }, { "epoch": 1.88, "learning_rate": 8.121413642409273e-06, "loss": 0.9786, "step": 9900 }, { "epoch": 1.89, "learning_rate": 8.111913357400722e-06, "loss": 0.9691, "step": 9950 }, { "epoch": 1.9, "learning_rate": 8.102413072392172e-06, "loss": 0.9379, "step": 10000 }, { "epoch": 1.91, "learning_rate": 8.092912787383623e-06, "loss": 0.9676, "step": 10050 }, { "epoch": 1.92, "learning_rate": 8.083412502375072e-06, "loss": 0.9751, "step": 10100 }, { "epoch": 1.93, "learning_rate": 8.073912217366522e-06, "loss": 0.9705, "step": 10150 }, { "epoch": 1.94, "learning_rate": 8.064411932357971e-06, "loss": 0.9586, "step": 10200 }, { "epoch": 1.95, "learning_rate": 8.05491164734942e-06, "loss": 0.9573, "step": 10250 }, { "epoch": 1.96, "learning_rate": 8.045411362340872e-06, "loss": 0.9581, "step": 10300 }, { "epoch": 1.97, "learning_rate": 8.03591107733232e-06, "loss": 0.9454, "step": 10350 }, { "epoch": 1.98, "learning_rate": 8.026410792323769e-06, "loss": 0.948, "step": 10400 }, { "epoch": 1.99, "learning_rate": 8.01691050731522e-06, "loss": 0.9652, "step": 10450 }, { "epoch": 2.0, "learning_rate": 8.00741022230667e-06, "loss": 0.9722, "step": 10500 }, { "epoch": 2.0, "learning_rate": 7.99790993729812e-06, "loss": 0.9283, "step": 10550 }, { "epoch": 2.01, "learning_rate": 7.988409652289569e-06, "loss": 0.9244, "step": 10600 }, { "epoch": 2.02, "learning_rate": 7.97890936728102e-06, "loss": 0.9315, "step": 10650 }, { "epoch": 2.03, "learning_rate": 7.96940908227247e-06, "loss": 0.9376, "step": 10700 }, { "epoch": 2.04, "learning_rate": 7.959908797263919e-06, "loss": 0.9886, "step": 10750 }, { "epoch": 2.05, "learning_rate": 7.950408512255368e-06, "loss": 0.9419, "step": 10800 }, { "epoch": 2.06, "learning_rate": 7.940908227246818e-06, "loss": 0.9548, "step": 10850 }, { "epoch": 2.07, "learning_rate": 7.931407942238267e-06, "loss": 0.9429, "step": 10900 }, { "epoch": 2.08, "learning_rate": 7.921907657229717e-06, "loss": 0.9772, "step": 10950 }, { "epoch": 2.09, "learning_rate": 7.912597377921338e-06, "loss": 0.9443, "step": 11000 }, { "epoch": 2.1, "learning_rate": 7.903097092912788e-06, "loss": 0.9411, "step": 11050 }, { "epoch": 2.11, "learning_rate": 7.893596807904237e-06, "loss": 0.9222, "step": 11100 }, { "epoch": 2.12, "learning_rate": 7.884096522895689e-06, "loss": 0.9312, "step": 11150 }, { "epoch": 2.13, "learning_rate": 7.874596237887138e-06, "loss": 0.9408, "step": 11200 }, { "epoch": 2.14, "learning_rate": 7.865095952878587e-06, "loss": 0.9164, "step": 11250 }, { "epoch": 2.15, "learning_rate": 7.855595667870037e-06, "loss": 0.9823, "step": 11300 }, { "epoch": 2.16, "learning_rate": 7.846095382861486e-06, "loss": 0.9457, "step": 11350 }, { "epoch": 2.17, "learning_rate": 7.836595097852936e-06, "loss": 0.9605, "step": 11400 }, { "epoch": 2.18, "learning_rate": 7.827094812844385e-06, "loss": 0.9283, "step": 11450 }, { "epoch": 2.19, "learning_rate": 7.817594527835835e-06, "loss": 0.9115, "step": 11500 }, { "epoch": 2.19, "learning_rate": 7.808094242827286e-06, "loss": 0.9508, "step": 11550 }, { "epoch": 2.2, "learning_rate": 7.798593957818735e-06, "loss": 0.9419, "step": 11600 }, { "epoch": 2.21, "learning_rate": 7.789093672810185e-06, "loss": 0.9078, "step": 11650 }, { "epoch": 2.22, "learning_rate": 7.779593387801634e-06, "loss": 0.9313, "step": 11700 }, { "epoch": 2.23, "learning_rate": 7.770093102793086e-06, "loss": 0.9452, "step": 11750 }, { "epoch": 2.24, "learning_rate": 7.760592817784535e-06, "loss": 0.9401, "step": 11800 }, { "epoch": 2.25, "learning_rate": 7.751092532775983e-06, "loss": 0.9278, "step": 11850 }, { "epoch": 2.26, "learning_rate": 7.741592247767434e-06, "loss": 0.931, "step": 11900 }, { "epoch": 2.27, "learning_rate": 7.732091962758883e-06, "loss": 0.9436, "step": 11950 }, { "epoch": 2.28, "learning_rate": 7.722591677750333e-06, "loss": 0.9212, "step": 12000 }, { "epoch": 2.29, "learning_rate": 7.713091392741782e-06, "loss": 0.95, "step": 12050 }, { "epoch": 2.3, "learning_rate": 7.703591107733232e-06, "loss": 0.9701, "step": 12100 }, { "epoch": 2.31, "learning_rate": 7.694090822724683e-06, "loss": 0.9402, "step": 12150 }, { "epoch": 2.32, "learning_rate": 7.684590537716132e-06, "loss": 0.9419, "step": 12200 }, { "epoch": 2.33, "learning_rate": 7.675090252707582e-06, "loss": 0.9436, "step": 12250 }, { "epoch": 2.34, "learning_rate": 7.665589967699031e-06, "loss": 0.9464, "step": 12300 }, { "epoch": 2.35, "learning_rate": 7.656089682690483e-06, "loss": 0.939, "step": 12350 }, { "epoch": 2.36, "learning_rate": 7.64658939768193e-06, "loss": 0.9839, "step": 12400 }, { "epoch": 2.37, "learning_rate": 7.63708911267338e-06, "loss": 0.9176, "step": 12450 }, { "epoch": 2.38, "learning_rate": 7.62758882766483e-06, "loss": 0.8935, "step": 12500 }, { "epoch": 2.38, "learning_rate": 7.61808854265628e-06, "loss": 0.9215, "step": 12550 }, { "epoch": 2.39, "learning_rate": 7.60858825764773e-06, "loss": 0.9403, "step": 12600 }, { "epoch": 2.4, "learning_rate": 7.599087972639179e-06, "loss": 0.9272, "step": 12650 }, { "epoch": 2.41, "learning_rate": 7.58958768763063e-06, "loss": 0.9528, "step": 12700 }, { "epoch": 2.42, "learning_rate": 7.580087402622079e-06, "loss": 0.9039, "step": 12750 }, { "epoch": 2.43, "learning_rate": 7.570587117613529e-06, "loss": 0.9194, "step": 12800 }, { "epoch": 2.44, "learning_rate": 7.561086832604979e-06, "loss": 0.9306, "step": 12850 }, { "epoch": 2.45, "learning_rate": 7.551586547596429e-06, "loss": 0.9242, "step": 12900 }, { "epoch": 2.46, "learning_rate": 7.542086262587879e-06, "loss": 0.8961, "step": 12950 }, { "epoch": 2.47, "learning_rate": 7.532585977579327e-06, "loss": 0.936, "step": 13000 }, { "epoch": 2.48, "learning_rate": 7.523085692570778e-06, "loss": 0.9598, "step": 13050 }, { "epoch": 2.49, "learning_rate": 7.513585407562227e-06, "loss": 0.9114, "step": 13100 }, { "epoch": 2.5, "learning_rate": 7.504085122553677e-06, "loss": 0.9217, "step": 13150 }, { "epoch": 2.51, "learning_rate": 7.494584837545127e-06, "loss": 0.9041, "step": 13200 }, { "epoch": 2.52, "learning_rate": 7.485274558236748e-06, "loss": 0.9399, "step": 13250 }, { "epoch": 2.53, "learning_rate": 7.475774273228198e-06, "loss": 0.9176, "step": 13300 }, { "epoch": 2.54, "learning_rate": 7.466273988219647e-06, "loss": 0.9235, "step": 13350 }, { "epoch": 2.55, "learning_rate": 7.456773703211096e-06, "loss": 0.8947, "step": 13400 }, { "epoch": 2.56, "learning_rate": 7.4472734182025465e-06, "loss": 0.9205, "step": 13450 }, { "epoch": 2.57, "learning_rate": 7.437773133193996e-06, "loss": 0.9188, "step": 13500 }, { "epoch": 2.57, "learning_rate": 7.428272848185446e-06, "loss": 0.9285, "step": 13550 }, { "epoch": 2.58, "learning_rate": 7.418772563176896e-06, "loss": 0.93, "step": 13600 }, { "epoch": 2.59, "learning_rate": 7.409272278168346e-06, "loss": 0.9355, "step": 13650 }, { "epoch": 2.6, "learning_rate": 7.3997719931597955e-06, "loss": 0.9297, "step": 13700 }, { "epoch": 2.61, "learning_rate": 7.390271708151245e-06, "loss": 0.9037, "step": 13750 }, { "epoch": 2.62, "learning_rate": 7.380771423142695e-06, "loss": 0.9066, "step": 13800 }, { "epoch": 2.63, "learning_rate": 7.371271138134145e-06, "loss": 0.9081, "step": 13850 }, { "epoch": 2.64, "learning_rate": 7.361770853125595e-06, "loss": 0.9332, "step": 13900 }, { "epoch": 2.65, "learning_rate": 7.352270568117044e-06, "loss": 0.9515, "step": 13950 }, { "epoch": 2.66, "learning_rate": 7.342770283108493e-06, "loss": 0.9192, "step": 14000 } ], "max_steps": 52630, "num_train_epochs": 10, "total_flos": 5.352601970064384e+19, "trial_name": null, "trial_params": null }