| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1626, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0024622960911049553, |
| "grad_norm": 1.5390625, |
| "learning_rate": 2.040816326530612e-10, |
| "loss": 1.3865270614624023, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0049245921822099106, |
| "grad_norm": 4.375, |
| "learning_rate": 6.122448979591837e-10, |
| "loss": 1.8760377168655396, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.007386888273314866, |
| "grad_norm": 2.359375, |
| "learning_rate": 1.020408163265306e-09, |
| "loss": 1.1314038038253784, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.009849184364419821, |
| "grad_norm": 5.71875, |
| "learning_rate": 1.4285714285714286e-09, |
| "loss": 1.8253700733184814, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.012311480455524777, |
| "grad_norm": 12.625, |
| "learning_rate": 1.8367346938775511e-09, |
| "loss": 2.2051210403442383, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.014773776546629732, |
| "grad_norm": 20.375, |
| "learning_rate": 2.2448979591836736e-09, |
| "loss": 2.4439101219177246, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.017236072637734686, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.653061224489796e-09, |
| "loss": 1.3878843784332275, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.019698368728839642, |
| "grad_norm": 1.765625, |
| "learning_rate": 3.0612244897959187e-09, |
| "loss": 1.1822748184204102, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0221606648199446, |
| "grad_norm": 2.53125, |
| "learning_rate": 3.4693877551020408e-09, |
| "loss": 1.1794735193252563, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.024622960911049555, |
| "grad_norm": 14.625, |
| "learning_rate": 3.877551020408163e-09, |
| "loss": 2.3212547302246094, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02708525700215451, |
| "grad_norm": 5.625, |
| "learning_rate": 4.285714285714286e-09, |
| "loss": 1.7700073719024658, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.029547553093259463, |
| "grad_norm": 14.25, |
| "learning_rate": 4.693877551020409e-09, |
| "loss": 2.191647529602051, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.03200984918436442, |
| "grad_norm": 4.15625, |
| "learning_rate": 5.102040816326531e-09, |
| "loss": 1.7301385402679443, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.03447214527546937, |
| "grad_norm": 14.1875, |
| "learning_rate": 5.510204081632653e-09, |
| "loss": 2.343463659286499, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.03693444136657433, |
| "grad_norm": 5.90625, |
| "learning_rate": 5.918367346938776e-09, |
| "loss": 1.2581849098205566, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.039396737457679284, |
| "grad_norm": 5.1875, |
| "learning_rate": 6.326530612244899e-09, |
| "loss": 1.9037660360336304, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.041859033548784244, |
| "grad_norm": 6.25, |
| "learning_rate": 6.73469387755102e-09, |
| "loss": 1.8926417827606201, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0443213296398892, |
| "grad_norm": 4.15625, |
| "learning_rate": 7.142857142857143e-09, |
| "loss": 1.494161605834961, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.04678362573099415, |
| "grad_norm": 72.5, |
| "learning_rate": 7.551020408163264e-09, |
| "loss": 2.4310765266418457, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.04924592182209911, |
| "grad_norm": 13.1875, |
| "learning_rate": 7.959183673469387e-09, |
| "loss": 2.401200294494629, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.05170821791320406, |
| "grad_norm": 17.875, |
| "learning_rate": 8.36734693877551e-09, |
| "loss": 2.269543170928955, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.05417051400430902, |
| "grad_norm": 6.375, |
| "learning_rate": 8.775510204081633e-09, |
| "loss": 1.880392074584961, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.056632810095413974, |
| "grad_norm": 10.9375, |
| "learning_rate": 9.183673469387756e-09, |
| "loss": 2.2891359329223633, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.05909510618651893, |
| "grad_norm": 2.953125, |
| "learning_rate": 9.591836734693877e-09, |
| "loss": 1.245388150215149, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.061557402277623886, |
| "grad_norm": 14.0625, |
| "learning_rate": 1e-08, |
| "loss": 1.8519728183746338, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.06401969836872884, |
| "grad_norm": 12.3125, |
| "learning_rate": 9.99996825131286e-09, |
| "loss": 2.678940773010254, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0664819944598338, |
| "grad_norm": 13.4375, |
| "learning_rate": 9.999873005755431e-09, |
| "loss": 2.3168435096740723, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.06894429055093874, |
| "grad_norm": 23.125, |
| "learning_rate": 9.999714264839672e-09, |
| "loss": 2.218395233154297, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0714065866420437, |
| "grad_norm": 3.265625, |
| "learning_rate": 9.999492031085492e-09, |
| "loss": 1.2967658042907715, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.07386888273314866, |
| "grad_norm": 8.4375, |
| "learning_rate": 9.999206308020707e-09, |
| "loss": 2.0597116947174072, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.07633117882425362, |
| "grad_norm": 3.984375, |
| "learning_rate": 9.99885710018098e-09, |
| "loss": 1.6437733173370361, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.07879347491535857, |
| "grad_norm": 6.9375, |
| "learning_rate": 9.99844441310976e-09, |
| "loss": 1.878865122795105, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.08125577100646353, |
| "grad_norm": 5.34375, |
| "learning_rate": 9.997968253358178e-09, |
| "loss": 1.8909335136413574, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.08371806709756849, |
| "grad_norm": 15.9375, |
| "learning_rate": 9.997428628484963e-09, |
| "loss": 2.290242910385132, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.08618036318867343, |
| "grad_norm": 8.9375, |
| "learning_rate": 9.996825547056302e-09, |
| "loss": 2.0678482055664062, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.0886426592797784, |
| "grad_norm": 5.75, |
| "learning_rate": 9.996159018645721e-09, |
| "loss": 1.8928303718566895, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.09110495537088335, |
| "grad_norm": 7.53125, |
| "learning_rate": 9.995429053833917e-09, |
| "loss": 1.9023447036743164, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.0935672514619883, |
| "grad_norm": 7.59375, |
| "learning_rate": 9.994635664208602e-09, |
| "loss": 1.914489507675171, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.09602954755309326, |
| "grad_norm": 11.125, |
| "learning_rate": 9.99377886236432e-09, |
| "loss": 2.057431221008301, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.09849184364419822, |
| "grad_norm": 7.21875, |
| "learning_rate": 9.992858661902233e-09, |
| "loss": 1.9636759757995605, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10095413973530316, |
| "grad_norm": 4.15625, |
| "learning_rate": 9.99187507742992e-09, |
| "loss": 1.298654317855835, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.10341643582640812, |
| "grad_norm": 2.953125, |
| "learning_rate": 9.990828124561143e-09, |
| "loss": 1.1845377683639526, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.10587873191751308, |
| "grad_norm": 12.0625, |
| "learning_rate": 9.989717819915584e-09, |
| "loss": 2.3120527267456055, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.10834102800861804, |
| "grad_norm": 6.75, |
| "learning_rate": 9.988544181118608e-09, |
| "loss": 1.792182445526123, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.11080332409972299, |
| "grad_norm": 4.03125, |
| "learning_rate": 9.987307226800957e-09, |
| "loss": 1.4169440269470215, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.11326562019082795, |
| "grad_norm": 19.375, |
| "learning_rate": 9.98600697659847e-09, |
| "loss": 2.2629003524780273, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.11572791628193291, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.984643451151764e-09, |
| "loss": 1.8561232089996338, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.11819021237303785, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.98321667210591e-09, |
| "loss": 1.8327598571777344, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.12065250846414281, |
| "grad_norm": 3.34375, |
| "learning_rate": 9.98172666211009e-09, |
| "loss": 1.2463821172714233, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.12311480455524777, |
| "grad_norm": 6.375, |
| "learning_rate": 9.980173444817238e-09, |
| "loss": 1.351346731185913, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.12557710064635272, |
| "grad_norm": 4.0625, |
| "learning_rate": 9.978557044883651e-09, |
| "loss": 1.2666093111038208, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.12803939673745768, |
| "grad_norm": 4.78125, |
| "learning_rate": 9.976877487968623e-09, |
| "loss": 1.905246615409851, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.13050169282856264, |
| "grad_norm": 3.0, |
| "learning_rate": 9.975134800734015e-09, |
| "loss": 1.1379789113998413, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.1329639889196676, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.973329010843847e-09, |
| "loss": 1.8731987476348877, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.13542628501077256, |
| "grad_norm": 10.3125, |
| "learning_rate": 9.97146014696384e-09, |
| "loss": 1.897504448890686, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1378885811018775, |
| "grad_norm": 2.375, |
| "learning_rate": 9.96952823876099e-09, |
| "loss": 1.1055809259414673, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.14035087719298245, |
| "grad_norm": 14.5625, |
| "learning_rate": 9.967533316903066e-09, |
| "loss": 2.4285759925842285, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.1428131732840874, |
| "grad_norm": 6.0625, |
| "learning_rate": 9.965475413058142e-09, |
| "loss": 1.8401623964309692, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.14527546937519237, |
| "grad_norm": 3.625, |
| "learning_rate": 9.963354559894099e-09, |
| "loss": 1.2698298692703247, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.14773776546629733, |
| "grad_norm": 1.6875, |
| "learning_rate": 9.961170791078078e-09, |
| "loss": 1.1040065288543701, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1502000615574023, |
| "grad_norm": 16.375, |
| "learning_rate": 9.958924141275982e-09, |
| "loss": 1.8983745574951172, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.15266235764850725, |
| "grad_norm": 5.125, |
| "learning_rate": 9.956614646151903e-09, |
| "loss": 1.9957232475280762, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.15512465373961218, |
| "grad_norm": 12.75, |
| "learning_rate": 9.954242342367555e-09, |
| "loss": 2.3904964923858643, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.15758694983071714, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.951807267581707e-09, |
| "loss": 1.8866188526153564, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.1600492459218221, |
| "grad_norm": 1.984375, |
| "learning_rate": 9.94930946044957e-09, |
| "loss": 1.2808419466018677, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.16251154201292706, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.946748960622197e-09, |
| "loss": 1.3167526721954346, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.16497383810403202, |
| "grad_norm": 2.4375, |
| "learning_rate": 9.944125808745837e-09, |
| "loss": 1.2127764225006104, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.16743613419513698, |
| "grad_norm": 4.5, |
| "learning_rate": 9.941440046461305e-09, |
| "loss": 1.9335191249847412, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.1698984302862419, |
| "grad_norm": 9.0, |
| "learning_rate": 9.938691716403316e-09, |
| "loss": 1.9803462028503418, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.17236072637734687, |
| "grad_norm": 4.65625, |
| "learning_rate": 9.935880862199809e-09, |
| "loss": 1.820433259010315, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.17482302246845183, |
| "grad_norm": 5.78125, |
| "learning_rate": 9.93300752847124e-09, |
| "loss": 1.9337809085845947, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.1772853185595568, |
| "grad_norm": 5.28125, |
| "learning_rate": 9.930071760829904e-09, |
| "loss": 1.8973931074142456, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.17974761465066175, |
| "grad_norm": 5.40625, |
| "learning_rate": 9.927073605879185e-09, |
| "loss": 1.9531124830245972, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.1822099107417667, |
| "grad_norm": 5.75, |
| "learning_rate": 9.924013111212818e-09, |
| "loss": 1.9310762882232666, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.18467220683287167, |
| "grad_norm": 9.375, |
| "learning_rate": 9.920890325414153e-09, |
| "loss": 2.008820056915283, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.1871345029239766, |
| "grad_norm": 82.5, |
| "learning_rate": 9.917705298055361e-09, |
| "loss": 3.0185141563415527, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.18959679901508156, |
| "grad_norm": 8.625, |
| "learning_rate": 9.914458079696664e-09, |
| "loss": 2.008962631225586, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.19205909510618652, |
| "grad_norm": 9.25, |
| "learning_rate": 9.91114872188552e-09, |
| "loss": 1.6197317838668823, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.19452139119729148, |
| "grad_norm": 4.53125, |
| "learning_rate": 9.907777277155811e-09, |
| "loss": 1.8305246829986572, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.19698368728839644, |
| "grad_norm": 9.75, |
| "learning_rate": 9.904343799027012e-09, |
| "loss": 1.9033877849578857, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1994459833795014, |
| "grad_norm": 8.5, |
| "learning_rate": 9.90084834200333e-09, |
| "loss": 1.9224884510040283, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.20190827947060633, |
| "grad_norm": 5.5, |
| "learning_rate": 9.897290961572854e-09, |
| "loss": 1.5109963417053223, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.2043705755617113, |
| "grad_norm": 6.0625, |
| "learning_rate": 9.893671714206662e-09, |
| "loss": 1.9377520084381104, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.20683287165281625, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.889990657357933e-09, |
| "loss": 1.6958491802215576, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.2092951677439212, |
| "grad_norm": 5.1875, |
| "learning_rate": 9.886247849461023e-09, |
| "loss": 1.320851445198059, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.21175746383502617, |
| "grad_norm": 17.375, |
| "learning_rate": 9.882443349930552e-09, |
| "loss": 2.529175281524658, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.21421975992613113, |
| "grad_norm": 5.53125, |
| "learning_rate": 9.878577219160456e-09, |
| "loss": 1.9636085033416748, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.21668205601723609, |
| "grad_norm": 5.84375, |
| "learning_rate": 9.87464951852302e-09, |
| "loss": 1.9693580865859985, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.21914435210834102, |
| "grad_norm": 8.5, |
| "learning_rate": 9.870660310367915e-09, |
| "loss": 1.955024242401123, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.22160664819944598, |
| "grad_norm": 11.5, |
| "learning_rate": 9.866609658021202e-09, |
| "loss": 2.3577377796173096, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.22406894429055094, |
| "grad_norm": 14.1875, |
| "learning_rate": 9.862497625784324e-09, |
| "loss": 2.3302321434020996, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.2265312403816559, |
| "grad_norm": 5.40625, |
| "learning_rate": 9.8583242789331e-09, |
| "loss": 1.872032642364502, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.22899353647276086, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.854089683716666e-09, |
| "loss": 1.9843339920043945, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.23145583256386582, |
| "grad_norm": 6.375, |
| "learning_rate": 9.849793907356444e-09, |
| "loss": 1.8600096702575684, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.23391812865497075, |
| "grad_norm": 11.0, |
| "learning_rate": 9.845437018045063e-09, |
| "loss": 2.281198024749756, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2363804247460757, |
| "grad_norm": 4.34375, |
| "learning_rate": 9.841019084945281e-09, |
| "loss": 1.8489793539047241, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.23884272083718067, |
| "grad_norm": 4.40625, |
| "learning_rate": 9.836540178188888e-09, |
| "loss": 1.8184915781021118, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.24130501692828563, |
| "grad_norm": 39.5, |
| "learning_rate": 9.832000368875586e-09, |
| "loss": 2.5119130611419678, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.24376731301939059, |
| "grad_norm": 42.25, |
| "learning_rate": 9.82739972907187e-09, |
| "loss": 1.7983183860778809, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.24622960911049555, |
| "grad_norm": 7.9375, |
| "learning_rate": 9.822738331809873e-09, |
| "loss": 1.8701186180114746, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2486919052016005, |
| "grad_norm": 10.8125, |
| "learning_rate": 9.818016251086222e-09, |
| "loss": 2.0227789878845215, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.25115420129270544, |
| "grad_norm": 9.625, |
| "learning_rate": 9.813233561860844e-09, |
| "loss": 2.185953140258789, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.2536164973838104, |
| "grad_norm": 5.0, |
| "learning_rate": 9.808390340055792e-09, |
| "loss": 1.850534439086914, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.25607879347491536, |
| "grad_norm": 5.125, |
| "learning_rate": 9.803486662554038e-09, |
| "loss": 1.9469786882400513, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.2585410895660203, |
| "grad_norm": 4.03125, |
| "learning_rate": 9.798522607198235e-09, |
| "loss": 1.7527638673782349, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2610033856571253, |
| "grad_norm": 4.75, |
| "learning_rate": 9.79349825278951e-09, |
| "loss": 1.9203780889511108, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.2634656817482302, |
| "grad_norm": 4.53125, |
| "learning_rate": 9.788413679086188e-09, |
| "loss": 1.8700388669967651, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.2659279778393352, |
| "grad_norm": 5.78125, |
| "learning_rate": 9.783268966802539e-09, |
| "loss": 2.030698299407959, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.2683902739304401, |
| "grad_norm": 7.375, |
| "learning_rate": 9.778064197607495e-09, |
| "loss": 1.936469316482544, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.2708525700215451, |
| "grad_norm": 18.875, |
| "learning_rate": 9.772799454123349e-09, |
| "loss": 2.471208095550537, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.27331486611265005, |
| "grad_norm": 13.25, |
| "learning_rate": 9.767474819924447e-09, |
| "loss": 2.437526226043701, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.275777162203755, |
| "grad_norm": 6.5625, |
| "learning_rate": 9.762090379535862e-09, |
| "loss": 2.013521909713745, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.27823945829485996, |
| "grad_norm": 14.0625, |
| "learning_rate": 9.756646218432053e-09, |
| "loss": 2.0168678760528564, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.2807017543859649, |
| "grad_norm": 5.8125, |
| "learning_rate": 9.751142423035501e-09, |
| "loss": 1.995202660560608, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.2831640504770699, |
| "grad_norm": 42.5, |
| "learning_rate": 9.74557908071535e-09, |
| "loss": 1.953993320465088, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2856263465681748, |
| "grad_norm": 2.46875, |
| "learning_rate": 9.739956279786e-09, |
| "loss": 1.149980068206787, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.2880886426592798, |
| "grad_norm": 4.21875, |
| "learning_rate": 9.734274109505729e-09, |
| "loss": 1.7589616775512695, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.29055093875038474, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.72853266007526e-09, |
| "loss": 1.9171326160430908, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.29301323484148967, |
| "grad_norm": 11.4375, |
| "learning_rate": 9.722732022636333e-09, |
| "loss": 1.6742775440216064, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.29547553093259465, |
| "grad_norm": 4.78125, |
| "learning_rate": 9.716872289270262e-09, |
| "loss": 1.7873895168304443, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2979378270236996, |
| "grad_norm": 4.40625, |
| "learning_rate": 9.710953552996464e-09, |
| "loss": 1.9001209735870361, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.3004001231148046, |
| "grad_norm": 4.78125, |
| "learning_rate": 9.704975907770995e-09, |
| "loss": 1.869600534439087, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.3028624192059095, |
| "grad_norm": 3.46875, |
| "learning_rate": 9.69893944848505e-09, |
| "loss": 1.5148907899856567, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.3053247152970145, |
| "grad_norm": 14.6875, |
| "learning_rate": 9.69284427096345e-09, |
| "loss": 1.914973497390747, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.3077870113881194, |
| "grad_norm": 13.125, |
| "learning_rate": 9.686690471963147e-09, |
| "loss": 2.230684757232666, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.31024930747922436, |
| "grad_norm": 7.34375, |
| "learning_rate": 9.680478149171657e-09, |
| "loss": 2.0974578857421875, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.31271160357032934, |
| "grad_norm": 13.5625, |
| "learning_rate": 9.674207401205524e-09, |
| "loss": 2.2117700576782227, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.3151738996614343, |
| "grad_norm": 5.25, |
| "learning_rate": 9.667878327608756e-09, |
| "loss": 1.8505613803863525, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.31763619575253926, |
| "grad_norm": 14.25, |
| "learning_rate": 9.661491028851246e-09, |
| "loss": 1.7967166900634766, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.3200984918436442, |
| "grad_norm": 4.0625, |
| "learning_rate": 9.655045606327165e-09, |
| "loss": 1.869051456451416, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3225607879347491, |
| "grad_norm": 9.0625, |
| "learning_rate": 9.648542162353366e-09, |
| "loss": 1.876924753189087, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.3250230840258541, |
| "grad_norm": 5.21875, |
| "learning_rate": 9.64198080016775e-09, |
| "loss": 2.0315141677856445, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.32748538011695905, |
| "grad_norm": 8.5625, |
| "learning_rate": 9.635361623927643e-09, |
| "loss": 2.1542179584503174, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.32994767620806403, |
| "grad_norm": 3.140625, |
| "learning_rate": 9.62868473870811e-09, |
| "loss": 1.1597316265106201, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.33240997229916897, |
| "grad_norm": 10.5, |
| "learning_rate": 9.621950250500333e-09, |
| "loss": 2.637326717376709, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.33487226839027395, |
| "grad_norm": 2.859375, |
| "learning_rate": 9.615158266209887e-09, |
| "loss": 1.283077597618103, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.3373345644813789, |
| "grad_norm": 7.125, |
| "learning_rate": 9.608308893655061e-09, |
| "loss": 2.046065092086792, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.3397968605724838, |
| "grad_norm": 2.953125, |
| "learning_rate": 9.601402241565154e-09, |
| "loss": 1.1603574752807617, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.3422591566635888, |
| "grad_norm": 5.34375, |
| "learning_rate": 9.59443841957873e-09, |
| "loss": 1.7637038230895996, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.34472145275469374, |
| "grad_norm": 5.21875, |
| "learning_rate": 9.587417538241892e-09, |
| "loss": 1.938485860824585, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.3471837488457987, |
| "grad_norm": 29.0, |
| "learning_rate": 9.580339709006524e-09, |
| "loss": 2.3233187198638916, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.34964604493690365, |
| "grad_norm": 6.0, |
| "learning_rate": 9.573205044228518e-09, |
| "loss": 1.4073760509490967, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.35210834102800864, |
| "grad_norm": 6.375, |
| "learning_rate": 9.566013657165994e-09, |
| "loss": 1.3963334560394287, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.3545706371191136, |
| "grad_norm": 6.8125, |
| "learning_rate": 9.558765661977503e-09, |
| "loss": 1.9514954090118408, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.3570329332102185, |
| "grad_norm": 5.75, |
| "learning_rate": 9.551461173720208e-09, |
| "loss": 2.0840539932250977, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3594952293013235, |
| "grad_norm": 11.9375, |
| "learning_rate": 9.544100308348067e-09, |
| "loss": 2.2709197998046875, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.3619575253924284, |
| "grad_norm": 12.3125, |
| "learning_rate": 9.536683182709986e-09, |
| "loss": 2.443535327911377, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.3644198214835334, |
| "grad_norm": 18.875, |
| "learning_rate": 9.529209914547962e-09, |
| "loss": 2.240347385406494, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.36688211757463834, |
| "grad_norm": 12.375, |
| "learning_rate": 9.521680622495228e-09, |
| "loss": 2.1307570934295654, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.36934441366574333, |
| "grad_norm": 11.8125, |
| "learning_rate": 9.514095426074347e-09, |
| "loss": 2.510369062423706, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.37180670975684826, |
| "grad_norm": 5.03125, |
| "learning_rate": 9.506454445695337e-09, |
| "loss": 1.9031611680984497, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.3742690058479532, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.498757802653741e-09, |
| "loss": 1.2329223155975342, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.3767313019390582, |
| "grad_norm": 5.28125, |
| "learning_rate": 9.491005619128721e-09, |
| "loss": 1.8155068159103394, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.3791935980301631, |
| "grad_norm": 7.625, |
| "learning_rate": 9.483198018181099e-09, |
| "loss": 1.736093282699585, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.3816558941212681, |
| "grad_norm": 13.5625, |
| "learning_rate": 9.475335123751412e-09, |
| "loss": 1.9234977960586548, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.38411819021237303, |
| "grad_norm": 8.5, |
| "learning_rate": 9.467417060657952e-09, |
| "loss": 1.9270076751708984, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.38658048630347797, |
| "grad_norm": 4.0625, |
| "learning_rate": 9.459443954594769e-09, |
| "loss": 1.350337028503418, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.38904278239458295, |
| "grad_norm": 2.609375, |
| "learning_rate": 9.451415932129692e-09, |
| "loss": 1.1429853439331055, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.3915050784856879, |
| "grad_norm": 4.90625, |
| "learning_rate": 9.443333120702307e-09, |
| "loss": 1.8531888723373413, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.3939673745767929, |
| "grad_norm": 3.0625, |
| "learning_rate": 9.435195648621935e-09, |
| "loss": 1.3913381099700928, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.3964296706678978, |
| "grad_norm": 5.15625, |
| "learning_rate": 9.42700364506561e-09, |
| "loss": 1.8761987686157227, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.3988919667590028, |
| "grad_norm": 4.4375, |
| "learning_rate": 9.418757240076008e-09, |
| "loss": 1.9191958904266357, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.4013542628501077, |
| "grad_norm": 2.75, |
| "learning_rate": 9.410456564559393e-09, |
| "loss": 1.175315260887146, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.40381655894121266, |
| "grad_norm": 12.375, |
| "learning_rate": 9.402101750283545e-09, |
| "loss": 2.3216049671173096, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.40627885503231764, |
| "grad_norm": 2.265625, |
| "learning_rate": 9.39369292987565e-09, |
| "loss": 1.1453694105148315, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4087411511234226, |
| "grad_norm": 5.71875, |
| "learning_rate": 9.38523023682022e-09, |
| "loss": 1.9262512922286987, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.41120344721452756, |
| "grad_norm": 10.375, |
| "learning_rate": 9.376713805456945e-09, |
| "loss": 2.126582622528076, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.4136657433056325, |
| "grad_norm": 2.609375, |
| "learning_rate": 9.368143770978586e-09, |
| "loss": 1.1786751747131348, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.4161280393967375, |
| "grad_norm": 8.125, |
| "learning_rate": 9.359520269428812e-09, |
| "loss": 2.126143217086792, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.4185903354878424, |
| "grad_norm": 2.390625, |
| "learning_rate": 9.350843437700052e-09, |
| "loss": 1.245577335357666, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 29.625, |
| "learning_rate": 9.342113413531315e-09, |
| "loss": 2.009819507598877, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.42351492767005233, |
| "grad_norm": 3.875, |
| "learning_rate": 9.333330335506001e-09, |
| "loss": 1.1387863159179688, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.42597722376115726, |
| "grad_norm": 12.5625, |
| "learning_rate": 9.324494343049707e-09, |
| "loss": 2.2192680835723877, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.42843951985226225, |
| "grad_norm": 11.0, |
| "learning_rate": 9.315605576428018e-09, |
| "loss": 1.939860463142395, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.4309018159433672, |
| "grad_norm": 13.8125, |
| "learning_rate": 9.306664176744266e-09, |
| "loss": 2.318619728088379, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.43336411203447217, |
| "grad_norm": 3.15625, |
| "learning_rate": 9.297670285937303e-09, |
| "loss": 1.0619254112243652, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.4358264081255771, |
| "grad_norm": 5.4375, |
| "learning_rate": 9.288624046779241e-09, |
| "loss": 1.834202766418457, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.43828870421668203, |
| "grad_norm": 9.8125, |
| "learning_rate": 9.279525602873189e-09, |
| "loss": 1.9926815032958984, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.440751000307787, |
| "grad_norm": 4.8125, |
| "learning_rate": 9.27037509865097e-09, |
| "loss": 1.9792507886886597, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.44321329639889195, |
| "grad_norm": 9.125, |
| "learning_rate": 9.26117267937083e-09, |
| "loss": 1.5881253480911255, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.44567559248999694, |
| "grad_norm": 15.5, |
| "learning_rate": 9.251918491115142e-09, |
| "loss": 2.488168239593506, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.4481378885811019, |
| "grad_norm": 4.8125, |
| "learning_rate": 9.242612680788061e-09, |
| "loss": 1.9684348106384277, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.45060018467220686, |
| "grad_norm": 9.375, |
| "learning_rate": 9.233255396113223e-09, |
| "loss": 2.305130958557129, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.4530624807633118, |
| "grad_norm": 16.75, |
| "learning_rate": 9.223846785631378e-09, |
| "loss": 2.335341215133667, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.4555247768544167, |
| "grad_norm": 5.25, |
| "learning_rate": 9.214386998698039e-09, |
| "loss": 1.7638440132141113, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4579870729455217, |
| "grad_norm": 5.53125, |
| "learning_rate": 9.20487618548112e-09, |
| "loss": 1.7996431589126587, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.46044936903662664, |
| "grad_norm": 4.6875, |
| "learning_rate": 9.195314496958531e-09, |
| "loss": 1.7842280864715576, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.46291166512773163, |
| "grad_norm": 38.0, |
| "learning_rate": 9.185702084915805e-09, |
| "loss": 2.152765989303589, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.46537396121883656, |
| "grad_norm": 5.125, |
| "learning_rate": 9.176039101943672e-09, |
| "loss": 1.7519220113754272, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.4678362573099415, |
| "grad_norm": 45.0, |
| "learning_rate": 9.166325701435644e-09, |
| "loss": 2.9101526737213135, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4702985534010465, |
| "grad_norm": 12.9375, |
| "learning_rate": 9.156562037585576e-09, |
| "loss": 2.2048463821411133, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.4727608494921514, |
| "grad_norm": 5.4375, |
| "learning_rate": 9.146748265385223e-09, |
| "loss": 1.8226771354675293, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.4752231455832564, |
| "grad_norm": 16.0, |
| "learning_rate": 9.13688454062178e-09, |
| "loss": 2.297773838043213, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.47768544167436133, |
| "grad_norm": 16.375, |
| "learning_rate": 9.126971019875397e-09, |
| "loss": 2.2794573307037354, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.4801477377654663, |
| "grad_norm": 8.1875, |
| "learning_rate": 9.117007860516713e-09, |
| "loss": 1.2689777612686157, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.48261003385657125, |
| "grad_norm": 10.4375, |
| "learning_rate": 9.106995220704344e-09, |
| "loss": 2.273574113845825, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.4850723299476762, |
| "grad_norm": 4.3125, |
| "learning_rate": 9.09693325938237e-09, |
| "loss": 1.7581639289855957, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.48753462603878117, |
| "grad_norm": 4.25, |
| "learning_rate": 9.08682213627782e-09, |
| "loss": 1.8824234008789062, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.4899969221298861, |
| "grad_norm": 40.0, |
| "learning_rate": 9.076662011898145e-09, |
| "loss": 2.692976951599121, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.4924592182209911, |
| "grad_norm": 5.0625, |
| "learning_rate": 9.066453047528642e-09, |
| "loss": 1.951959490776062, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.494921514312096, |
| "grad_norm": 19.125, |
| "learning_rate": 9.056195405229922e-09, |
| "loss": 2.419041156768799, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.497383810403201, |
| "grad_norm": 4.3125, |
| "learning_rate": 9.045889247835322e-09, |
| "loss": 1.7131880521774292, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.49984610649430594, |
| "grad_norm": 2.875, |
| "learning_rate": 9.035534738948328e-09, |
| "loss": 1.2638614177703857, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.5023084025854109, |
| "grad_norm": 6.90625, |
| "learning_rate": 9.02513204293997e-09, |
| "loss": 1.8727983236312866, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.5047706986765158, |
| "grad_norm": 2.203125, |
| "learning_rate": 9.014681324946216e-09, |
| "loss": 1.1091878414154053, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5072329947676208, |
| "grad_norm": 5.5625, |
| "learning_rate": 9.004182750865357e-09, |
| "loss": 2.032684326171875, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.5096952908587258, |
| "grad_norm": 3.25, |
| "learning_rate": 8.993636487355366e-09, |
| "loss": 1.4393967390060425, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.5121575869498307, |
| "grad_norm": 15.4375, |
| "learning_rate": 8.98304270183125e-09, |
| "loss": 2.364288806915283, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.5146198830409356, |
| "grad_norm": 6.84375, |
| "learning_rate": 8.9724015624624e-09, |
| "loss": 1.4677906036376953, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.5170821791320406, |
| "grad_norm": 4.53125, |
| "learning_rate": 8.961713238169922e-09, |
| "loss": 1.9610824584960938, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5195444752231456, |
| "grad_norm": 6.1875, |
| "learning_rate": 8.950977898623947e-09, |
| "loss": 1.8107311725616455, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.5220067713142506, |
| "grad_norm": 2.859375, |
| "learning_rate": 8.940195714240937e-09, |
| "loss": 1.2439892292022705, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.5244690674053555, |
| "grad_norm": 9.75, |
| "learning_rate": 8.929366856181003e-09, |
| "loss": 1.985514521598816, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.5269313634964604, |
| "grad_norm": 3.703125, |
| "learning_rate": 8.918491496345149e-09, |
| "loss": 1.8395881652832031, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.5293936595875655, |
| "grad_norm": 3.421875, |
| "learning_rate": 8.907569807372576e-09, |
| "loss": 1.2282559871673584, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5318559556786704, |
| "grad_norm": 4.75, |
| "learning_rate": 8.896601962637927e-09, |
| "loss": 1.9522662162780762, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.5343182517697753, |
| "grad_norm": 6.4375, |
| "learning_rate": 8.885588136248539e-09, |
| "loss": 1.831364631652832, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.5367805478608803, |
| "grad_norm": 3.21875, |
| "learning_rate": 8.874528503041674e-09, |
| "loss": 1.3392367362976074, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.5392428439519852, |
| "grad_norm": 2.03125, |
| "learning_rate": 8.86342323858175e-09, |
| "loss": 1.154931664466858, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.5417051400430902, |
| "grad_norm": 2.84375, |
| "learning_rate": 8.852272519157554e-09, |
| "loss": 1.1106712818145752, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5441674361341952, |
| "grad_norm": 12.6875, |
| "learning_rate": 8.841076521779431e-09, |
| "loss": 2.266367197036743, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.5466297322253001, |
| "grad_norm": 6.78125, |
| "learning_rate": 8.829835424176495e-09, |
| "loss": 1.9257324934005737, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.549092028316405, |
| "grad_norm": 7.6875, |
| "learning_rate": 8.81854940479379e-09, |
| "loss": 1.2584561109542847, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.55155432440751, |
| "grad_norm": 8.3125, |
| "learning_rate": 8.807218642789463e-09, |
| "loss": 2.150424003601074, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.554016620498615, |
| "grad_norm": 3.6875, |
| "learning_rate": 8.795843318031926e-09, |
| "loss": 1.100125789642334, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5564789165897199, |
| "grad_norm": 4.71875, |
| "learning_rate": 8.78442361109699e-09, |
| "loss": 1.8502240180969238, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.5589412126808249, |
| "grad_norm": 4.625, |
| "learning_rate": 8.772959703265008e-09, |
| "loss": 1.7188208103179932, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.5614035087719298, |
| "grad_norm": 2.25, |
| "learning_rate": 8.76145177651799e-09, |
| "loss": 1.1569561958312988, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.5638658048630347, |
| "grad_norm": 13.5, |
| "learning_rate": 8.74990001353672e-09, |
| "loss": 2.2237837314605713, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.5663281009541398, |
| "grad_norm": 2.625, |
| "learning_rate": 8.738304597697855e-09, |
| "loss": 1.2278821468353271, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5687903970452447, |
| "grad_norm": 2.984375, |
| "learning_rate": 8.726665713071004e-09, |
| "loss": 1.4073512554168701, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.5712526931363496, |
| "grad_norm": 12.375, |
| "learning_rate": 8.714983544415824e-09, |
| "loss": 2.3128976821899414, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.5737149892274546, |
| "grad_norm": 13.3125, |
| "learning_rate": 8.703258277179076e-09, |
| "loss": 2.249760627746582, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.5761772853185596, |
| "grad_norm": 5.75, |
| "learning_rate": 8.691490097491676e-09, |
| "loss": 1.949746012687683, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.5786395814096645, |
| "grad_norm": 8.0625, |
| "learning_rate": 8.679679192165755e-09, |
| "loss": 2.0255026817321777, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5811018775007695, |
| "grad_norm": 2.953125, |
| "learning_rate": 8.667825748691678e-09, |
| "loss": 1.172034502029419, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.5835641735918744, |
| "grad_norm": 13.4375, |
| "learning_rate": 8.655929955235084e-09, |
| "loss": 1.7464905977249146, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.5860264696829793, |
| "grad_norm": 4.875, |
| "learning_rate": 8.643992000633882e-09, |
| "loss": 1.7516231536865234, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.5884887657740844, |
| "grad_norm": 13.6875, |
| "learning_rate": 8.632012074395267e-09, |
| "loss": 1.9086973667144775, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.5909510618651893, |
| "grad_norm": 20.375, |
| "learning_rate": 8.619990366692703e-09, |
| "loss": 1.120478630065918, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.5934133579562942, |
| "grad_norm": 5.40625, |
| "learning_rate": 8.607927068362909e-09, |
| "loss": 1.8365321159362793, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.5958756540473992, |
| "grad_norm": 4.21875, |
| "learning_rate": 8.595822370902824e-09, |
| "loss": 1.8781213760375977, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.5983379501385041, |
| "grad_norm": 5.09375, |
| "learning_rate": 8.583676466466578e-09, |
| "loss": 1.8990083932876587, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.6008002462296091, |
| "grad_norm": 9.25, |
| "learning_rate": 8.571489547862432e-09, |
| "loss": 2.005687713623047, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.6032625423207141, |
| "grad_norm": 11.75, |
| "learning_rate": 8.559261808549717e-09, |
| "loss": 2.288544178009033, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.605724838411819, |
| "grad_norm": 12.0625, |
| "learning_rate": 8.546993442635767e-09, |
| "loss": 1.9239308834075928, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.6081871345029239, |
| "grad_norm": 3.203125, |
| "learning_rate": 8.534684644872836e-09, |
| "loss": 1.2520358562469482, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.610649430594029, |
| "grad_norm": 7.65625, |
| "learning_rate": 8.522335610655014e-09, |
| "loss": 2.1090569496154785, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.6131117266851339, |
| "grad_norm": 10.3125, |
| "learning_rate": 8.509946536015109e-09, |
| "loss": 2.2030882835388184, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.6155740227762388, |
| "grad_norm": 18.75, |
| "learning_rate": 8.497517617621549e-09, |
| "loss": 2.205538034439087, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6180363188673438, |
| "grad_norm": 3.484375, |
| "learning_rate": 8.485049052775255e-09, |
| "loss": 1.5225834846496582, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.6204986149584487, |
| "grad_norm": 4.8125, |
| "learning_rate": 8.472541039406509e-09, |
| "loss": 1.8662419319152832, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.6229609110495538, |
| "grad_norm": 3.3125, |
| "learning_rate": 8.459993776071815e-09, |
| "loss": 1.5459778308868408, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.6254232071406587, |
| "grad_norm": 2.359375, |
| "learning_rate": 8.44740746195074e-09, |
| "loss": 1.2113550901412964, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.6278855032317636, |
| "grad_norm": 3.078125, |
| "learning_rate": 8.434782296842755e-09, |
| "loss": 1.2501018047332764, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6303477993228686, |
| "grad_norm": 5.46875, |
| "learning_rate": 8.422118481164076e-09, |
| "loss": 1.3121228218078613, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.6328100954139735, |
| "grad_norm": 8.875, |
| "learning_rate": 8.409416215944459e-09, |
| "loss": 2.0257339477539062, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.6352723915050785, |
| "grad_norm": 2.828125, |
| "learning_rate": 8.396675702824026e-09, |
| "loss": 1.249032974243164, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.6377346875961835, |
| "grad_norm": 2.40625, |
| "learning_rate": 8.38389714405006e-09, |
| "loss": 1.089784026145935, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.6401969836872884, |
| "grad_norm": 3.078125, |
| "learning_rate": 8.371080742473797e-09, |
| "loss": 1.107433795928955, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6426592797783933, |
| "grad_norm": 24.25, |
| "learning_rate": 8.358226701547196e-09, |
| "loss": 2.397225856781006, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.6451215758694983, |
| "grad_norm": 30.625, |
| "learning_rate": 8.345335225319716e-09, |
| "loss": 2.917544364929199, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.6475838719606033, |
| "grad_norm": 5.3125, |
| "learning_rate": 8.332406518435087e-09, |
| "loss": 1.9733543395996094, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.6500461680517082, |
| "grad_norm": 11.5625, |
| "learning_rate": 8.319440786128039e-09, |
| "loss": 2.30487060546875, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.6525084641428132, |
| "grad_norm": 14.125, |
| "learning_rate": 8.306438234221058e-09, |
| "loss": 2.489694118499756, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6549707602339181, |
| "grad_norm": 4.90625, |
| "learning_rate": 8.293399069121128e-09, |
| "loss": 1.7912418842315674, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.6574330563250231, |
| "grad_norm": 9.4375, |
| "learning_rate": 8.280323497816431e-09, |
| "loss": 1.935392141342163, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.6598953524161281, |
| "grad_norm": 6.09375, |
| "learning_rate": 8.267211727873078e-09, |
| "loss": 1.9411722421646118, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.662357648507233, |
| "grad_norm": 3.765625, |
| "learning_rate": 8.254063967431816e-09, |
| "loss": 1.7723370790481567, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.6648199445983379, |
| "grad_norm": 10.75, |
| "learning_rate": 8.240880425204702e-09, |
| "loss": 2.3154473304748535, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6672822406894429, |
| "grad_norm": 6.25, |
| "learning_rate": 8.22766131047182e-09, |
| "loss": 1.941293716430664, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.6697445367805479, |
| "grad_norm": 5.15625, |
| "learning_rate": 8.21440683307794e-09, |
| "loss": 1.8273173570632935, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.6722068328716528, |
| "grad_norm": 5.75, |
| "learning_rate": 8.201117203429187e-09, |
| "loss": 1.917323112487793, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.6746691289627578, |
| "grad_norm": 4.53125, |
| "learning_rate": 8.18779263248971e-09, |
| "loss": 1.5516306161880493, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.6771314250538627, |
| "grad_norm": 5.71875, |
| "learning_rate": 8.174433331778322e-09, |
| "loss": 2.0121002197265625, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.6795937211449676, |
| "grad_norm": 4.34375, |
| "learning_rate": 8.161039513365158e-09, |
| "loss": 1.2636222839355469, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.6820560172360727, |
| "grad_norm": 3.8125, |
| "learning_rate": 8.147611389868293e-09, |
| "loss": 1.3448388576507568, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.6845183133271776, |
| "grad_norm": 8.0625, |
| "learning_rate": 8.13414917445037e-09, |
| "loss": 2.0951576232910156, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.6869806094182825, |
| "grad_norm": 10.875, |
| "learning_rate": 8.120653080815219e-09, |
| "loss": 2.3154006004333496, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.6894429055093875, |
| "grad_norm": 2.96875, |
| "learning_rate": 8.107123323204473e-09, |
| "loss": 1.1850239038467407, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.6919052016004925, |
| "grad_norm": 4.4375, |
| "learning_rate": 8.093560116394149e-09, |
| "loss": 1.9023423194885254, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.6943674976915974, |
| "grad_norm": 5.5, |
| "learning_rate": 8.079963675691255e-09, |
| "loss": 1.9364053010940552, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.6968297937827024, |
| "grad_norm": 8.1875, |
| "learning_rate": 8.06633421693036e-09, |
| "loss": 1.8559212684631348, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.6992920898738073, |
| "grad_norm": 12.0, |
| "learning_rate": 8.052671956470177e-09, |
| "loss": 1.9172155857086182, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "grad_norm": 5.1875, |
| "learning_rate": 8.038977111190119e-09, |
| "loss": 1.7878023386001587, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7042166820560173, |
| "grad_norm": 5.28125, |
| "learning_rate": 8.025249898486866e-09, |
| "loss": 1.9518636465072632, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.7066789781471222, |
| "grad_norm": 4.875, |
| "learning_rate": 8.011490536270911e-09, |
| "loss": 1.7933154106140137, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.7091412742382271, |
| "grad_norm": 4.75, |
| "learning_rate": 7.997699242963094e-09, |
| "loss": 1.7392499446868896, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.7116035703293321, |
| "grad_norm": 3.734375, |
| "learning_rate": 7.983876237491148e-09, |
| "loss": 1.403039813041687, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.714065866420437, |
| "grad_norm": 2.921875, |
| "learning_rate": 7.970021739286207e-09, |
| "loss": 1.1680914163589478, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.716528162511542, |
| "grad_norm": 2.234375, |
| "learning_rate": 7.956135968279335e-09, |
| "loss": 1.1165484189987183, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.718990458602647, |
| "grad_norm": 10.9375, |
| "learning_rate": 7.942219144898033e-09, |
| "loss": 2.342836856842041, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.7214527546937519, |
| "grad_norm": 3.953125, |
| "learning_rate": 7.928271490062737e-09, |
| "loss": 1.8495182991027832, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.7239150507848569, |
| "grad_norm": 5.875, |
| "learning_rate": 7.914293225183313e-09, |
| "loss": 1.9028046131134033, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.7263773468759618, |
| "grad_norm": 10.0625, |
| "learning_rate": 7.900284572155538e-09, |
| "loss": 1.9208589792251587, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7288396429670668, |
| "grad_norm": 4.59375, |
| "learning_rate": 7.886245753357586e-09, |
| "loss": 1.8670642375946045, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.7313019390581718, |
| "grad_norm": 65.0, |
| "learning_rate": 7.872176991646488e-09, |
| "loss": 1.555503487586975, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.7337642351492767, |
| "grad_norm": 5.46875, |
| "learning_rate": 7.858078510354597e-09, |
| "loss": 1.9539310932159424, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.7362265312403816, |
| "grad_norm": 2.703125, |
| "learning_rate": 7.843950533286057e-09, |
| "loss": 1.2128690481185913, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.7386888273314867, |
| "grad_norm": 4.46875, |
| "learning_rate": 7.829793284713224e-09, |
| "loss": 1.873086929321289, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7411511234225916, |
| "grad_norm": 2.578125, |
| "learning_rate": 7.81560698937313e-09, |
| "loss": 1.1673393249511719, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.7436134195136965, |
| "grad_norm": 12.8125, |
| "learning_rate": 7.801391872463896e-09, |
| "loss": 2.315310001373291, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.7460757156048015, |
| "grad_norm": 11.3125, |
| "learning_rate": 7.787148159641176e-09, |
| "loss": 2.4388017654418945, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.7485380116959064, |
| "grad_norm": 8.75, |
| "learning_rate": 7.77287607701456e-09, |
| "loss": 2.1161627769470215, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.7510003077870114, |
| "grad_norm": 3.921875, |
| "learning_rate": 7.758575851143987e-09, |
| "loss": 1.1796162128448486, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7534626038781164, |
| "grad_norm": 4.90625, |
| "learning_rate": 7.744247709036165e-09, |
| "loss": 1.3470849990844727, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.7559248999692213, |
| "grad_norm": 12.3125, |
| "learning_rate": 7.729891878140936e-09, |
| "loss": 2.33459734916687, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.7583871960603262, |
| "grad_norm": 6.96875, |
| "learning_rate": 7.715508586347695e-09, |
| "loss": 1.9637078046798706, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.7608494921514312, |
| "grad_norm": 4.34375, |
| "learning_rate": 7.701098061981757e-09, |
| "loss": 1.9413955211639404, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.7633117882425362, |
| "grad_norm": 9.6875, |
| "learning_rate": 7.686660533800736e-09, |
| "loss": 1.9719551801681519, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7657740843336411, |
| "grad_norm": 3.71875, |
| "learning_rate": 7.672196230990918e-09, |
| "loss": 1.3401029109954834, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.7682363804247461, |
| "grad_norm": 6.59375, |
| "learning_rate": 7.65770538316361e-09, |
| "loss": 1.7963333129882812, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.770698676515851, |
| "grad_norm": 7.96875, |
| "learning_rate": 7.643188220351516e-09, |
| "loss": 2.0712432861328125, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.7731609726069559, |
| "grad_norm": 11.125, |
| "learning_rate": 7.628644973005061e-09, |
| "loss": 2.3805270195007324, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.775623268698061, |
| "grad_norm": 7.34375, |
| "learning_rate": 7.61407587198875e-09, |
| "loss": 1.2845838069915771, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7780855647891659, |
| "grad_norm": 20.875, |
| "learning_rate": 7.5994811485775e-09, |
| "loss": 2.2516846656799316, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.7805478608802708, |
| "grad_norm": 5.0, |
| "learning_rate": 7.584861034452963e-09, |
| "loss": 1.964002251625061, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.7830101569713758, |
| "grad_norm": 3.046875, |
| "learning_rate": 7.570215761699855e-09, |
| "loss": 1.3124688863754272, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.7854724530624808, |
| "grad_norm": 11.6875, |
| "learning_rate": 7.55554556280227e-09, |
| "loss": 2.2107834815979004, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.7879347491535857, |
| "grad_norm": 5.6875, |
| "learning_rate": 7.540850670639978e-09, |
| "loss": 1.9630699157714844, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.7903970452446907, |
| "grad_norm": 5.75, |
| "learning_rate": 7.526131318484753e-09, |
| "loss": 1.9335198402404785, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.7928593413357956, |
| "grad_norm": 3.765625, |
| "learning_rate": 7.511387739996644e-09, |
| "loss": 1.2916162014007568, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.7953216374269005, |
| "grad_norm": 14.5625, |
| "learning_rate": 7.496620169220286e-09, |
| "loss": 2.1263046264648438, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.7977839335180056, |
| "grad_norm": 5.78125, |
| "learning_rate": 7.481828840581164e-09, |
| "loss": 1.8862347602844238, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.8002462296091105, |
| "grad_norm": 11.75, |
| "learning_rate": 7.46701398888192e-09, |
| "loss": 2.1435751914978027, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8027085257002154, |
| "grad_norm": 36.25, |
| "learning_rate": 7.45217584929859e-09, |
| "loss": 1.8985021114349365, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.8051708217913204, |
| "grad_norm": 3.96875, |
| "learning_rate": 7.437314657376906e-09, |
| "loss": 1.255218267440796, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.8076331178824253, |
| "grad_norm": 6.71875, |
| "learning_rate": 7.422430649028533e-09, |
| "loss": 1.8039145469665527, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.8100954139735304, |
| "grad_norm": 2.828125, |
| "learning_rate": 7.407524060527333e-09, |
| "loss": 1.2014645338058472, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.8125577100646353, |
| "grad_norm": 9.5625, |
| "learning_rate": 7.3925951285056146e-09, |
| "loss": 2.114205837249756, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8150200061557402, |
| "grad_norm": 18.0, |
| "learning_rate": 7.377644089950371e-09, |
| "loss": 2.3271141052246094, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.8174823022468451, |
| "grad_norm": 4.59375, |
| "learning_rate": 7.362671182199527e-09, |
| "loss": 1.9512523412704468, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.8199445983379502, |
| "grad_norm": 4.875, |
| "learning_rate": 7.347676642938163e-09, |
| "loss": 1.875675082206726, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.8224068944290551, |
| "grad_norm": 7.28125, |
| "learning_rate": 7.332660710194749e-09, |
| "loss": 2.120806932449341, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.8248691905201601, |
| "grad_norm": 12.1875, |
| "learning_rate": 7.3176236223373595e-09, |
| "loss": 2.482332229614258, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.827331486611265, |
| "grad_norm": 5.34375, |
| "learning_rate": 7.302565618069894e-09, |
| "loss": 1.932433843612671, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.8297937827023699, |
| "grad_norm": 2.296875, |
| "learning_rate": 7.287486936428282e-09, |
| "loss": 1.1869601011276245, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.832256078793475, |
| "grad_norm": 2.40625, |
| "learning_rate": 7.272387816776704e-09, |
| "loss": 1.2416247129440308, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.8347183748845799, |
| "grad_norm": 6.34375, |
| "learning_rate": 7.257268498803767e-09, |
| "loss": 1.4887652397155762, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.8371806709756848, |
| "grad_norm": 5.34375, |
| "learning_rate": 7.2421292225187186e-09, |
| "loss": 1.833484411239624, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8396429670667898, |
| "grad_norm": 13.8125, |
| "learning_rate": 7.2269702282476335e-09, |
| "loss": 2.041853904724121, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 14.625, |
| "learning_rate": 7.211791756629598e-09, |
| "loss": 2.366133689880371, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.8445675592489997, |
| "grad_norm": 10.875, |
| "learning_rate": 7.196594048612881e-09, |
| "loss": 1.9250491857528687, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.8470298553401047, |
| "grad_norm": 10.3125, |
| "learning_rate": 7.1813773454511215e-09, |
| "loss": 2.2896928787231445, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.8494921514312096, |
| "grad_norm": 5.40625, |
| "learning_rate": 7.166141888699495e-09, |
| "loss": 1.9879870414733887, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.8519544475223145, |
| "grad_norm": 11.625, |
| "learning_rate": 7.150887920210878e-09, |
| "loss": 2.2236876487731934, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.8544167436134195, |
| "grad_norm": 10.0, |
| "learning_rate": 7.135615682132004e-09, |
| "loss": 1.4050698280334473, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.8568790397045245, |
| "grad_norm": 22.25, |
| "learning_rate": 7.120325416899629e-09, |
| "loss": 2.2749319076538086, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.8593413357956294, |
| "grad_norm": 15.75, |
| "learning_rate": 7.105017367236675e-09, |
| "loss": 2.3958988189697266, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.8618036318867344, |
| "grad_norm": 11.0, |
| "learning_rate": 7.089691776148384e-09, |
| "loss": 2.313142776489258, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8642659279778393, |
| "grad_norm": 11.625, |
| "learning_rate": 7.0743488869184535e-09, |
| "loss": 2.3592798709869385, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.8667282240689443, |
| "grad_norm": 8.5625, |
| "learning_rate": 7.058988943105175e-09, |
| "loss": 2.11894154548645, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.8691905201600493, |
| "grad_norm": 2.34375, |
| "learning_rate": 7.04361218853758e-09, |
| "loss": 1.3712561130523682, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.8716528162511542, |
| "grad_norm": 13.4375, |
| "learning_rate": 7.0282188673115514e-09, |
| "loss": 2.092770576477051, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.8741151123422591, |
| "grad_norm": 15.0625, |
| "learning_rate": 7.012809223785957e-09, |
| "loss": 1.9357192516326904, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8765774084333641, |
| "grad_norm": 2.953125, |
| "learning_rate": 6.9973835025787715e-09, |
| "loss": 1.2680325508117676, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.8790397045244691, |
| "grad_norm": 7.125, |
| "learning_rate": 6.981941948563198e-09, |
| "loss": 1.7719722986221313, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.881502000615574, |
| "grad_norm": 5.0625, |
| "learning_rate": 6.966484806863764e-09, |
| "loss": 1.8633275032043457, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.883964296706679, |
| "grad_norm": 3.296875, |
| "learning_rate": 6.9510123228524545e-09, |
| "loss": 1.4539438486099243, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.8864265927977839, |
| "grad_norm": 13.25, |
| "learning_rate": 6.935524742144792e-09, |
| "loss": 2.2359728813171387, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 6.78125, |
| "learning_rate": 6.920022310595953e-09, |
| "loss": 1.8414530754089355, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.8913511849799939, |
| "grad_norm": 3.84375, |
| "learning_rate": 6.904505274296864e-09, |
| "loss": 1.2079766988754272, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.8938134810710988, |
| "grad_norm": 8.625, |
| "learning_rate": 6.88897387957029e-09, |
| "loss": 1.9165315628051758, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.8962757771622037, |
| "grad_norm": 3.34375, |
| "learning_rate": 6.87342837296693e-09, |
| "loss": 1.2759442329406738, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.8987380732533087, |
| "grad_norm": 5.34375, |
| "learning_rate": 6.857869001261491e-09, |
| "loss": 1.2644639015197754, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9012003693444137, |
| "grad_norm": 12.75, |
| "learning_rate": 6.842296011448788e-09, |
| "loss": 2.2167718410491943, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.9036626654355187, |
| "grad_norm": 7.1875, |
| "learning_rate": 6.826709650739812e-09, |
| "loss": 1.402853012084961, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.9061249615266236, |
| "grad_norm": 9.25, |
| "learning_rate": 6.811110166557809e-09, |
| "loss": 2.0942487716674805, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.9085872576177285, |
| "grad_norm": 4.40625, |
| "learning_rate": 6.795497806534348e-09, |
| "loss": 1.8234786987304688, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.9110495537088334, |
| "grad_norm": 16.5, |
| "learning_rate": 6.779872818505397e-09, |
| "loss": 1.8784126043319702, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9135118497999385, |
| "grad_norm": 9.5, |
| "learning_rate": 6.7642354505073835e-09, |
| "loss": 2.2190794944763184, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.9159741458910434, |
| "grad_norm": 4.8125, |
| "learning_rate": 6.748585950773263e-09, |
| "loss": 1.9413115978240967, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.9184364419821484, |
| "grad_norm": 3.109375, |
| "learning_rate": 6.732924567728566e-09, |
| "loss": 1.3823771476745605, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.9208987380732533, |
| "grad_norm": 5.03125, |
| "learning_rate": 6.7172515499874705e-09, |
| "loss": 1.9463045597076416, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.9233610341643582, |
| "grad_norm": 6.71875, |
| "learning_rate": 6.701567146348843e-09, |
| "loss": 2.0039689540863037, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9258233302554633, |
| "grad_norm": 3.828125, |
| "learning_rate": 6.685871605792301e-09, |
| "loss": 1.438122272491455, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.9282856263465682, |
| "grad_norm": 34.25, |
| "learning_rate": 6.670165177474241e-09, |
| "loss": 1.7374298572540283, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.9307479224376731, |
| "grad_norm": 2.796875, |
| "learning_rate": 6.6544481107239054e-09, |
| "loss": 1.4571634531021118, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.9332102185287781, |
| "grad_norm": 4.78125, |
| "learning_rate": 6.638720655039412e-09, |
| "loss": 1.7221906185150146, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.935672514619883, |
| "grad_norm": 22.25, |
| "learning_rate": 6.622983060083796e-09, |
| "loss": 1.344387173652649, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.938134810710988, |
| "grad_norm": 2.4375, |
| "learning_rate": 6.607235575681045e-09, |
| "loss": 1.2809216976165771, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.940597106802093, |
| "grad_norm": 2.609375, |
| "learning_rate": 6.591478451812138e-09, |
| "loss": 1.1766109466552734, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.9430594028931979, |
| "grad_norm": 3.765625, |
| "learning_rate": 6.575711938611073e-09, |
| "loss": 1.3128526210784912, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.9455216989843028, |
| "grad_norm": 5.625, |
| "learning_rate": 6.559936286360897e-09, |
| "loss": 1.8674499988555908, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.9479839950754079, |
| "grad_norm": 5.28125, |
| "learning_rate": 6.544151745489735e-09, |
| "loss": 1.934564471244812, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9504462911665128, |
| "grad_norm": 7.625, |
| "learning_rate": 6.52835856656681e-09, |
| "loss": 2.1300408840179443, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.9529085872576177, |
| "grad_norm": 10.3125, |
| "learning_rate": 6.512557000298471e-09, |
| "loss": 2.284024715423584, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.9553708833487227, |
| "grad_norm": 5.15625, |
| "learning_rate": 6.49674729752421e-09, |
| "loss": 1.9190423488616943, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.9578331794398276, |
| "grad_norm": 9.0, |
| "learning_rate": 6.480929709212682e-09, |
| "loss": 2.2223734855651855, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.9602954755309326, |
| "grad_norm": 5.5, |
| "learning_rate": 6.465104486457718e-09, |
| "loss": 1.9598147869110107, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.9627577716220376, |
| "grad_norm": 6.59375, |
| "learning_rate": 6.4492718804743365e-09, |
| "loss": 2.041882276535034, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.9652200677131425, |
| "grad_norm": 2.125, |
| "learning_rate": 6.433432142594771e-09, |
| "loss": 1.2188262939453125, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.9676823638042474, |
| "grad_norm": 11.375, |
| "learning_rate": 6.4175855242644575e-09, |
| "loss": 2.208829879760742, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.9701446598953524, |
| "grad_norm": 5.0, |
| "learning_rate": 6.401732277038063e-09, |
| "loss": 2.0125837326049805, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.9726069559864574, |
| "grad_norm": 8.75, |
| "learning_rate": 6.3858726525754814e-09, |
| "loss": 2.2643885612487793, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9750692520775623, |
| "grad_norm": 7.0625, |
| "learning_rate": 6.370006902637836e-09, |
| "loss": 1.9207779169082642, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.9775315481686673, |
| "grad_norm": 2.59375, |
| "learning_rate": 6.354135279083497e-09, |
| "loss": 1.2121376991271973, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.9799938442597722, |
| "grad_norm": 10.9375, |
| "learning_rate": 6.338258033864067e-09, |
| "loss": 2.1134583950042725, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.9824561403508771, |
| "grad_norm": 18.125, |
| "learning_rate": 6.3223754190203895e-09, |
| "loss": 2.3652374744415283, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.9849184364419822, |
| "grad_norm": 11.6875, |
| "learning_rate": 6.306487686678556e-09, |
| "loss": 1.956110954284668, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.9873807325330871, |
| "grad_norm": 5.21875, |
| "learning_rate": 6.290595089045882e-09, |
| "loss": 1.993713140487671, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.989843028624192, |
| "grad_norm": 19.25, |
| "learning_rate": 6.274697878406925e-09, |
| "loss": 1.3555768728256226, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.992305324715297, |
| "grad_norm": 14.9375, |
| "learning_rate": 6.2587963071194695e-09, |
| "loss": 1.7694034576416016, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.994767620806402, |
| "grad_norm": 14.0, |
| "learning_rate": 6.242890627610518e-09, |
| "loss": 2.2126145362854004, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.997229916897507, |
| "grad_norm": 5.46875, |
| "learning_rate": 6.226981092372297e-09, |
| "loss": 1.7438420057296753, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.9996922129886119, |
| "grad_norm": 3.671875, |
| "learning_rate": 6.211067953958229e-09, |
| "loss": 1.237831711769104, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.0012311480455525, |
| "grad_norm": 2.15625, |
| "learning_rate": 6.195151464978945e-09, |
| "loss": 1.2776278257369995, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.0036934441366574, |
| "grad_norm": 5.625, |
| "learning_rate": 6.179231878098257e-09, |
| "loss": 1.6098976135253906, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.0061557402277623, |
| "grad_norm": 1.875, |
| "learning_rate": 6.163309446029157e-09, |
| "loss": 1.5421602725982666, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.0086180363188673, |
| "grad_norm": 5.3125, |
| "learning_rate": 6.1473844215298045e-09, |
| "loss": 1.4228730201721191, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.0110803324099722, |
| "grad_norm": 14.75, |
| "learning_rate": 6.131457057399506e-09, |
| "loss": 2.0147526264190674, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.0135426285010773, |
| "grad_norm": 13.9375, |
| "learning_rate": 6.115527606474713e-09, |
| "loss": 2.301534652709961, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.0160049245921823, |
| "grad_norm": 6.75, |
| "learning_rate": 6.099596321625005e-09, |
| "loss": 1.9000599384307861, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.0184672206832872, |
| "grad_norm": 1.4140625, |
| "learning_rate": 6.083663455749068e-09, |
| "loss": 1.2694454193115234, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.0209295167743921, |
| "grad_norm": 2.453125, |
| "learning_rate": 6.0677292617706915e-09, |
| "loss": 1.1476200819015503, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.023391812865497, |
| "grad_norm": 15.125, |
| "learning_rate": 6.051793992634741e-09, |
| "loss": 1.685870885848999, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.025854108956602, |
| "grad_norm": 5.15625, |
| "learning_rate": 6.035857901303159e-09, |
| "loss": 2.1021130084991455, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.028316405047707, |
| "grad_norm": 9.25, |
| "learning_rate": 6.019921240750932e-09, |
| "loss": 1.9393489360809326, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.0307787011388119, |
| "grad_norm": 3.640625, |
| "learning_rate": 6.0039842639620844e-09, |
| "loss": 1.9408633708953857, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.0332409972299168, |
| "grad_norm": 16.875, |
| "learning_rate": 5.988047223925661e-09, |
| "loss": 2.042579174041748, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.035703293321022, |
| "grad_norm": 2.328125, |
| "learning_rate": 5.9721103736317114e-09, |
| "loss": 1.7358704805374146, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.0381655894121269, |
| "grad_norm": 7.53125, |
| "learning_rate": 5.956173966067275e-09, |
| "loss": 1.5867335796356201, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.0406278855032318, |
| "grad_norm": 4.34375, |
| "learning_rate": 5.940238254212358e-09, |
| "loss": 1.8849399089813232, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.0430901815943368, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.924303491035925e-09, |
| "loss": 1.643231987953186, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.0455524776854417, |
| "grad_norm": 14.0625, |
| "learning_rate": 5.9083699294918835e-09, |
| "loss": 2.0420408248901367, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.0480147737765466, |
| "grad_norm": 10.5, |
| "learning_rate": 5.89243782251506e-09, |
| "loss": 2.353334903717041, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.0504770698676515, |
| "grad_norm": 12.625, |
| "learning_rate": 5.876507423017199e-09, |
| "loss": 2.2866880893707275, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.0529393659587565, |
| "grad_norm": 5.09375, |
| "learning_rate": 5.8605789838829335e-09, |
| "loss": 2.091262102127075, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.0554016620498614, |
| "grad_norm": 15.1875, |
| "learning_rate": 5.844652757965778e-09, |
| "loss": 2.1091365814208984, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.0578639581409663, |
| "grad_norm": 2.4375, |
| "learning_rate": 5.828728998084117e-09, |
| "loss": 1.6677895784378052, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.0603262542320715, |
| "grad_norm": 4.4375, |
| "learning_rate": 5.812807957017181e-09, |
| "loss": 1.5235992670059204, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.0627885503231764, |
| "grad_norm": 12.1875, |
| "learning_rate": 5.796889887501051e-09, |
| "loss": 2.279834270477295, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.0652508464142814, |
| "grad_norm": 9.125, |
| "learning_rate": 5.780975042224629e-09, |
| "loss": 2.450547456741333, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.0677131425053863, |
| "grad_norm": 61.25, |
| "learning_rate": 5.765063673825634e-09, |
| "loss": 2.2601470947265625, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.0701754385964912, |
| "grad_norm": 3.140625, |
| "learning_rate": 5.749156034886602e-09, |
| "loss": 1.6974682807922363, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.0726377346875962, |
| "grad_norm": 5.75, |
| "learning_rate": 5.733252377930853e-09, |
| "loss": 1.7504122257232666, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.075100030778701, |
| "grad_norm": 3.640625, |
| "learning_rate": 5.7173529554185045e-09, |
| "loss": 1.7744596004486084, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.077562326869806, |
| "grad_norm": 5.0625, |
| "learning_rate": 5.701458019742448e-09, |
| "loss": 1.8063809871673584, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.080024622960911, |
| "grad_norm": 4.75, |
| "learning_rate": 5.685567823224358e-09, |
| "loss": 1.8798420429229736, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.082486919052016, |
| "grad_norm": 12.75, |
| "learning_rate": 5.669682618110672e-09, |
| "loss": 2.0758848190307617, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.084949215143121, |
| "grad_norm": 12.5, |
| "learning_rate": 5.653802656568592e-09, |
| "loss": 2.1326591968536377, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.087411511234226, |
| "grad_norm": 4.8125, |
| "learning_rate": 5.637928190682084e-09, |
| "loss": 1.9486507177352905, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.089873807325331, |
| "grad_norm": 6.75, |
| "learning_rate": 5.622059472447876e-09, |
| "loss": 1.9365224838256836, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.0923361034164358, |
| "grad_norm": 6.4375, |
| "learning_rate": 5.606196753771449e-09, |
| "loss": 1.8881072998046875, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.0947983995075408, |
| "grad_norm": 7.21875, |
| "learning_rate": 5.590340286463054e-09, |
| "loss": 1.9489333629608154, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.0972606955986457, |
| "grad_norm": 8.5, |
| "learning_rate": 5.574490322233697e-09, |
| "loss": 1.9946143627166748, |
| "step": 892 |
| }, |
| { |
| "epoch": 1.0997229916897506, |
| "grad_norm": 3.484375, |
| "learning_rate": 5.558647112691158e-09, |
| "loss": 1.6062787771224976, |
| "step": 894 |
| }, |
| { |
| "epoch": 1.1021852877808556, |
| "grad_norm": 2.859375, |
| "learning_rate": 5.542810909335987e-09, |
| "loss": 1.2802103757858276, |
| "step": 896 |
| }, |
| { |
| "epoch": 1.1046475838719605, |
| "grad_norm": 17.0, |
| "learning_rate": 5.526981963557518e-09, |
| "loss": 1.7315878868103027, |
| "step": 898 |
| }, |
| { |
| "epoch": 1.1071098799630656, |
| "grad_norm": 9.0, |
| "learning_rate": 5.511160526629875e-09, |
| "loss": 1.9750934839248657, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.1095721760541706, |
| "grad_norm": 3.515625, |
| "learning_rate": 5.495346849707981e-09, |
| "loss": 1.6797375679016113, |
| "step": 902 |
| }, |
| { |
| "epoch": 1.1120344721452755, |
| "grad_norm": 10.75, |
| "learning_rate": 5.479541183823578e-09, |
| "loss": 1.8305199146270752, |
| "step": 904 |
| }, |
| { |
| "epoch": 1.1144967682363804, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.463743779881238e-09, |
| "loss": 1.9975595474243164, |
| "step": 906 |
| }, |
| { |
| "epoch": 1.1169590643274854, |
| "grad_norm": 4.65625, |
| "learning_rate": 5.447954888654378e-09, |
| "loss": 1.7815577983856201, |
| "step": 908 |
| }, |
| { |
| "epoch": 1.1194213604185903, |
| "grad_norm": 3.109375, |
| "learning_rate": 5.432174760781281e-09, |
| "loss": 1.5837122201919556, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.1218836565096952, |
| "grad_norm": 2.25, |
| "learning_rate": 5.416403646761119e-09, |
| "loss": 1.2701913118362427, |
| "step": 912 |
| }, |
| { |
| "epoch": 1.1243459526008002, |
| "grad_norm": 2.890625, |
| "learning_rate": 5.400641796949976e-09, |
| "loss": 1.3599649667739868, |
| "step": 914 |
| }, |
| { |
| "epoch": 1.1268082486919053, |
| "grad_norm": 6.34375, |
| "learning_rate": 5.384889461556868e-09, |
| "loss": 1.5575028657913208, |
| "step": 916 |
| }, |
| { |
| "epoch": 1.1292705447830103, |
| "grad_norm": 3.34375, |
| "learning_rate": 5.36914689063978e-09, |
| "loss": 1.4743753671646118, |
| "step": 918 |
| }, |
| { |
| "epoch": 1.1317328408741152, |
| "grad_norm": 5.25, |
| "learning_rate": 5.353414334101692e-09, |
| "loss": 1.5236045122146606, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.1341951369652201, |
| "grad_norm": 4.4375, |
| "learning_rate": 5.337692041686615e-09, |
| "loss": 1.891930341720581, |
| "step": 922 |
| }, |
| { |
| "epoch": 1.136657433056325, |
| "grad_norm": 2.046875, |
| "learning_rate": 5.321980262975614e-09, |
| "loss": 1.522653341293335, |
| "step": 924 |
| }, |
| { |
| "epoch": 1.13911972914743, |
| "grad_norm": 15.625, |
| "learning_rate": 5.306279247382867e-09, |
| "loss": 1.66744065284729, |
| "step": 926 |
| }, |
| { |
| "epoch": 1.141582025238535, |
| "grad_norm": 16.875, |
| "learning_rate": 5.290589244151689e-09, |
| "loss": 2.157740592956543, |
| "step": 928 |
| }, |
| { |
| "epoch": 1.1440443213296398, |
| "grad_norm": 2.390625, |
| "learning_rate": 5.274910502350581e-09, |
| "loss": 1.5675222873687744, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.1465066174207448, |
| "grad_norm": 4.84375, |
| "learning_rate": 5.259243270869276e-09, |
| "loss": 1.1499652862548828, |
| "step": 932 |
| }, |
| { |
| "epoch": 1.1489689135118497, |
| "grad_norm": 12.75, |
| "learning_rate": 5.243587798414792e-09, |
| "loss": 1.5367200374603271, |
| "step": 934 |
| }, |
| { |
| "epoch": 1.1514312096029546, |
| "grad_norm": 5.34375, |
| "learning_rate": 5.227944333507477e-09, |
| "loss": 1.9310216903686523, |
| "step": 936 |
| }, |
| { |
| "epoch": 1.1538935056940598, |
| "grad_norm": 11.5, |
| "learning_rate": 5.212313124477067e-09, |
| "loss": 2.123908519744873, |
| "step": 938 |
| }, |
| { |
| "epoch": 1.1563558017851647, |
| "grad_norm": 7.28125, |
| "learning_rate": 5.196694419458744e-09, |
| "loss": 2.1816015243530273, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.1588180978762697, |
| "grad_norm": 1.84375, |
| "learning_rate": 5.1810884663891986e-09, |
| "loss": 1.5526807308197021, |
| "step": 942 |
| }, |
| { |
| "epoch": 1.1612803939673746, |
| "grad_norm": 1.8671875, |
| "learning_rate": 5.165495513002691e-09, |
| "loss": 1.3024842739105225, |
| "step": 944 |
| }, |
| { |
| "epoch": 1.1637426900584795, |
| "grad_norm": 2.796875, |
| "learning_rate": 5.149915806827121e-09, |
| "loss": 1.2783153057098389, |
| "step": 946 |
| }, |
| { |
| "epoch": 1.1662049861495845, |
| "grad_norm": 5.125, |
| "learning_rate": 5.134349595180094e-09, |
| "loss": 1.5641247034072876, |
| "step": 948 |
| }, |
| { |
| "epoch": 1.1686672822406894, |
| "grad_norm": 7.0, |
| "learning_rate": 5.1187971251650065e-09, |
| "loss": 1.9546620845794678, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.1711295783317943, |
| "grad_norm": 4.4375, |
| "learning_rate": 5.10325864366711e-09, |
| "loss": 1.87162446975708, |
| "step": 952 |
| }, |
| { |
| "epoch": 1.1735918744228995, |
| "grad_norm": 11.5, |
| "learning_rate": 5.087734397349596e-09, |
| "loss": 1.8723485469818115, |
| "step": 954 |
| }, |
| { |
| "epoch": 1.1760541705140044, |
| "grad_norm": 5.21875, |
| "learning_rate": 5.072224632649684e-09, |
| "loss": 1.91074538230896, |
| "step": 956 |
| }, |
| { |
| "epoch": 1.1785164666051093, |
| "grad_norm": 5.25, |
| "learning_rate": 5.056729595774712e-09, |
| "loss": 1.9009315967559814, |
| "step": 958 |
| }, |
| { |
| "epoch": 1.1809787626962143, |
| "grad_norm": 7.3125, |
| "learning_rate": 5.041249532698214e-09, |
| "loss": 1.9836119413375854, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.1834410587873192, |
| "grad_norm": 9.375, |
| "learning_rate": 5.025784689156032e-09, |
| "loss": 1.9037981033325195, |
| "step": 962 |
| }, |
| { |
| "epoch": 1.1859033548784241, |
| "grad_norm": 27.875, |
| "learning_rate": 5.0103353106424065e-09, |
| "loss": 2.551020622253418, |
| "step": 964 |
| }, |
| { |
| "epoch": 1.188365650969529, |
| "grad_norm": 12.75, |
| "learning_rate": 4.994901642406078e-09, |
| "loss": 2.474264144897461, |
| "step": 966 |
| }, |
| { |
| "epoch": 1.190827947060634, |
| "grad_norm": 11.5625, |
| "learning_rate": 4.979483929446398e-09, |
| "loss": 1.7837506532669067, |
| "step": 968 |
| }, |
| { |
| "epoch": 1.193290243151739, |
| "grad_norm": 3.65625, |
| "learning_rate": 4.964082416509442e-09, |
| "loss": 1.760176181793213, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.1957525392428439, |
| "grad_norm": 17.75, |
| "learning_rate": 4.948697348084115e-09, |
| "loss": 1.9721624851226807, |
| "step": 972 |
| }, |
| { |
| "epoch": 1.1982148353339488, |
| "grad_norm": 6.6875, |
| "learning_rate": 4.933328968398283e-09, |
| "loss": 1.8035709857940674, |
| "step": 974 |
| }, |
| { |
| "epoch": 1.200677131425054, |
| "grad_norm": 5.21875, |
| "learning_rate": 4.9179775214148806e-09, |
| "loss": 1.6362351179122925, |
| "step": 976 |
| }, |
| { |
| "epoch": 1.2031394275161589, |
| "grad_norm": 5.90625, |
| "learning_rate": 4.902643250828055e-09, |
| "loss": 1.7732539176940918, |
| "step": 978 |
| }, |
| { |
| "epoch": 1.2056017236072638, |
| "grad_norm": 4.875, |
| "learning_rate": 4.887326400059283e-09, |
| "loss": 1.7590731382369995, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.2080640196983687, |
| "grad_norm": 2.421875, |
| "learning_rate": 4.8720272122535195e-09, |
| "loss": 1.590978980064392, |
| "step": 982 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 22.875, |
| "learning_rate": 4.8567459302753234e-09, |
| "loss": 1.8453547954559326, |
| "step": 984 |
| }, |
| { |
| "epoch": 1.2129886118805786, |
| "grad_norm": 6.71875, |
| "learning_rate": 4.841482796705019e-09, |
| "loss": 2.2472167015075684, |
| "step": 986 |
| }, |
| { |
| "epoch": 1.2154509079716835, |
| "grad_norm": 5.0625, |
| "learning_rate": 4.826238053834831e-09, |
| "loss": 1.9840574264526367, |
| "step": 988 |
| }, |
| { |
| "epoch": 1.2179132040627885, |
| "grad_norm": 9.3125, |
| "learning_rate": 4.811011943665047e-09, |
| "loss": 1.930182695388794, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.2203755001538936, |
| "grad_norm": 15.875, |
| "learning_rate": 4.795804707900169e-09, |
| "loss": 2.222364664077759, |
| "step": 992 |
| }, |
| { |
| "epoch": 1.2228377962449986, |
| "grad_norm": 10.9375, |
| "learning_rate": 4.780616587945083e-09, |
| "loss": 2.241105079650879, |
| "step": 994 |
| }, |
| { |
| "epoch": 1.2253000923361035, |
| "grad_norm": 6.09375, |
| "learning_rate": 4.765447824901222e-09, |
| "loss": 2.1059789657592773, |
| "step": 996 |
| }, |
| { |
| "epoch": 1.2277623884272084, |
| "grad_norm": 5.0625, |
| "learning_rate": 4.750298659562745e-09, |
| "loss": 1.9286503791809082, |
| "step": 998 |
| }, |
| { |
| "epoch": 1.2302246845183133, |
| "grad_norm": 4.84375, |
| "learning_rate": 4.735169332412704e-09, |
| "loss": 1.8667454719543457, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.2326869806094183, |
| "grad_norm": 9.4375, |
| "learning_rate": 4.720060083619239e-09, |
| "loss": 2.0463290214538574, |
| "step": 1002 |
| }, |
| { |
| "epoch": 1.2351492767005232, |
| "grad_norm": 6.28125, |
| "learning_rate": 4.7049711530317564e-09, |
| "loss": 2.106719970703125, |
| "step": 1004 |
| }, |
| { |
| "epoch": 1.2376115727916281, |
| "grad_norm": 3.8125, |
| "learning_rate": 4.6899027801771234e-09, |
| "loss": 1.829174518585205, |
| "step": 1006 |
| }, |
| { |
| "epoch": 1.240073868882733, |
| "grad_norm": 47.5, |
| "learning_rate": 4.6748552042558664e-09, |
| "loss": 2.110135555267334, |
| "step": 1008 |
| }, |
| { |
| "epoch": 1.242536164973838, |
| "grad_norm": 15.9375, |
| "learning_rate": 4.659828664138378e-09, |
| "loss": 2.152853012084961, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.244998461064943, |
| "grad_norm": 10.75, |
| "learning_rate": 4.6448233983611165e-09, |
| "loss": 1.862748622894287, |
| "step": 1012 |
| }, |
| { |
| "epoch": 1.247460757156048, |
| "grad_norm": 20.375, |
| "learning_rate": 4.629839645122828e-09, |
| "loss": 2.054180860519409, |
| "step": 1014 |
| }, |
| { |
| "epoch": 1.249923053247153, |
| "grad_norm": 10.5, |
| "learning_rate": 4.614877642280759e-09, |
| "loss": 2.0183398723602295, |
| "step": 1016 |
| }, |
| { |
| "epoch": 1.252385349338258, |
| "grad_norm": 4.3125, |
| "learning_rate": 4.59993762734688e-09, |
| "loss": 1.9448716640472412, |
| "step": 1018 |
| }, |
| { |
| "epoch": 1.254847645429363, |
| "grad_norm": 5.3125, |
| "learning_rate": 4.585019837484127e-09, |
| "loss": 1.909618854522705, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.2573099415204678, |
| "grad_norm": 4.4375, |
| "learning_rate": 4.5701245095026175e-09, |
| "loss": 1.8093581199645996, |
| "step": 1022 |
| }, |
| { |
| "epoch": 1.2597722376115728, |
| "grad_norm": 4.375, |
| "learning_rate": 4.555251879855905e-09, |
| "loss": 1.8561820983886719, |
| "step": 1024 |
| }, |
| { |
| "epoch": 1.2622345337026777, |
| "grad_norm": 5.71875, |
| "learning_rate": 4.540402184637225e-09, |
| "loss": 1.9136399030685425, |
| "step": 1026 |
| }, |
| { |
| "epoch": 1.2646968297937828, |
| "grad_norm": 6.1875, |
| "learning_rate": 4.525575659575739e-09, |
| "loss": 1.922465443611145, |
| "step": 1028 |
| }, |
| { |
| "epoch": 1.2671591258848878, |
| "grad_norm": 6.125, |
| "learning_rate": 4.510772540032801e-09, |
| "loss": 1.945884346961975, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.2696214219759927, |
| "grad_norm": 11.6875, |
| "learning_rate": 4.495993060998216e-09, |
| "loss": 2.1394665241241455, |
| "step": 1032 |
| }, |
| { |
| "epoch": 1.2720837180670976, |
| "grad_norm": 12.875, |
| "learning_rate": 4.481237457086511e-09, |
| "loss": 2.548738479614258, |
| "step": 1034 |
| }, |
| { |
| "epoch": 1.2745460141582026, |
| "grad_norm": 6.65625, |
| "learning_rate": 4.466505962533216e-09, |
| "loss": 2.148568868637085, |
| "step": 1036 |
| }, |
| { |
| "epoch": 1.2770083102493075, |
| "grad_norm": 143.0, |
| "learning_rate": 4.451798811191132e-09, |
| "loss": 2.0206987857818604, |
| "step": 1038 |
| }, |
| { |
| "epoch": 1.2794706063404124, |
| "grad_norm": 4.78125, |
| "learning_rate": 4.437116236526635e-09, |
| "loss": 2.025409698486328, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.2819329024315174, |
| "grad_norm": 14.875, |
| "learning_rate": 4.42245847161596e-09, |
| "loss": 1.8983882665634155, |
| "step": 1042 |
| }, |
| { |
| "epoch": 1.2843951985226223, |
| "grad_norm": 1.8515625, |
| "learning_rate": 4.4078257491415e-09, |
| "loss": 1.594254732131958, |
| "step": 1044 |
| }, |
| { |
| "epoch": 1.2868574946137272, |
| "grad_norm": 3.75, |
| "learning_rate": 4.393218301388123e-09, |
| "loss": 1.4578649997711182, |
| "step": 1046 |
| }, |
| { |
| "epoch": 1.2893197907048322, |
| "grad_norm": 6.0625, |
| "learning_rate": 4.378636360239471e-09, |
| "loss": 1.8163200616836548, |
| "step": 1048 |
| }, |
| { |
| "epoch": 1.291782086795937, |
| "grad_norm": 21.625, |
| "learning_rate": 4.364080157174287e-09, |
| "loss": 1.811424732208252, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.2942443828870422, |
| "grad_norm": 6.46875, |
| "learning_rate": 4.349549923262743e-09, |
| "loss": 1.6952979564666748, |
| "step": 1052 |
| }, |
| { |
| "epoch": 1.2967066789781472, |
| "grad_norm": 8.9375, |
| "learning_rate": 4.33504588916276e-09, |
| "loss": 1.85584557056427, |
| "step": 1054 |
| }, |
| { |
| "epoch": 1.299168975069252, |
| "grad_norm": 6.25, |
| "learning_rate": 4.320568285116362e-09, |
| "loss": 1.8780372142791748, |
| "step": 1056 |
| }, |
| { |
| "epoch": 1.301631271160357, |
| "grad_norm": 3.265625, |
| "learning_rate": 4.306117340946008e-09, |
| "loss": 1.694900393486023, |
| "step": 1058 |
| }, |
| { |
| "epoch": 1.304093567251462, |
| "grad_norm": 5.40625, |
| "learning_rate": 4.291693286050951e-09, |
| "loss": 1.7237621545791626, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.306555863342567, |
| "grad_norm": 7.8125, |
| "learning_rate": 4.277296349403592e-09, |
| "loss": 1.9782402515411377, |
| "step": 1062 |
| }, |
| { |
| "epoch": 1.3090181594336718, |
| "grad_norm": 11.625, |
| "learning_rate": 4.262926759545853e-09, |
| "loss": 2.2806496620178223, |
| "step": 1064 |
| }, |
| { |
| "epoch": 1.311480455524777, |
| "grad_norm": 14.9375, |
| "learning_rate": 4.2485847445855384e-09, |
| "loss": 2.0329091548919678, |
| "step": 1066 |
| }, |
| { |
| "epoch": 1.313942751615882, |
| "grad_norm": 7.8125, |
| "learning_rate": 4.234270532192722e-09, |
| "loss": 1.996172308921814, |
| "step": 1068 |
| }, |
| { |
| "epoch": 1.3164050477069869, |
| "grad_norm": 5.4375, |
| "learning_rate": 4.219984349596131e-09, |
| "loss": 1.7426702976226807, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.3188673437980918, |
| "grad_norm": 4.09375, |
| "learning_rate": 4.205726423579531e-09, |
| "loss": 1.9689075946807861, |
| "step": 1072 |
| }, |
| { |
| "epoch": 1.3213296398891967, |
| "grad_norm": 4.375, |
| "learning_rate": 4.1914969804781435e-09, |
| "loss": 1.851407766342163, |
| "step": 1074 |
| }, |
| { |
| "epoch": 1.3237919359803016, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.177296246175035e-09, |
| "loss": 1.9321177005767822, |
| "step": 1076 |
| }, |
| { |
| "epoch": 1.3262542320714066, |
| "grad_norm": 10.75, |
| "learning_rate": 4.1631244460975395e-09, |
| "loss": 2.1217970848083496, |
| "step": 1078 |
| }, |
| { |
| "epoch": 1.3287165281625115, |
| "grad_norm": 2.34375, |
| "learning_rate": 4.148981805213683e-09, |
| "loss": 1.6175642013549805, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.3311788242536164, |
| "grad_norm": 9.9375, |
| "learning_rate": 4.134868548028603e-09, |
| "loss": 1.8694862127304077, |
| "step": 1082 |
| }, |
| { |
| "epoch": 1.3336411203447214, |
| "grad_norm": 3.9375, |
| "learning_rate": 4.120784898580994e-09, |
| "loss": 1.9671717882156372, |
| "step": 1084 |
| }, |
| { |
| "epoch": 1.3361034164358263, |
| "grad_norm": 5.9375, |
| "learning_rate": 4.106731080439549e-09, |
| "loss": 1.6825287342071533, |
| "step": 1086 |
| }, |
| { |
| "epoch": 1.3385657125269312, |
| "grad_norm": 3.03125, |
| "learning_rate": 4.092707316699403e-09, |
| "loss": 1.5507920980453491, |
| "step": 1088 |
| }, |
| { |
| "epoch": 1.3410280086180364, |
| "grad_norm": 6.03125, |
| "learning_rate": 4.078713829978599e-09, |
| "loss": 1.4552762508392334, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.3434903047091413, |
| "grad_norm": 7.09375, |
| "learning_rate": 4.064750842414555e-09, |
| "loss": 1.8754684925079346, |
| "step": 1092 |
| }, |
| { |
| "epoch": 1.3459526008002463, |
| "grad_norm": 94.5, |
| "learning_rate": 4.050818575660528e-09, |
| "loss": 2.175379753112793, |
| "step": 1094 |
| }, |
| { |
| "epoch": 1.3484148968913512, |
| "grad_norm": 2.921875, |
| "learning_rate": 4.0369172508821154e-09, |
| "loss": 1.8554493188858032, |
| "step": 1096 |
| }, |
| { |
| "epoch": 1.3508771929824561, |
| "grad_norm": 4.5625, |
| "learning_rate": 4.023047088753718e-09, |
| "loss": 1.2790199518203735, |
| "step": 1098 |
| }, |
| { |
| "epoch": 1.353339489073561, |
| "grad_norm": 4.75, |
| "learning_rate": 4.009208309455052e-09, |
| "loss": 1.7523287534713745, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.355801785164666, |
| "grad_norm": 7.9375, |
| "learning_rate": 3.9954011326676595e-09, |
| "loss": 2.061239242553711, |
| "step": 1102 |
| }, |
| { |
| "epoch": 1.3582640812557711, |
| "grad_norm": 8.875, |
| "learning_rate": 3.981625777571407e-09, |
| "loss": 2.029423713684082, |
| "step": 1104 |
| }, |
| { |
| "epoch": 1.360726377346876, |
| "grad_norm": 22.125, |
| "learning_rate": 3.967882462841013e-09, |
| "loss": 2.4487719535827637, |
| "step": 1106 |
| }, |
| { |
| "epoch": 1.363188673437981, |
| "grad_norm": 13.4375, |
| "learning_rate": 3.954171406642579e-09, |
| "loss": 2.2747087478637695, |
| "step": 1108 |
| }, |
| { |
| "epoch": 1.365650969529086, |
| "grad_norm": 8.875, |
| "learning_rate": 3.940492826630122e-09, |
| "loss": 2.142123222351074, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.3681132656201909, |
| "grad_norm": 13.3125, |
| "learning_rate": 3.926846939942119e-09, |
| "loss": 2.411155939102173, |
| "step": 1112 |
| }, |
| { |
| "epoch": 1.3705755617112958, |
| "grad_norm": 6.96875, |
| "learning_rate": 3.913233963198062e-09, |
| "loss": 2.1852264404296875, |
| "step": 1114 |
| }, |
| { |
| "epoch": 1.3730378578024007, |
| "grad_norm": 2.71875, |
| "learning_rate": 3.899654112495024e-09, |
| "loss": 1.5160444974899292, |
| "step": 1116 |
| }, |
| { |
| "epoch": 1.3755001538935057, |
| "grad_norm": 4.59375, |
| "learning_rate": 3.886107603404221e-09, |
| "loss": 1.5113252401351929, |
| "step": 1118 |
| }, |
| { |
| "epoch": 1.3779624499846106, |
| "grad_norm": 4.71875, |
| "learning_rate": 3.872594650967591e-09, |
| "loss": 1.700373649597168, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.3804247460757155, |
| "grad_norm": 9.5625, |
| "learning_rate": 3.859115469694385e-09, |
| "loss": 1.9584300518035889, |
| "step": 1122 |
| }, |
| { |
| "epoch": 1.3828870421668205, |
| "grad_norm": 5.5, |
| "learning_rate": 3.845670273557754e-09, |
| "loss": 1.8532516956329346, |
| "step": 1124 |
| }, |
| { |
| "epoch": 1.3853493382579254, |
| "grad_norm": 4.21875, |
| "learning_rate": 3.832259275991365e-09, |
| "loss": 1.640071988105774, |
| "step": 1126 |
| }, |
| { |
| "epoch": 1.3878116343490305, |
| "grad_norm": 3.390625, |
| "learning_rate": 3.818882689885998e-09, |
| "loss": 1.2326576709747314, |
| "step": 1128 |
| }, |
| { |
| "epoch": 1.3902739304401355, |
| "grad_norm": 4.375, |
| "learning_rate": 3.80554072758618e-09, |
| "loss": 1.5156090259552002, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.3927362265312404, |
| "grad_norm": 2.625, |
| "learning_rate": 3.7922336008868e-09, |
| "loss": 1.5685241222381592, |
| "step": 1132 |
| }, |
| { |
| "epoch": 1.3951985226223453, |
| "grad_norm": 5.09375, |
| "learning_rate": 3.778961521029762e-09, |
| "loss": 1.6617923974990845, |
| "step": 1134 |
| }, |
| { |
| "epoch": 1.3976608187134503, |
| "grad_norm": 6.46875, |
| "learning_rate": 3.765724698700621e-09, |
| "loss": 1.8906147480010986, |
| "step": 1136 |
| }, |
| { |
| "epoch": 1.4001231148045552, |
| "grad_norm": 2.875, |
| "learning_rate": 3.752523344025243e-09, |
| "loss": 1.545287847518921, |
| "step": 1138 |
| }, |
| { |
| "epoch": 1.4025854108956601, |
| "grad_norm": 7.78125, |
| "learning_rate": 3.7393576665664675e-09, |
| "loss": 1.732557773590088, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.4050477069867653, |
| "grad_norm": 2.25, |
| "learning_rate": 3.7262278753207815e-09, |
| "loss": 1.72062087059021, |
| "step": 1142 |
| }, |
| { |
| "epoch": 1.4075100030778702, |
| "grad_norm": 8.75, |
| "learning_rate": 3.7131341787150018e-09, |
| "loss": 1.5638048648834229, |
| "step": 1144 |
| }, |
| { |
| "epoch": 1.4099722991689752, |
| "grad_norm": 25.0, |
| "learning_rate": 3.7000767846029665e-09, |
| "loss": 2.013415575027466, |
| "step": 1146 |
| }, |
| { |
| "epoch": 1.41243459526008, |
| "grad_norm": 2.46875, |
| "learning_rate": 3.687055900262238e-09, |
| "loss": 1.5985221862792969, |
| "step": 1148 |
| }, |
| { |
| "epoch": 1.414896891351185, |
| "grad_norm": 12.1875, |
| "learning_rate": 3.6740717323908046e-09, |
| "loss": 1.7952547073364258, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.41735918744229, |
| "grad_norm": 2.9375, |
| "learning_rate": 3.6611244871038118e-09, |
| "loss": 1.5459375381469727, |
| "step": 1152 |
| }, |
| { |
| "epoch": 1.4198214835333949, |
| "grad_norm": 6.84375, |
| "learning_rate": 3.648214369930278e-09, |
| "loss": 1.641556739807129, |
| "step": 1154 |
| }, |
| { |
| "epoch": 1.4222837796244998, |
| "grad_norm": 2.109375, |
| "learning_rate": 3.635341585809837e-09, |
| "loss": 1.5961995124816895, |
| "step": 1156 |
| }, |
| { |
| "epoch": 1.4247460757156047, |
| "grad_norm": 9.125, |
| "learning_rate": 3.6225063390894896e-09, |
| "loss": 1.6079602241516113, |
| "step": 1158 |
| }, |
| { |
| "epoch": 1.4272083718067097, |
| "grad_norm": 4.84375, |
| "learning_rate": 3.609708833520351e-09, |
| "loss": 2.1076085567474365, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.4296706678978146, |
| "grad_norm": 19.125, |
| "learning_rate": 3.5969492722544207e-09, |
| "loss": 2.1435282230377197, |
| "step": 1162 |
| }, |
| { |
| "epoch": 1.4321329639889195, |
| "grad_norm": 1.796875, |
| "learning_rate": 3.5842278578413577e-09, |
| "loss": 1.6422967910766602, |
| "step": 1164 |
| }, |
| { |
| "epoch": 1.4345952600800247, |
| "grad_norm": 4.1875, |
| "learning_rate": 3.5715447922252655e-09, |
| "loss": 1.4160196781158447, |
| "step": 1166 |
| }, |
| { |
| "epoch": 1.4370575561711296, |
| "grad_norm": 7.78125, |
| "learning_rate": 3.558900276741485e-09, |
| "loss": 1.9306385517120361, |
| "step": 1168 |
| }, |
| { |
| "epoch": 1.4395198522622346, |
| "grad_norm": 6.625, |
| "learning_rate": 3.5462945121134016e-09, |
| "loss": 2.028043508529663, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.4419821483533395, |
| "grad_norm": 18.125, |
| "learning_rate": 3.533727698449252e-09, |
| "loss": 1.7561140060424805, |
| "step": 1172 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 11.6875, |
| "learning_rate": 3.521200035238954e-09, |
| "loss": 1.9722295999526978, |
| "step": 1174 |
| }, |
| { |
| "epoch": 1.4469067405355494, |
| "grad_norm": 5.40625, |
| "learning_rate": 3.5087117213509367e-09, |
| "loss": 2.2334213256835938, |
| "step": 1176 |
| }, |
| { |
| "epoch": 1.4493690366266543, |
| "grad_norm": 10.1875, |
| "learning_rate": 3.4962629550289858e-09, |
| "loss": 2.2049357891082764, |
| "step": 1178 |
| }, |
| { |
| "epoch": 1.4518313327177594, |
| "grad_norm": 11.0625, |
| "learning_rate": 3.4838539338890964e-09, |
| "loss": 2.2469396591186523, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.4542936288088644, |
| "grad_norm": 5.59375, |
| "learning_rate": 3.4714848549163314e-09, |
| "loss": 2.023268938064575, |
| "step": 1182 |
| }, |
| { |
| "epoch": 1.4567559248999693, |
| "grad_norm": 3.671875, |
| "learning_rate": 3.4591559144617014e-09, |
| "loss": 1.8120558261871338, |
| "step": 1184 |
| }, |
| { |
| "epoch": 1.4592182209910742, |
| "grad_norm": 5.65625, |
| "learning_rate": 3.4468673082390432e-09, |
| "loss": 1.7612297534942627, |
| "step": 1186 |
| }, |
| { |
| "epoch": 1.4616805170821792, |
| "grad_norm": 23.5, |
| "learning_rate": 3.434619231321912e-09, |
| "loss": 1.9972333908081055, |
| "step": 1188 |
| }, |
| { |
| "epoch": 1.464142813173284, |
| "grad_norm": 4.3125, |
| "learning_rate": 3.4224118781404923e-09, |
| "loss": 1.8834655284881592, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.466605109264389, |
| "grad_norm": 35.25, |
| "learning_rate": 3.4102454424784997e-09, |
| "loss": 2.4007821083068848, |
| "step": 1192 |
| }, |
| { |
| "epoch": 1.469067405355494, |
| "grad_norm": 9.0, |
| "learning_rate": 3.398120117470115e-09, |
| "loss": 2.477167844772339, |
| "step": 1194 |
| }, |
| { |
| "epoch": 1.471529701446599, |
| "grad_norm": 8.625, |
| "learning_rate": 3.3860360955969127e-09, |
| "loss": 2.0541319847106934, |
| "step": 1196 |
| }, |
| { |
| "epoch": 1.4739919975377038, |
| "grad_norm": 11.3125, |
| "learning_rate": 3.373993568684808e-09, |
| "loss": 2.007800579071045, |
| "step": 1198 |
| }, |
| { |
| "epoch": 1.4764542936288088, |
| "grad_norm": 13.125, |
| "learning_rate": 3.36199272790101e-09, |
| "loss": 2.2932679653167725, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.4789165897199137, |
| "grad_norm": 2.8125, |
| "learning_rate": 3.350033763750989e-09, |
| "loss": 1.7902061939239502, |
| "step": 1202 |
| }, |
| { |
| "epoch": 1.4813788858110188, |
| "grad_norm": 15.0625, |
| "learning_rate": 3.3381168660754523e-09, |
| "loss": 1.8084830045700073, |
| "step": 1204 |
| }, |
| { |
| "epoch": 1.4838411819021238, |
| "grad_norm": 5.46875, |
| "learning_rate": 3.3262422240473268e-09, |
| "loss": 1.930219054222107, |
| "step": 1206 |
| }, |
| { |
| "epoch": 1.4863034779932287, |
| "grad_norm": 4.65625, |
| "learning_rate": 3.314410026168757e-09, |
| "loss": 1.8515759706497192, |
| "step": 1208 |
| }, |
| { |
| "epoch": 1.4887657740843336, |
| "grad_norm": 20.875, |
| "learning_rate": 3.30262046026812e-09, |
| "loss": 2.1966378688812256, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.4912280701754386, |
| "grad_norm": 5.0, |
| "learning_rate": 3.2908737134970367e-09, |
| "loss": 2.388540744781494, |
| "step": 1212 |
| }, |
| { |
| "epoch": 1.4936903662665435, |
| "grad_norm": 10.375, |
| "learning_rate": 3.2791699723273984e-09, |
| "loss": 2.1200718879699707, |
| "step": 1214 |
| }, |
| { |
| "epoch": 1.4961526623576484, |
| "grad_norm": 3.515625, |
| "learning_rate": 3.2675094225484135e-09, |
| "loss": 2.037621021270752, |
| "step": 1216 |
| }, |
| { |
| "epoch": 1.4986149584487536, |
| "grad_norm": 3.234375, |
| "learning_rate": 3.2558922492636578e-09, |
| "loss": 1.5640082359313965, |
| "step": 1218 |
| }, |
| { |
| "epoch": 1.5010772545398585, |
| "grad_norm": 6.59375, |
| "learning_rate": 3.2443186368881287e-09, |
| "loss": 1.5967392921447754, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.5035395506309635, |
| "grad_norm": 1.1875, |
| "learning_rate": 3.2327887691453277e-09, |
| "loss": 1.4248828887939453, |
| "step": 1222 |
| }, |
| { |
| "epoch": 1.5060018467220684, |
| "grad_norm": 5.84375, |
| "learning_rate": 3.2213028290643363e-09, |
| "loss": 1.5917315483093262, |
| "step": 1224 |
| }, |
| { |
| "epoch": 1.5084641428131733, |
| "grad_norm": 5.59375, |
| "learning_rate": 3.2098609989769122e-09, |
| "loss": 1.761174201965332, |
| "step": 1226 |
| }, |
| { |
| "epoch": 1.5109264389042782, |
| "grad_norm": 13.8125, |
| "learning_rate": 3.198463460514598e-09, |
| "loss": 1.7805390357971191, |
| "step": 1228 |
| }, |
| { |
| "epoch": 1.5133887349953832, |
| "grad_norm": 3.125, |
| "learning_rate": 3.1871103946058343e-09, |
| "loss": 2.06949782371521, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.515851031086488, |
| "grad_norm": 8.0625, |
| "learning_rate": 3.1758019814730902e-09, |
| "loss": 1.6458537578582764, |
| "step": 1232 |
| }, |
| { |
| "epoch": 1.518313327177593, |
| "grad_norm": 5.90625, |
| "learning_rate": 3.1645384006300033e-09, |
| "loss": 1.8969038724899292, |
| "step": 1234 |
| }, |
| { |
| "epoch": 1.520775623268698, |
| "grad_norm": 2.53125, |
| "learning_rate": 3.153319830878523e-09, |
| "loss": 1.5056371688842773, |
| "step": 1236 |
| }, |
| { |
| "epoch": 1.523237919359803, |
| "grad_norm": 25.5, |
| "learning_rate": 3.142146450306082e-09, |
| "loss": 1.7204036712646484, |
| "step": 1238 |
| }, |
| { |
| "epoch": 1.5257002154509078, |
| "grad_norm": 5.5625, |
| "learning_rate": 3.1310184362827594e-09, |
| "loss": 1.7970688343048096, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.5281625115420128, |
| "grad_norm": 2.75, |
| "learning_rate": 3.1199359654584756e-09, |
| "loss": 1.5522937774658203, |
| "step": 1242 |
| }, |
| { |
| "epoch": 1.530624807633118, |
| "grad_norm": 5.46875, |
| "learning_rate": 3.1088992137601797e-09, |
| "loss": 1.5566771030426025, |
| "step": 1244 |
| }, |
| { |
| "epoch": 1.5330871037242229, |
| "grad_norm": 4.875, |
| "learning_rate": 3.097908356389059e-09, |
| "loss": 1.8924975395202637, |
| "step": 1246 |
| }, |
| { |
| "epoch": 1.5355493998153278, |
| "grad_norm": 2.234375, |
| "learning_rate": 3.08696356781776e-09, |
| "loss": 1.5438798666000366, |
| "step": 1248 |
| }, |
| { |
| "epoch": 1.5380116959064327, |
| "grad_norm": 1.8515625, |
| "learning_rate": 3.0760650217876174e-09, |
| "loss": 1.286960482597351, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.5404739919975377, |
| "grad_norm": 3.140625, |
| "learning_rate": 3.0652128913058935e-09, |
| "loss": 1.1232177019119263, |
| "step": 1252 |
| }, |
| { |
| "epoch": 1.5429362880886428, |
| "grad_norm": 10.0625, |
| "learning_rate": 3.0544073486430396e-09, |
| "loss": 1.7119476795196533, |
| "step": 1254 |
| }, |
| { |
| "epoch": 1.5453985841797477, |
| "grad_norm": 4.84375, |
| "learning_rate": 3.0436485653299487e-09, |
| "loss": 2.0494632720947266, |
| "step": 1256 |
| }, |
| { |
| "epoch": 1.5478608802708527, |
| "grad_norm": 3.1875, |
| "learning_rate": 3.032936712155246e-09, |
| "loss": 1.5645394325256348, |
| "step": 1258 |
| }, |
| { |
| "epoch": 1.5503231763619576, |
| "grad_norm": 11.1875, |
| "learning_rate": 3.022271959162567e-09, |
| "loss": 1.7430448532104492, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.5527854724530625, |
| "grad_norm": 3.25, |
| "learning_rate": 3.0116544756478663e-09, |
| "loss": 1.6215105056762695, |
| "step": 1262 |
| }, |
| { |
| "epoch": 1.5552477685441675, |
| "grad_norm": 5.40625, |
| "learning_rate": 3.001084430156724e-09, |
| "loss": 1.4022070169448853, |
| "step": 1264 |
| }, |
| { |
| "epoch": 1.5577100646352724, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.990561990481675e-09, |
| "loss": 1.7849698066711426, |
| "step": 1266 |
| }, |
| { |
| "epoch": 1.5601723607263773, |
| "grad_norm": 2.90625, |
| "learning_rate": 2.9800873236595416e-09, |
| "loss": 1.514677882194519, |
| "step": 1268 |
| }, |
| { |
| "epoch": 1.5626346568174823, |
| "grad_norm": 10.0, |
| "learning_rate": 2.9696605959687833e-09, |
| "loss": 1.529390573501587, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.5650969529085872, |
| "grad_norm": 2.5625, |
| "learning_rate": 2.9592819729268566e-09, |
| "loss": 1.8093581199645996, |
| "step": 1272 |
| }, |
| { |
| "epoch": 1.5675592489996921, |
| "grad_norm": 10.0625, |
| "learning_rate": 2.948951619287592e-09, |
| "loss": 1.3842357397079468, |
| "step": 1274 |
| }, |
| { |
| "epoch": 1.570021545090797, |
| "grad_norm": 14.5, |
| "learning_rate": 2.938669699038571e-09, |
| "loss": 1.85842764377594, |
| "step": 1276 |
| }, |
| { |
| "epoch": 1.572483841181902, |
| "grad_norm": 29.0, |
| "learning_rate": 2.928436375398528e-09, |
| "loss": 2.2186334133148193, |
| "step": 1278 |
| }, |
| { |
| "epoch": 1.574946137273007, |
| "grad_norm": 7.625, |
| "learning_rate": 2.9182518108147588e-09, |
| "loss": 2.11116361618042, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.577408433364112, |
| "grad_norm": 10.5625, |
| "learning_rate": 2.9081161669605395e-09, |
| "loss": 2.039137363433838, |
| "step": 1282 |
| }, |
| { |
| "epoch": 1.579870729455217, |
| "grad_norm": 1.7578125, |
| "learning_rate": 2.8980296047325638e-09, |
| "loss": 1.548026204109192, |
| "step": 1284 |
| }, |
| { |
| "epoch": 1.582333025546322, |
| "grad_norm": 6.34375, |
| "learning_rate": 2.8879922842483867e-09, |
| "loss": 1.4916882514953613, |
| "step": 1286 |
| }, |
| { |
| "epoch": 1.5847953216374269, |
| "grad_norm": 4.5, |
| "learning_rate": 2.8780043648438818e-09, |
| "loss": 1.6858062744140625, |
| "step": 1288 |
| }, |
| { |
| "epoch": 1.587257617728532, |
| "grad_norm": 6.84375, |
| "learning_rate": 2.868066005070713e-09, |
| "loss": 1.8366402387619019, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.589719913819637, |
| "grad_norm": 3.15625, |
| "learning_rate": 2.8581773626938166e-09, |
| "loss": 1.4952478408813477, |
| "step": 1292 |
| }, |
| { |
| "epoch": 1.5921822099107419, |
| "grad_norm": 4.3125, |
| "learning_rate": 2.8483385946889017e-09, |
| "loss": 1.4701340198516846, |
| "step": 1294 |
| }, |
| { |
| "epoch": 1.5946445060018468, |
| "grad_norm": 5.25, |
| "learning_rate": 2.8385498572399503e-09, |
| "loss": 1.8555335998535156, |
| "step": 1296 |
| }, |
| { |
| "epoch": 1.5971068020929517, |
| "grad_norm": 5.0, |
| "learning_rate": 2.828811305736743e-09, |
| "loss": 1.8610620498657227, |
| "step": 1298 |
| }, |
| { |
| "epoch": 1.5995690981840567, |
| "grad_norm": 7.09375, |
| "learning_rate": 2.8191230947723945e-09, |
| "loss": 1.883762240409851, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.6020313942751616, |
| "grad_norm": 14.5625, |
| "learning_rate": 2.809485378140893e-09, |
| "loss": 2.238772392272949, |
| "step": 1302 |
| }, |
| { |
| "epoch": 1.6044936903662665, |
| "grad_norm": 6.25, |
| "learning_rate": 2.7998983088346625e-09, |
| "loss": 2.1114282608032227, |
| "step": 1304 |
| }, |
| { |
| "epoch": 1.6069559864573715, |
| "grad_norm": 1.9140625, |
| "learning_rate": 2.7903620390421363e-09, |
| "loss": 1.6002395153045654, |
| "step": 1306 |
| }, |
| { |
| "epoch": 1.6094182825484764, |
| "grad_norm": 9.4375, |
| "learning_rate": 2.7808767201453376e-09, |
| "loss": 1.6772760152816772, |
| "step": 1308 |
| }, |
| { |
| "epoch": 1.6118805786395813, |
| "grad_norm": 10.4375, |
| "learning_rate": 2.771442502717478e-09, |
| "loss": 2.111185073852539, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.6143428747306863, |
| "grad_norm": 14.125, |
| "learning_rate": 2.7620595365205627e-09, |
| "loss": 2.0705718994140625, |
| "step": 1312 |
| }, |
| { |
| "epoch": 1.6168051708217912, |
| "grad_norm": 4.46875, |
| "learning_rate": 2.752727970503024e-09, |
| "loss": 1.95082426071167, |
| "step": 1314 |
| }, |
| { |
| "epoch": 1.6192674669128961, |
| "grad_norm": 5.03125, |
| "learning_rate": 2.7434479527973477e-09, |
| "loss": 1.7210240364074707, |
| "step": 1316 |
| }, |
| { |
| "epoch": 1.621729763004001, |
| "grad_norm": 3.515625, |
| "learning_rate": 2.7342196307177214e-09, |
| "loss": 1.6697207689285278, |
| "step": 1318 |
| }, |
| { |
| "epoch": 1.6241920590951062, |
| "grad_norm": 2.65625, |
| "learning_rate": 2.7250431507577004e-09, |
| "loss": 1.4422950744628906, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.6266543551862112, |
| "grad_norm": 2.84375, |
| "learning_rate": 2.7159186585878816e-09, |
| "loss": 1.1386830806732178, |
| "step": 1322 |
| }, |
| { |
| "epoch": 1.629116651277316, |
| "grad_norm": 3.015625, |
| "learning_rate": 2.7068462990535863e-09, |
| "loss": 1.2971214056015015, |
| "step": 1324 |
| }, |
| { |
| "epoch": 1.631578947368421, |
| "grad_norm": 19.875, |
| "learning_rate": 2.697826216172569e-09, |
| "loss": 1.638606309890747, |
| "step": 1326 |
| }, |
| { |
| "epoch": 1.6340412434595262, |
| "grad_norm": 3.109375, |
| "learning_rate": 2.688858553132723e-09, |
| "loss": 1.6914677619934082, |
| "step": 1328 |
| }, |
| { |
| "epoch": 1.636503539550631, |
| "grad_norm": 2.28125, |
| "learning_rate": 2.6799434522898126e-09, |
| "loss": 1.1819281578063965, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.638965835641736, |
| "grad_norm": 2.140625, |
| "learning_rate": 2.6710810551652133e-09, |
| "loss": 1.1034936904907227, |
| "step": 1332 |
| }, |
| { |
| "epoch": 1.641428131732841, |
| "grad_norm": 34.5, |
| "learning_rate": 2.66227150244366e-09, |
| "loss": 1.6707381010055542, |
| "step": 1334 |
| }, |
| { |
| "epoch": 1.643890427823946, |
| "grad_norm": 25.5, |
| "learning_rate": 2.6535149339710184e-09, |
| "loss": 2.70631742477417, |
| "step": 1336 |
| }, |
| { |
| "epoch": 1.6463527239150508, |
| "grad_norm": 30.75, |
| "learning_rate": 2.644811488752068e-09, |
| "loss": 2.4394781589508057, |
| "step": 1338 |
| }, |
| { |
| "epoch": 1.6488150200061558, |
| "grad_norm": 13.625, |
| "learning_rate": 2.636161304948286e-09, |
| "loss": 2.2337255477905273, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.6512773160972607, |
| "grad_norm": 13.0, |
| "learning_rate": 2.627564519875663e-09, |
| "loss": 2.295048236846924, |
| "step": 1342 |
| }, |
| { |
| "epoch": 1.6537396121883656, |
| "grad_norm": 20.0, |
| "learning_rate": 2.6190212700025183e-09, |
| "loss": 2.110807418823242, |
| "step": 1344 |
| }, |
| { |
| "epoch": 1.6562019082794706, |
| "grad_norm": 4.84375, |
| "learning_rate": 2.6105316909473364e-09, |
| "loss": 1.8732104301452637, |
| "step": 1346 |
| }, |
| { |
| "epoch": 1.6586642043705755, |
| "grad_norm": 8.125, |
| "learning_rate": 2.6020959174766106e-09, |
| "loss": 1.9254186153411865, |
| "step": 1348 |
| }, |
| { |
| "epoch": 1.6611265004616804, |
| "grad_norm": 6.15625, |
| "learning_rate": 2.5937140835027097e-09, |
| "loss": 1.8715019226074219, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.6635887965527854, |
| "grad_norm": 9.8125, |
| "learning_rate": 2.5853863220817436e-09, |
| "loss": 1.9434764385223389, |
| "step": 1352 |
| }, |
| { |
| "epoch": 1.6660510926438903, |
| "grad_norm": 5.25, |
| "learning_rate": 2.577112765411459e-09, |
| "loss": 2.207705497741699, |
| "step": 1354 |
| }, |
| { |
| "epoch": 1.6685133887349952, |
| "grad_norm": 12.625, |
| "learning_rate": 2.568893544829136e-09, |
| "loss": 1.880719780921936, |
| "step": 1356 |
| }, |
| { |
| "epoch": 1.6709756848261004, |
| "grad_norm": 9.5625, |
| "learning_rate": 2.560728790809509e-09, |
| "loss": 1.8875178098678589, |
| "step": 1358 |
| }, |
| { |
| "epoch": 1.6734379809172053, |
| "grad_norm": 5.4375, |
| "learning_rate": 2.5526186329626865e-09, |
| "loss": 1.6963284015655518, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.6759002770083102, |
| "grad_norm": 5.90625, |
| "learning_rate": 2.5445632000320995e-09, |
| "loss": 1.791224718093872, |
| "step": 1362 |
| }, |
| { |
| "epoch": 1.6783625730994152, |
| "grad_norm": 3.890625, |
| "learning_rate": 2.5365626198924598e-09, |
| "loss": 1.6278963088989258, |
| "step": 1364 |
| }, |
| { |
| "epoch": 1.6808248691905203, |
| "grad_norm": 3.375, |
| "learning_rate": 2.528617019547723e-09, |
| "loss": 1.3288359642028809, |
| "step": 1366 |
| }, |
| { |
| "epoch": 1.6832871652816253, |
| "grad_norm": 9.0625, |
| "learning_rate": 2.5207265251290823e-09, |
| "loss": 1.6888291835784912, |
| "step": 1368 |
| }, |
| { |
| "epoch": 1.6857494613727302, |
| "grad_norm": 13.375, |
| "learning_rate": 2.512891261892955e-09, |
| "loss": 2.285770893096924, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.6882117574638351, |
| "grad_norm": 3.1875, |
| "learning_rate": 2.505111354219002e-09, |
| "loss": 1.671492099761963, |
| "step": 1372 |
| }, |
| { |
| "epoch": 1.69067405355494, |
| "grad_norm": 6.25, |
| "learning_rate": 2.49738692560815e-09, |
| "loss": 1.5187859535217285, |
| "step": 1374 |
| }, |
| { |
| "epoch": 1.693136349646045, |
| "grad_norm": 7.0625, |
| "learning_rate": 2.4897180986806322e-09, |
| "loss": 1.9461727142333984, |
| "step": 1376 |
| }, |
| { |
| "epoch": 1.69559864573715, |
| "grad_norm": 7.53125, |
| "learning_rate": 2.482104995174044e-09, |
| "loss": 1.8825700283050537, |
| "step": 1378 |
| }, |
| { |
| "epoch": 1.6980609418282548, |
| "grad_norm": 5.28125, |
| "learning_rate": 2.474547735941405e-09, |
| "loss": 1.8659740686416626, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.7005232379193598, |
| "grad_norm": 5.59375, |
| "learning_rate": 2.4670464409492447e-09, |
| "loss": 1.7924315929412842, |
| "step": 1382 |
| }, |
| { |
| "epoch": 1.7029855340104647, |
| "grad_norm": 13.4375, |
| "learning_rate": 2.459601229275697e-09, |
| "loss": 1.9610867500305176, |
| "step": 1384 |
| }, |
| { |
| "epoch": 1.7054478301015696, |
| "grad_norm": 8.5, |
| "learning_rate": 2.4522122191086104e-09, |
| "loss": 1.836552381515503, |
| "step": 1386 |
| }, |
| { |
| "epoch": 1.7079101261926746, |
| "grad_norm": 8.8125, |
| "learning_rate": 2.4448795277436698e-09, |
| "loss": 1.7403874397277832, |
| "step": 1388 |
| }, |
| { |
| "epoch": 1.7103724222837795, |
| "grad_norm": 4.625, |
| "learning_rate": 2.4376032715825386e-09, |
| "loss": 1.5626749992370605, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.7128347183748844, |
| "grad_norm": 3.625, |
| "learning_rate": 2.4303835661310066e-09, |
| "loss": 1.3395249843597412, |
| "step": 1392 |
| }, |
| { |
| "epoch": 1.7152970144659896, |
| "grad_norm": 13.125, |
| "learning_rate": 2.4232205259971584e-09, |
| "loss": 1.0826705694198608, |
| "step": 1394 |
| }, |
| { |
| "epoch": 1.7177593105570945, |
| "grad_norm": 12.875, |
| "learning_rate": 2.4161142648895533e-09, |
| "loss": 1.810969352722168, |
| "step": 1396 |
| }, |
| { |
| "epoch": 1.7202216066481995, |
| "grad_norm": 9.0, |
| "learning_rate": 2.4090648956154223e-09, |
| "loss": 2.039994239807129, |
| "step": 1398 |
| }, |
| { |
| "epoch": 1.7226839027393044, |
| "grad_norm": 7.625, |
| "learning_rate": 2.402072530078876e-09, |
| "loss": 1.8878741264343262, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.7251461988304093, |
| "grad_norm": 4.5625, |
| "learning_rate": 2.395137279279127e-09, |
| "loss": 1.8724961280822754, |
| "step": 1402 |
| }, |
| { |
| "epoch": 1.7276084949215145, |
| "grad_norm": 4.0, |
| "learning_rate": 2.3882592533087286e-09, |
| "loss": 1.9301607608795166, |
| "step": 1404 |
| }, |
| { |
| "epoch": 1.7300707910126194, |
| "grad_norm": 24.125, |
| "learning_rate": 2.3814385613518284e-09, |
| "loss": 1.6868252754211426, |
| "step": 1406 |
| }, |
| { |
| "epoch": 1.7325330871037243, |
| "grad_norm": 6.78125, |
| "learning_rate": 2.374675311682433e-09, |
| "loss": 1.7913291454315186, |
| "step": 1408 |
| }, |
| { |
| "epoch": 1.7349953831948293, |
| "grad_norm": 2.59375, |
| "learning_rate": 2.3679696116626936e-09, |
| "loss": 1.5577332973480225, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.7374576792859342, |
| "grad_norm": 4.875, |
| "learning_rate": 2.3613215677411944e-09, |
| "loss": 1.5362656116485596, |
| "step": 1412 |
| }, |
| { |
| "epoch": 1.7399199753770391, |
| "grad_norm": 1.75, |
| "learning_rate": 2.354731285451268e-09, |
| "loss": 1.5279173851013184, |
| "step": 1414 |
| }, |
| { |
| "epoch": 1.742382271468144, |
| "grad_norm": 10.6875, |
| "learning_rate": 2.348198869409322e-09, |
| "loss": 1.696439504623413, |
| "step": 1416 |
| }, |
| { |
| "epoch": 1.744844567559249, |
| "grad_norm": 18.5, |
| "learning_rate": 2.341724423313171e-09, |
| "loss": 2.554849147796631, |
| "step": 1418 |
| }, |
| { |
| "epoch": 1.747306863650354, |
| "grad_norm": 13.0625, |
| "learning_rate": 2.335308049940398e-09, |
| "loss": 2.1925854682922363, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.7497691597414589, |
| "grad_norm": 3.46875, |
| "learning_rate": 2.328949851146718e-09, |
| "loss": 1.593017816543579, |
| "step": 1422 |
| }, |
| { |
| "epoch": 1.7522314558325638, |
| "grad_norm": 4.0, |
| "learning_rate": 2.322649927864363e-09, |
| "loss": 1.229564905166626, |
| "step": 1424 |
| }, |
| { |
| "epoch": 1.7546937519236687, |
| "grad_norm": 15.6875, |
| "learning_rate": 2.3164083801004798e-09, |
| "loss": 1.9423973560333252, |
| "step": 1426 |
| }, |
| { |
| "epoch": 1.7571560480147737, |
| "grad_norm": 5.75, |
| "learning_rate": 2.3102253069355413e-09, |
| "loss": 2.0594370365142822, |
| "step": 1428 |
| }, |
| { |
| "epoch": 1.7596183441058786, |
| "grad_norm": 6.53125, |
| "learning_rate": 2.3041008065217754e-09, |
| "loss": 1.9393881559371948, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.7620806401969837, |
| "grad_norm": 7.90625, |
| "learning_rate": 2.298034976081607e-09, |
| "loss": 1.8895037174224854, |
| "step": 1432 |
| }, |
| { |
| "epoch": 1.7645429362880887, |
| "grad_norm": 8.125, |
| "learning_rate": 2.292027911906112e-09, |
| "loss": 1.7276127338409424, |
| "step": 1434 |
| }, |
| { |
| "epoch": 1.7670052323791936, |
| "grad_norm": 6.125, |
| "learning_rate": 2.286079709353491e-09, |
| "loss": 1.5182913541793823, |
| "step": 1436 |
| }, |
| { |
| "epoch": 1.7694675284702985, |
| "grad_norm": 8.6875, |
| "learning_rate": 2.2801904628475545e-09, |
| "loss": 1.845018982887268, |
| "step": 1438 |
| }, |
| { |
| "epoch": 1.7719298245614035, |
| "grad_norm": 13.0625, |
| "learning_rate": 2.274360265876225e-09, |
| "loss": 2.4570071697235107, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.7743921206525086, |
| "grad_norm": 6.53125, |
| "learning_rate": 2.268589210990052e-09, |
| "loss": 1.779624342918396, |
| "step": 1442 |
| }, |
| { |
| "epoch": 1.7768544167436136, |
| "grad_norm": 11.0, |
| "learning_rate": 2.262877389800745e-09, |
| "loss": 1.5919256210327148, |
| "step": 1444 |
| }, |
| { |
| "epoch": 1.7793167128347185, |
| "grad_norm": 5.96875, |
| "learning_rate": 2.257224892979714e-09, |
| "loss": 2.230924129486084, |
| "step": 1446 |
| }, |
| { |
| "epoch": 1.7817790089258234, |
| "grad_norm": 2.296875, |
| "learning_rate": 2.2516318102566373e-09, |
| "loss": 1.6709070205688477, |
| "step": 1448 |
| }, |
| { |
| "epoch": 1.7842413050169283, |
| "grad_norm": 7.1875, |
| "learning_rate": 2.24609823041803e-09, |
| "loss": 1.5729997158050537, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.7867036011080333, |
| "grad_norm": 23.125, |
| "learning_rate": 2.240624241305841e-09, |
| "loss": 2.22371768951416, |
| "step": 1452 |
| }, |
| { |
| "epoch": 1.7891658971991382, |
| "grad_norm": 9.0, |
| "learning_rate": 2.2352099298160545e-09, |
| "loss": 1.9387813806533813, |
| "step": 1454 |
| }, |
| { |
| "epoch": 1.7916281932902431, |
| "grad_norm": 6.96875, |
| "learning_rate": 2.2298553818973096e-09, |
| "loss": 1.6565120220184326, |
| "step": 1456 |
| }, |
| { |
| "epoch": 1.794090489381348, |
| "grad_norm": 24.0, |
| "learning_rate": 2.2245606825495408e-09, |
| "loss": 1.6322071552276611, |
| "step": 1458 |
| }, |
| { |
| "epoch": 1.796552785472453, |
| "grad_norm": 6.0625, |
| "learning_rate": 2.219325915822624e-09, |
| "loss": 2.004333257675171, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.799015081563558, |
| "grad_norm": 11.625, |
| "learning_rate": 2.214151164815044e-09, |
| "loss": 2.2140424251556396, |
| "step": 1462 |
| }, |
| { |
| "epoch": 1.8014773776546629, |
| "grad_norm": 5.90625, |
| "learning_rate": 2.2090365116725787e-09, |
| "loss": 1.876783847808838, |
| "step": 1464 |
| }, |
| { |
| "epoch": 1.8039396737457678, |
| "grad_norm": 2.921875, |
| "learning_rate": 2.203982037586988e-09, |
| "loss": 1.5903770923614502, |
| "step": 1466 |
| }, |
| { |
| "epoch": 1.8064019698368727, |
| "grad_norm": 5.78125, |
| "learning_rate": 2.1989878227947297e-09, |
| "loss": 1.4093436002731323, |
| "step": 1468 |
| }, |
| { |
| "epoch": 1.8088642659279779, |
| "grad_norm": 5.4375, |
| "learning_rate": 2.1940539465756848e-09, |
| "loss": 1.5252522230148315, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.8113265620190828, |
| "grad_norm": 11.1875, |
| "learning_rate": 2.1891804872519013e-09, |
| "loss": 1.6333411931991577, |
| "step": 1472 |
| }, |
| { |
| "epoch": 1.8137888581101878, |
| "grad_norm": 12.125, |
| "learning_rate": 2.1843675221863456e-09, |
| "loss": 2.395686626434326, |
| "step": 1474 |
| }, |
| { |
| "epoch": 1.8162511542012927, |
| "grad_norm": 6.40625, |
| "learning_rate": 2.179615127781678e-09, |
| "loss": 2.011446475982666, |
| "step": 1476 |
| }, |
| { |
| "epoch": 1.8187134502923976, |
| "grad_norm": 27.75, |
| "learning_rate": 2.1749233794790424e-09, |
| "loss": 1.9201209545135498, |
| "step": 1478 |
| }, |
| { |
| "epoch": 1.8211757463835028, |
| "grad_norm": 8.75, |
| "learning_rate": 2.1702923517568608e-09, |
| "loss": 1.9654639959335327, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.8236380424746077, |
| "grad_norm": 14.4375, |
| "learning_rate": 2.1657221181296596e-09, |
| "loss": 2.4255740642547607, |
| "step": 1482 |
| }, |
| { |
| "epoch": 1.8261003385657126, |
| "grad_norm": 5.46875, |
| "learning_rate": 2.161212751146898e-09, |
| "loss": 2.1441259384155273, |
| "step": 1484 |
| }, |
| { |
| "epoch": 1.8285626346568176, |
| "grad_norm": 3.03125, |
| "learning_rate": 2.1567643223918164e-09, |
| "loss": 1.5081210136413574, |
| "step": 1486 |
| }, |
| { |
| "epoch": 1.8310249307479225, |
| "grad_norm": 3.15625, |
| "learning_rate": 2.1523769024803013e-09, |
| "loss": 1.219706416130066, |
| "step": 1488 |
| }, |
| { |
| "epoch": 1.8334872268390274, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.148050561059763e-09, |
| "loss": 1.3154406547546387, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.8359495229301324, |
| "grad_norm": 4.84375, |
| "learning_rate": 2.1437853668080316e-09, |
| "loss": 1.663912057876587, |
| "step": 1492 |
| }, |
| { |
| "epoch": 1.8384118190212373, |
| "grad_norm": 5.5, |
| "learning_rate": 2.139581387432267e-09, |
| "loss": 1.9996685981750488, |
| "step": 1494 |
| }, |
| { |
| "epoch": 1.8408741151123422, |
| "grad_norm": 9.125, |
| "learning_rate": 2.135438689667882e-09, |
| "loss": 2.1527910232543945, |
| "step": 1496 |
| }, |
| { |
| "epoch": 1.8433364112034472, |
| "grad_norm": 5.4375, |
| "learning_rate": 2.1313573392774835e-09, |
| "loss": 2.181238889694214, |
| "step": 1498 |
| }, |
| { |
| "epoch": 1.845798707294552, |
| "grad_norm": 26.625, |
| "learning_rate": 2.1273374010498306e-09, |
| "loss": 2.07470965385437, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.848261003385657, |
| "grad_norm": 7.375, |
| "learning_rate": 2.123378938798803e-09, |
| "loss": 2.180095672607422, |
| "step": 1502 |
| }, |
| { |
| "epoch": 1.850723299476762, |
| "grad_norm": 10.25, |
| "learning_rate": 2.119482015362392e-09, |
| "loss": 2.023428440093994, |
| "step": 1504 |
| }, |
| { |
| "epoch": 1.8531855955678669, |
| "grad_norm": 6.03125, |
| "learning_rate": 2.1156466926016974e-09, |
| "loss": 1.9310382604599, |
| "step": 1506 |
| }, |
| { |
| "epoch": 1.855647891658972, |
| "grad_norm": 10.9375, |
| "learning_rate": 2.1118730313999516e-09, |
| "loss": 1.7410407066345215, |
| "step": 1508 |
| }, |
| { |
| "epoch": 1.858110187750077, |
| "grad_norm": 14.9375, |
| "learning_rate": 2.108161091661548e-09, |
| "loss": 2.463320732116699, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.860572483841182, |
| "grad_norm": 10.1875, |
| "learning_rate": 2.1045109323110943e-09, |
| "loss": 2.164478302001953, |
| "step": 1512 |
| }, |
| { |
| "epoch": 1.8630347799322868, |
| "grad_norm": 11.0, |
| "learning_rate": 2.1009226112924727e-09, |
| "loss": 2.304097890853882, |
| "step": 1514 |
| }, |
| { |
| "epoch": 1.8654970760233918, |
| "grad_norm": 11.4375, |
| "learning_rate": 2.097396185567926e-09, |
| "loss": 2.384671688079834, |
| "step": 1516 |
| }, |
| { |
| "epoch": 1.867959372114497, |
| "grad_norm": 11.875, |
| "learning_rate": 2.0939317111171467e-09, |
| "loss": 1.752406358718872, |
| "step": 1518 |
| }, |
| { |
| "epoch": 1.8704216682056019, |
| "grad_norm": 19.875, |
| "learning_rate": 2.090529242936392e-09, |
| "loss": 1.5490081310272217, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.8728839642967068, |
| "grad_norm": 5.90625, |
| "learning_rate": 2.087188835037611e-09, |
| "loss": 2.0984854698181152, |
| "step": 1522 |
| }, |
| { |
| "epoch": 1.8753462603878117, |
| "grad_norm": 2.890625, |
| "learning_rate": 2.0839105404475866e-09, |
| "loss": 1.6633992195129395, |
| "step": 1524 |
| }, |
| { |
| "epoch": 1.8778085564789166, |
| "grad_norm": 3.6875, |
| "learning_rate": 2.080694411207094e-09, |
| "loss": 1.4255918264389038, |
| "step": 1526 |
| }, |
| { |
| "epoch": 1.8802708525700216, |
| "grad_norm": 4.84375, |
| "learning_rate": 2.0775404983700724e-09, |
| "loss": 1.845369577407837, |
| "step": 1528 |
| }, |
| { |
| "epoch": 1.8827331486611265, |
| "grad_norm": 4.40625, |
| "learning_rate": 2.074448852002819e-09, |
| "loss": 1.7371915578842163, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.8851954447522314, |
| "grad_norm": 13.3125, |
| "learning_rate": 2.07141952118319e-09, |
| "loss": 1.805029034614563, |
| "step": 1532 |
| }, |
| { |
| "epoch": 1.8876577408433364, |
| "grad_norm": 6.65625, |
| "learning_rate": 2.068452553999822e-09, |
| "loss": 2.060267448425293, |
| "step": 1534 |
| }, |
| { |
| "epoch": 1.8901200369344413, |
| "grad_norm": 3.625, |
| "learning_rate": 2.065547997551375e-09, |
| "loss": 1.525952935218811, |
| "step": 1536 |
| }, |
| { |
| "epoch": 1.8925823330255462, |
| "grad_norm": 7.46875, |
| "learning_rate": 2.062705897945773e-09, |
| "loss": 1.4751570224761963, |
| "step": 1538 |
| }, |
| { |
| "epoch": 1.8950446291166512, |
| "grad_norm": 5.0625, |
| "learning_rate": 2.059926300299483e-09, |
| "loss": 1.6626102924346924, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.897506925207756, |
| "grad_norm": 5.65625, |
| "learning_rate": 2.057209248736792e-09, |
| "loss": 1.2773092985153198, |
| "step": 1542 |
| }, |
| { |
| "epoch": 1.899969221298861, |
| "grad_norm": 13.0625, |
| "learning_rate": 2.054554786389111e-09, |
| "loss": 1.6589457988739014, |
| "step": 1544 |
| }, |
| { |
| "epoch": 1.9024315173899662, |
| "grad_norm": 6.25, |
| "learning_rate": 2.051962955394286e-09, |
| "loss": 1.9413405656814575, |
| "step": 1546 |
| }, |
| { |
| "epoch": 1.9048938134810711, |
| "grad_norm": 10.25, |
| "learning_rate": 2.0494337968959344e-09, |
| "loss": 1.6395326852798462, |
| "step": 1548 |
| }, |
| { |
| "epoch": 1.907356109572176, |
| "grad_norm": 5.21875, |
| "learning_rate": 2.0469673510427865e-09, |
| "loss": 1.9667985439300537, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.909818405663281, |
| "grad_norm": 4.90625, |
| "learning_rate": 2.0445636569880505e-09, |
| "loss": 1.8468351364135742, |
| "step": 1552 |
| }, |
| { |
| "epoch": 1.912280701754386, |
| "grad_norm": 11.25, |
| "learning_rate": 2.0422227528887923e-09, |
| "loss": 2.118504524230957, |
| "step": 1554 |
| }, |
| { |
| "epoch": 1.914742997845491, |
| "grad_norm": 10.375, |
| "learning_rate": 2.0399446759053274e-09, |
| "loss": 2.0504517555236816, |
| "step": 1556 |
| }, |
| { |
| "epoch": 1.917205293936596, |
| "grad_norm": 5.25, |
| "learning_rate": 2.037729462200633e-09, |
| "loss": 1.661136507987976, |
| "step": 1558 |
| }, |
| { |
| "epoch": 1.919667590027701, |
| "grad_norm": 6.03125, |
| "learning_rate": 2.0355771469397726e-09, |
| "loss": 1.5671418905258179, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.9221298861188059, |
| "grad_norm": 5.34375, |
| "learning_rate": 2.0334877642893373e-09, |
| "loss": 2.0463449954986572, |
| "step": 1562 |
| }, |
| { |
| "epoch": 1.9245921822099108, |
| "grad_norm": 3.96875, |
| "learning_rate": 2.0314613474169064e-09, |
| "loss": 1.7543866634368896, |
| "step": 1564 |
| }, |
| { |
| "epoch": 1.9270544783010157, |
| "grad_norm": 23.375, |
| "learning_rate": 2.029497928490516e-09, |
| "loss": 1.5825181007385254, |
| "step": 1566 |
| }, |
| { |
| "epoch": 1.9295167743921207, |
| "grad_norm": 8.6875, |
| "learning_rate": 2.027597538678154e-09, |
| "loss": 1.5585989952087402, |
| "step": 1568 |
| }, |
| { |
| "epoch": 1.9319790704832256, |
| "grad_norm": 10.0625, |
| "learning_rate": 2.0257602081472603e-09, |
| "loss": 1.5373648405075073, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.9344413665743305, |
| "grad_norm": 3.296875, |
| "learning_rate": 2.023985966064252e-09, |
| "loss": 1.638904333114624, |
| "step": 1572 |
| }, |
| { |
| "epoch": 1.9369036626654355, |
| "grad_norm": 2.71875, |
| "learning_rate": 2.0222748405940567e-09, |
| "loss": 1.3301455974578857, |
| "step": 1574 |
| }, |
| { |
| "epoch": 1.9393659587565404, |
| "grad_norm": 2.734375, |
| "learning_rate": 2.0206268588996686e-09, |
| "loss": 1.1727893352508545, |
| "step": 1576 |
| }, |
| { |
| "epoch": 1.9418282548476453, |
| "grad_norm": 4.46875, |
| "learning_rate": 2.019042047141714e-09, |
| "loss": 1.2285372018814087, |
| "step": 1578 |
| }, |
| { |
| "epoch": 1.9442905509387503, |
| "grad_norm": 5.0625, |
| "learning_rate": 2.0175204304780413e-09, |
| "loss": 1.5906985998153687, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.9467528470298552, |
| "grad_norm": 18.875, |
| "learning_rate": 2.016062033063314e-09, |
| "loss": 1.8927161693572998, |
| "step": 1582 |
| }, |
| { |
| "epoch": 1.9492151431209603, |
| "grad_norm": 11.4375, |
| "learning_rate": 2.0146668780486356e-09, |
| "loss": 2.0817370414733887, |
| "step": 1584 |
| }, |
| { |
| "epoch": 1.9516774392120653, |
| "grad_norm": 8.4375, |
| "learning_rate": 2.0133349875811752e-09, |
| "loss": 2.1541638374328613, |
| "step": 1586 |
| }, |
| { |
| "epoch": 1.9541397353031702, |
| "grad_norm": 6.03125, |
| "learning_rate": 2.0120663828038197e-09, |
| "loss": 2.136171340942383, |
| "step": 1588 |
| }, |
| { |
| "epoch": 1.9566020313942751, |
| "grad_norm": 8.8125, |
| "learning_rate": 2.010861083854838e-09, |
| "loss": 2.047274112701416, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.95906432748538, |
| "grad_norm": 5.4375, |
| "learning_rate": 2.009719109867558e-09, |
| "loss": 2.093939781188965, |
| "step": 1592 |
| }, |
| { |
| "epoch": 1.9615266235764852, |
| "grad_norm": 8.0625, |
| "learning_rate": 2.0086404789700686e-09, |
| "loss": 1.9545447826385498, |
| "step": 1594 |
| }, |
| { |
| "epoch": 1.9639889196675901, |
| "grad_norm": 4.03125, |
| "learning_rate": 2.0076252082849266e-09, |
| "loss": 1.710350751876831, |
| "step": 1596 |
| }, |
| { |
| "epoch": 1.966451215758695, |
| "grad_norm": 9.8125, |
| "learning_rate": 2.006673313928888e-09, |
| "loss": 1.6602602005004883, |
| "step": 1598 |
| }, |
| { |
| "epoch": 1.9689135118498, |
| "grad_norm": 6.96875, |
| "learning_rate": 2.0057848110126513e-09, |
| "loss": 2.073413848876953, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.971375807940905, |
| "grad_norm": 18.75, |
| "learning_rate": 2.0049597136406157e-09, |
| "loss": 2.155198574066162, |
| "step": 1602 |
| }, |
| { |
| "epoch": 1.9738381040320099, |
| "grad_norm": 7.4375, |
| "learning_rate": 2.004198034910662e-09, |
| "loss": 2.1142520904541016, |
| "step": 1604 |
| }, |
| { |
| "epoch": 1.9763004001231148, |
| "grad_norm": 2.6875, |
| "learning_rate": 2.003499786913938e-09, |
| "loss": 1.6299633979797363, |
| "step": 1606 |
| }, |
| { |
| "epoch": 1.9787626962142197, |
| "grad_norm": 11.3125, |
| "learning_rate": 2.0028649807346742e-09, |
| "loss": 1.5626764297485352, |
| "step": 1608 |
| }, |
| { |
| "epoch": 1.9812249923053247, |
| "grad_norm": 16.875, |
| "learning_rate": 2.0022936264500017e-09, |
| "loss": 2.2909412384033203, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.9836872883964296, |
| "grad_norm": 11.25, |
| "learning_rate": 2.0017857331297935e-09, |
| "loss": 2.1796622276306152, |
| "step": 1612 |
| }, |
| { |
| "epoch": 1.9861495844875345, |
| "grad_norm": 5.375, |
| "learning_rate": 2.001341308836524e-09, |
| "loss": 1.9472308158874512, |
| "step": 1614 |
| }, |
| { |
| "epoch": 1.9886118805786395, |
| "grad_norm": 8.5625, |
| "learning_rate": 2.000960360625136e-09, |
| "loss": 1.743130087852478, |
| "step": 1616 |
| }, |
| { |
| "epoch": 1.9910741766697444, |
| "grad_norm": 10.1875, |
| "learning_rate": 2.0006428945429335e-09, |
| "loss": 1.43598210811615, |
| "step": 1618 |
| }, |
| { |
| "epoch": 1.9935364727608493, |
| "grad_norm": 12.8125, |
| "learning_rate": 2.0003889156294813e-09, |
| "loss": 1.9119551181793213, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.9959987688519545, |
| "grad_norm": 5.71875, |
| "learning_rate": 2.0001984279165285e-09, |
| "loss": 2.036318302154541, |
| "step": 1622 |
| }, |
| { |
| "epoch": 1.9984610649430594, |
| "grad_norm": 5.28125, |
| "learning_rate": 2.0000714344279417e-09, |
| "loss": 1.577465295791626, |
| "step": 1624 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 3.578125, |
| "learning_rate": 2.00000793717966e-09, |
| "loss": 1.1681241989135742, |
| "step": 1626 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 1626, |
| "total_flos": 2.5753569883429274e+18, |
| "train_loss": 1.8335715001506265, |
| "train_runtime": 15477.0683, |
| "train_samples_per_second": 1.679, |
| "train_steps_per_second": 0.105 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 1626, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 9999999, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.5753569883429274e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|