|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"eval_steps": 500, |
|
"global_step": 238700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10473397570171764, |
|
"grad_norm": 1.1384308338165283, |
|
"learning_rate": 4.989526602429829e-05, |
|
"loss": 8.9672, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.20946795140343527, |
|
"grad_norm": 0.9985808730125427, |
|
"learning_rate": 4.979053204859657e-05, |
|
"loss": 7.7253, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.31420192710515293, |
|
"grad_norm": 1.0521348714828491, |
|
"learning_rate": 4.968579807289485e-05, |
|
"loss": 7.5614, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.41893590280687054, |
|
"grad_norm": 1.0602227449417114, |
|
"learning_rate": 4.958106409719313e-05, |
|
"loss": 7.5037, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5236698785085881, |
|
"grad_norm": 1.6102268695831299, |
|
"learning_rate": 4.9476330121491414e-05, |
|
"loss": 7.4595, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6284038542103059, |
|
"grad_norm": 1.335976004600525, |
|
"learning_rate": 4.9371596145789694e-05, |
|
"loss": 7.4267, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7331378299120235, |
|
"grad_norm": 1.340728998184204, |
|
"learning_rate": 4.926686217008798e-05, |
|
"loss": 7.392, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8378718056137411, |
|
"grad_norm": 1.4520059823989868, |
|
"learning_rate": 4.916212819438626e-05, |
|
"loss": 7.3253, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9426057813154587, |
|
"grad_norm": 1.7685532569885254, |
|
"learning_rate": 4.905760368663595e-05, |
|
"loss": 7.3204, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0473397570171763, |
|
"grad_norm": 1.396130084991455, |
|
"learning_rate": 4.8952869710934226e-05, |
|
"loss": 7.2682, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1520737327188941, |
|
"grad_norm": 1.5962079763412476, |
|
"learning_rate": 4.884813573523251e-05, |
|
"loss": 7.2441, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2568077084206117, |
|
"grad_norm": 1.6328166723251343, |
|
"learning_rate": 4.874340175953079e-05, |
|
"loss": 7.2216, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3615416841223293, |
|
"grad_norm": 1.8534362316131592, |
|
"learning_rate": 4.863887725178048e-05, |
|
"loss": 7.1957, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.466275659824047, |
|
"grad_norm": 1.4871692657470703, |
|
"learning_rate": 4.8534143276078766e-05, |
|
"loss": 7.1561, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.5710096355257646, |
|
"grad_norm": 1.8590672016143799, |
|
"learning_rate": 4.8429409300377045e-05, |
|
"loss": 7.1397, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.6757436112274822, |
|
"grad_norm": 1.7009446620941162, |
|
"learning_rate": 4.8324675324675325e-05, |
|
"loss": 7.1149, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.7804775869291998, |
|
"grad_norm": 1.9020010232925415, |
|
"learning_rate": 4.822015081692501e-05, |
|
"loss": 7.1215, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.8852115626309174, |
|
"grad_norm": 2.912442445755005, |
|
"learning_rate": 4.811541684122329e-05, |
|
"loss": 7.0616, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.989945538332635, |
|
"grad_norm": 2.654263496398926, |
|
"learning_rate": 4.801068286552158e-05, |
|
"loss": 7.0508, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.0946795140343526, |
|
"grad_norm": 2.1642003059387207, |
|
"learning_rate": 4.790594888981986e-05, |
|
"loss": 7.0251, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.19941348973607, |
|
"grad_norm": 1.9420874118804932, |
|
"learning_rate": 4.7801424382069544e-05, |
|
"loss": 6.9806, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.3041474654377883, |
|
"grad_norm": 2.2306201457977295, |
|
"learning_rate": 4.769669040636783e-05, |
|
"loss": 6.9721, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.4088814411395054, |
|
"grad_norm": 2.8180389404296875, |
|
"learning_rate": 4.759195643066611e-05, |
|
"loss": 6.9582, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.5136154168412235, |
|
"grad_norm": 2.387949228286743, |
|
"learning_rate": 4.748722245496439e-05, |
|
"loss": 6.9211, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.618349392542941, |
|
"grad_norm": 3.3709394931793213, |
|
"learning_rate": 4.7382697947214076e-05, |
|
"loss": 6.9183, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.7230833682446587, |
|
"grad_norm": 2.567798376083374, |
|
"learning_rate": 4.727796397151236e-05, |
|
"loss": 6.8732, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.8278173439463763, |
|
"grad_norm": 2.6373414993286133, |
|
"learning_rate": 4.717322999581064e-05, |
|
"loss": 6.8658, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.932551319648094, |
|
"grad_norm": 2.2950875759124756, |
|
"learning_rate": 4.706849602010893e-05, |
|
"loss": 6.8436, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.0372852953498115, |
|
"grad_norm": 4.0021514892578125, |
|
"learning_rate": 4.696397151235861e-05, |
|
"loss": 6.8496, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.142019271051529, |
|
"grad_norm": 3.289193630218506, |
|
"learning_rate": 4.685923753665689e-05, |
|
"loss": 6.8047, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.2467532467532467, |
|
"grad_norm": 2.9973654747009277, |
|
"learning_rate": 4.6754503560955175e-05, |
|
"loss": 6.7651, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.3514872224549643, |
|
"grad_norm": 2.9979376792907715, |
|
"learning_rate": 4.664976958525346e-05, |
|
"loss": 6.7768, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.456221198156682, |
|
"grad_norm": 3.263784885406494, |
|
"learning_rate": 4.654524507750315e-05, |
|
"loss": 6.7617, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.5609551738583995, |
|
"grad_norm": 3.330116033554077, |
|
"learning_rate": 4.644051110180143e-05, |
|
"loss": 6.7417, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.665689149560117, |
|
"grad_norm": 3.224337339401245, |
|
"learning_rate": 4.6335777126099714e-05, |
|
"loss": 6.7028, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.7704231252618348, |
|
"grad_norm": 3.21891450881958, |
|
"learning_rate": 4.623104315039799e-05, |
|
"loss": 6.7101, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.875157100963553, |
|
"grad_norm": 2.3559324741363525, |
|
"learning_rate": 4.6126518642647674e-05, |
|
"loss": 6.6892, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.97989107666527, |
|
"grad_norm": 3.1527633666992188, |
|
"learning_rate": 4.602178466694596e-05, |
|
"loss": 6.6851, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.084625052366988, |
|
"grad_norm": 2.9760189056396484, |
|
"learning_rate": 4.591705069124424e-05, |
|
"loss": 6.6704, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.189359028068705, |
|
"grad_norm": 2.8135318756103516, |
|
"learning_rate": 4.5812316715542526e-05, |
|
"loss": 6.6621, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.294093003770423, |
|
"grad_norm": 3.060316324234009, |
|
"learning_rate": 4.570779220779221e-05, |
|
"loss": 6.6298, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.39882697947214, |
|
"grad_norm": 2.7130279541015625, |
|
"learning_rate": 4.560305823209049e-05, |
|
"loss": 6.628, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.5035609551738585, |
|
"grad_norm": 3.156386613845825, |
|
"learning_rate": 4.549853372434017e-05, |
|
"loss": 6.6423, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.6082949308755765, |
|
"grad_norm": 3.039471387863159, |
|
"learning_rate": 4.539379974863846e-05, |
|
"loss": 6.6343, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.713028906577294, |
|
"grad_norm": 3.976949453353882, |
|
"learning_rate": 4.5289065772936745e-05, |
|
"loss": 6.606, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.817762882279011, |
|
"grad_norm": 3.310382604598999, |
|
"learning_rate": 4.5184331797235025e-05, |
|
"loss": 6.5956, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.922496857980729, |
|
"grad_norm": 3.5924322605133057, |
|
"learning_rate": 4.507959782153331e-05, |
|
"loss": 6.5965, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.027230833682447, |
|
"grad_norm": 2.616468667984009, |
|
"learning_rate": 4.497486384583159e-05, |
|
"loss": 6.5788, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.131964809384164, |
|
"grad_norm": 3.3178062438964844, |
|
"learning_rate": 4.487012987012987e-05, |
|
"loss": 6.5684, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.236698785085882, |
|
"grad_norm": 3.7108089923858643, |
|
"learning_rate": 4.476539589442815e-05, |
|
"loss": 6.5756, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.341432760787599, |
|
"grad_norm": 3.396498918533325, |
|
"learning_rate": 4.466087138667784e-05, |
|
"loss": 6.5678, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 5.446166736489317, |
|
"grad_norm": 3.7245748043060303, |
|
"learning_rate": 4.4556137410976123e-05, |
|
"loss": 6.5578, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.5509007121910345, |
|
"grad_norm": 3.6525135040283203, |
|
"learning_rate": 4.44514034352744e-05, |
|
"loss": 6.5385, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.655634687892753, |
|
"grad_norm": 3.4302523136138916, |
|
"learning_rate": 4.434666945957269e-05, |
|
"loss": 6.5143, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.76036866359447, |
|
"grad_norm": 3.762871503829956, |
|
"learning_rate": 4.4242144951822376e-05, |
|
"loss": 6.52, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.865102639296188, |
|
"grad_norm": 2.8195388317108154, |
|
"learning_rate": 4.4137410976120656e-05, |
|
"loss": 6.5213, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.969836614997905, |
|
"grad_norm": 3.013187885284424, |
|
"learning_rate": 4.4032677000418936e-05, |
|
"loss": 6.5052, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 6.074570590699623, |
|
"grad_norm": 2.9772274494171143, |
|
"learning_rate": 4.392794302471722e-05, |
|
"loss": 6.502, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 6.17930456640134, |
|
"grad_norm": 3.2228713035583496, |
|
"learning_rate": 4.38232090490155e-05, |
|
"loss": 6.4889, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 6.284038542103058, |
|
"grad_norm": 3.824286937713623, |
|
"learning_rate": 4.371847507331379e-05, |
|
"loss": 6.4792, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.388772517804776, |
|
"grad_norm": 3.100308656692505, |
|
"learning_rate": 4.361374109761207e-05, |
|
"loss": 6.4816, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 6.4935064935064934, |
|
"grad_norm": 3.4449245929718018, |
|
"learning_rate": 4.350900712191035e-05, |
|
"loss": 6.4786, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.5982404692082115, |
|
"grad_norm": 3.6803085803985596, |
|
"learning_rate": 4.3404482614160034e-05, |
|
"loss": 6.4778, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.702974444909929, |
|
"grad_norm": 3.6413722038269043, |
|
"learning_rate": 4.329974863845832e-05, |
|
"loss": 6.4782, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.807708420611647, |
|
"grad_norm": 3.482905626296997, |
|
"learning_rate": 4.31950146627566e-05, |
|
"loss": 6.4719, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.912442396313364, |
|
"grad_norm": 3.4605376720428467, |
|
"learning_rate": 4.3090280687054887e-05, |
|
"loss": 6.4458, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 7.017176372015082, |
|
"grad_norm": 3.814375877380371, |
|
"learning_rate": 4.2985965647255974e-05, |
|
"loss": 6.4539, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 7.121910347716799, |
|
"grad_norm": 4.238844871520996, |
|
"learning_rate": 4.288123167155425e-05, |
|
"loss": 6.4522, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 7.226644323418517, |
|
"grad_norm": 2.8327670097351074, |
|
"learning_rate": 4.277649769585253e-05, |
|
"loss": 6.4383, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 7.331378299120234, |
|
"grad_norm": 3.1451475620269775, |
|
"learning_rate": 4.267176372015082e-05, |
|
"loss": 6.4451, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 7.436112274821952, |
|
"grad_norm": 3.6858575344085693, |
|
"learning_rate": 4.2567239212400506e-05, |
|
"loss": 6.4313, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 7.5408462505236695, |
|
"grad_norm": 4.258295059204102, |
|
"learning_rate": 4.2462505236698786e-05, |
|
"loss": 6.4143, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 7.645580226225388, |
|
"grad_norm": 4.3574676513671875, |
|
"learning_rate": 4.235777126099707e-05, |
|
"loss": 6.4061, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 7.750314201927106, |
|
"grad_norm": 3.8001816272735596, |
|
"learning_rate": 4.225303728529535e-05, |
|
"loss": 6.3994, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 7.855048177628823, |
|
"grad_norm": 3.487893581390381, |
|
"learning_rate": 4.214851277754504e-05, |
|
"loss": 6.4192, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 7.95978215333054, |
|
"grad_norm": 3.9729723930358887, |
|
"learning_rate": 4.204377880184332e-05, |
|
"loss": 6.407, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 8.064516129032258, |
|
"grad_norm": 3.4465062618255615, |
|
"learning_rate": 4.1939044826141604e-05, |
|
"loss": 6.3867, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 8.169250104733976, |
|
"grad_norm": 3.706404685974121, |
|
"learning_rate": 4.1834310850439884e-05, |
|
"loss": 6.3877, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 8.273984080435694, |
|
"grad_norm": 3.8204259872436523, |
|
"learning_rate": 4.172957687473817e-05, |
|
"loss": 6.3921, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 8.37871805613741, |
|
"grad_norm": 3.4868948459625244, |
|
"learning_rate": 4.162505236698786e-05, |
|
"loss": 6.3729, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 8.483452031839128, |
|
"grad_norm": 3.5007081031799316, |
|
"learning_rate": 4.152031839128614e-05, |
|
"loss": 6.3886, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 8.588186007540846, |
|
"grad_norm": 2.937894582748413, |
|
"learning_rate": 4.1415584415584417e-05, |
|
"loss": 6.3814, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 8.692919983242565, |
|
"grad_norm": 3.529237985610962, |
|
"learning_rate": 4.1310850439882696e-05, |
|
"loss": 6.3722, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 8.79765395894428, |
|
"grad_norm": 3.883575677871704, |
|
"learning_rate": 4.120611646418098e-05, |
|
"loss": 6.3655, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 8.902387934645999, |
|
"grad_norm": 4.439103603363037, |
|
"learning_rate": 4.110159195643067e-05, |
|
"loss": 6.3673, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 9.007121910347717, |
|
"grad_norm": 4.103298664093018, |
|
"learning_rate": 4.099685798072895e-05, |
|
"loss": 6.3659, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 9.111855886049435, |
|
"grad_norm": 3.491204023361206, |
|
"learning_rate": 4.0892124005027235e-05, |
|
"loss": 6.3744, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 9.216589861751151, |
|
"grad_norm": 3.441976547241211, |
|
"learning_rate": 4.0787390029325515e-05, |
|
"loss": 6.3573, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 9.32132383745287, |
|
"grad_norm": 3.58134126663208, |
|
"learning_rate": 4.0682656053623795e-05, |
|
"loss": 6.3407, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 9.426057813154587, |
|
"grad_norm": 3.274592638015747, |
|
"learning_rate": 4.057813154587348e-05, |
|
"loss": 6.3373, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 9.530791788856305, |
|
"grad_norm": 4.296390533447266, |
|
"learning_rate": 4.047339757017177e-05, |
|
"loss": 6.3499, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 9.635525764558023, |
|
"grad_norm": 3.5000336170196533, |
|
"learning_rate": 4.036866359447005e-05, |
|
"loss": 6.3199, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 9.74025974025974, |
|
"grad_norm": 3.4947054386138916, |
|
"learning_rate": 4.0263929618768334e-05, |
|
"loss": 6.3474, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 9.844993715961458, |
|
"grad_norm": 3.3658857345581055, |
|
"learning_rate": 4.0159195643066614e-05, |
|
"loss": 6.3296, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 9.949727691663176, |
|
"grad_norm": 2.9811642169952393, |
|
"learning_rate": 4.0054671135316294e-05, |
|
"loss": 6.345, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 10.054461667364894, |
|
"grad_norm": 4.165875434875488, |
|
"learning_rate": 3.994993715961458e-05, |
|
"loss": 6.3209, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 10.15919564306661, |
|
"grad_norm": 3.6118202209472656, |
|
"learning_rate": 3.9845203183912866e-05, |
|
"loss": 6.3175, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 10.263929618768328, |
|
"grad_norm": 3.930669069290161, |
|
"learning_rate": 3.9740469208211146e-05, |
|
"loss": 6.3231, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 10.368663594470046, |
|
"grad_norm": 3.1688554286956787, |
|
"learning_rate": 3.963594470046083e-05, |
|
"loss": 6.309, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 10.473397570171764, |
|
"grad_norm": 3.6746394634246826, |
|
"learning_rate": 3.953121072475911e-05, |
|
"loss": 6.3077, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 10.57813154587348, |
|
"grad_norm": 3.5134785175323486, |
|
"learning_rate": 3.942647674905739e-05, |
|
"loss": 6.3299, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 10.682865521575199, |
|
"grad_norm": 3.2903287410736084, |
|
"learning_rate": 3.932174277335568e-05, |
|
"loss": 6.3178, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 10.787599497276917, |
|
"grad_norm": 3.5344769954681396, |
|
"learning_rate": 3.921700879765396e-05, |
|
"loss": 6.3139, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 10.892333472978635, |
|
"grad_norm": 3.5710573196411133, |
|
"learning_rate": 3.9112274821952245e-05, |
|
"loss": 6.306, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 10.997067448680351, |
|
"grad_norm": 3.4193336963653564, |
|
"learning_rate": 3.9007540846250524e-05, |
|
"loss": 6.3129, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 11.101801424382069, |
|
"grad_norm": 3.683143377304077, |
|
"learning_rate": 3.890301633850021e-05, |
|
"loss": 6.3084, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 11.206535400083787, |
|
"grad_norm": 3.214221239089966, |
|
"learning_rate": 3.879828236279849e-05, |
|
"loss": 6.302, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 11.311269375785505, |
|
"grad_norm": 3.5691747665405273, |
|
"learning_rate": 3.869354838709678e-05, |
|
"loss": 6.3164, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 11.416003351487223, |
|
"grad_norm": 3.2734036445617676, |
|
"learning_rate": 3.858881441139506e-05, |
|
"loss": 6.2889, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 11.52073732718894, |
|
"grad_norm": 4.049854278564453, |
|
"learning_rate": 3.848428990364474e-05, |
|
"loss": 6.2957, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 11.625471302890658, |
|
"grad_norm": 3.837921380996704, |
|
"learning_rate": 3.837955592794303e-05, |
|
"loss": 6.2806, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 11.730205278592376, |
|
"grad_norm": 3.4606828689575195, |
|
"learning_rate": 3.827482195224131e-05, |
|
"loss": 6.2896, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 11.834939254294094, |
|
"grad_norm": 4.859198093414307, |
|
"learning_rate": 3.8170087976539596e-05, |
|
"loss": 6.273, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 11.93967322999581, |
|
"grad_norm": 4.689023494720459, |
|
"learning_rate": 3.806535400083787e-05, |
|
"loss": 6.2776, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 12.044407205697528, |
|
"grad_norm": 4.234752178192139, |
|
"learning_rate": 3.7960829493087555e-05, |
|
"loss": 6.282, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 12.149141181399246, |
|
"grad_norm": 3.950773239135742, |
|
"learning_rate": 3.785609551738584e-05, |
|
"loss": 6.283, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 12.253875157100964, |
|
"grad_norm": 4.1780548095703125, |
|
"learning_rate": 3.775136154168412e-05, |
|
"loss": 6.2635, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 12.35860913280268, |
|
"grad_norm": 3.2049672603607178, |
|
"learning_rate": 3.764662756598241e-05, |
|
"loss": 6.2858, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 12.463343108504398, |
|
"grad_norm": 3.863649606704712, |
|
"learning_rate": 3.7541893590280694e-05, |
|
"loss": 6.2609, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 12.568077084206116, |
|
"grad_norm": 3.881343364715576, |
|
"learning_rate": 3.743715961457897e-05, |
|
"loss": 6.2695, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 12.672811059907835, |
|
"grad_norm": 3.522132635116577, |
|
"learning_rate": 3.7332635106828654e-05, |
|
"loss": 6.2523, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 12.777545035609553, |
|
"grad_norm": 4.043595790863037, |
|
"learning_rate": 3.722790113112694e-05, |
|
"loss": 6.2546, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 12.882279011311269, |
|
"grad_norm": 3.4860141277313232, |
|
"learning_rate": 3.712316715542522e-05, |
|
"loss": 6.2468, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 12.987012987012987, |
|
"grad_norm": 3.9201574325561523, |
|
"learning_rate": 3.7018433179723506e-05, |
|
"loss": 6.2615, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 13.091746962714705, |
|
"grad_norm": 3.5582118034362793, |
|
"learning_rate": 3.6913699204021786e-05, |
|
"loss": 6.2529, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 13.196480938416423, |
|
"grad_norm": 3.1254093647003174, |
|
"learning_rate": 3.680917469627147e-05, |
|
"loss": 6.2418, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 13.30121491411814, |
|
"grad_norm": 4.058616638183594, |
|
"learning_rate": 3.670444072056975e-05, |
|
"loss": 6.243, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 13.405948889819857, |
|
"grad_norm": 3.5146963596343994, |
|
"learning_rate": 3.659970674486803e-05, |
|
"loss": 6.2595, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 13.510682865521575, |
|
"grad_norm": 3.804818630218506, |
|
"learning_rate": 3.649497276916632e-05, |
|
"loss": 6.2438, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 13.615416841223293, |
|
"grad_norm": 3.591214179992676, |
|
"learning_rate": 3.6390238793464605e-05, |
|
"loss": 6.2266, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 13.72015081692501, |
|
"grad_norm": 4.973635196685791, |
|
"learning_rate": 3.6285504817762885e-05, |
|
"loss": 6.2501, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 13.824884792626728, |
|
"grad_norm": 4.189575672149658, |
|
"learning_rate": 3.618098031001257e-05, |
|
"loss": 6.2341, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 13.929618768328446, |
|
"grad_norm": 4.408186912536621, |
|
"learning_rate": 3.607624633431085e-05, |
|
"loss": 6.227, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 14.034352744030164, |
|
"grad_norm": 4.066199779510498, |
|
"learning_rate": 3.597151235860913e-05, |
|
"loss": 6.2316, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 14.139086719731882, |
|
"grad_norm": 3.8263683319091797, |
|
"learning_rate": 3.586677838290742e-05, |
|
"loss": 6.2322, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 14.243820695433598, |
|
"grad_norm": 4.787020206451416, |
|
"learning_rate": 3.57620444072057e-05, |
|
"loss": 6.2099, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 14.348554671135316, |
|
"grad_norm": 3.5196545124053955, |
|
"learning_rate": 3.565751989945538e-05, |
|
"loss": 6.2425, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 14.453288646837034, |
|
"grad_norm": 4.1746439933776855, |
|
"learning_rate": 3.555278592375367e-05, |
|
"loss": 6.2373, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 14.558022622538752, |
|
"grad_norm": 4.07820463180542, |
|
"learning_rate": 3.544805194805195e-05, |
|
"loss": 6.2099, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 14.662756598240469, |
|
"grad_norm": 3.400038242340088, |
|
"learning_rate": 3.534331797235023e-05, |
|
"loss": 6.216, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 14.767490573942187, |
|
"grad_norm": 4.578042030334473, |
|
"learning_rate": 3.5238793464599916e-05, |
|
"loss": 6.2091, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 14.872224549643905, |
|
"grad_norm": 3.6254208087921143, |
|
"learning_rate": 3.51340594888982e-05, |
|
"loss": 6.2258, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 14.976958525345623, |
|
"grad_norm": 3.496166467666626, |
|
"learning_rate": 3.502932551319648e-05, |
|
"loss": 6.2138, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 15.081692501047339, |
|
"grad_norm": 3.5367865562438965, |
|
"learning_rate": 3.492459153749477e-05, |
|
"loss": 6.213, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 15.186426476749057, |
|
"grad_norm": 3.4754440784454346, |
|
"learning_rate": 3.481985756179305e-05, |
|
"loss": 6.2153, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 15.291160452450775, |
|
"grad_norm": 4.432271957397461, |
|
"learning_rate": 3.4715333054042735e-05, |
|
"loss": 6.2007, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 15.395894428152493, |
|
"grad_norm": 3.8427770137786865, |
|
"learning_rate": 3.4610599078341014e-05, |
|
"loss": 6.2071, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 15.50062840385421, |
|
"grad_norm": 3.9617857933044434, |
|
"learning_rate": 3.4505865102639294e-05, |
|
"loss": 6.2142, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 15.605362379555928, |
|
"grad_norm": 3.769693613052368, |
|
"learning_rate": 3.440113112693758e-05, |
|
"loss": 6.2065, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 15.710096355257646, |
|
"grad_norm": 3.825507402420044, |
|
"learning_rate": 3.429639715123587e-05, |
|
"loss": 6.2072, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 15.814830330959364, |
|
"grad_norm": 3.982872724533081, |
|
"learning_rate": 3.4191872643485554e-05, |
|
"loss": 6.2003, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 15.91956430666108, |
|
"grad_norm": 3.9958648681640625, |
|
"learning_rate": 3.408713866778383e-05, |
|
"loss": 6.1913, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 16.0242982823628, |
|
"grad_norm": 3.947957754135132, |
|
"learning_rate": 3.398240469208211e-05, |
|
"loss": 6.2019, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 16.129032258064516, |
|
"grad_norm": 3.8135411739349365, |
|
"learning_rate": 3.387767071638039e-05, |
|
"loss": 6.1944, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 16.233766233766232, |
|
"grad_norm": 3.940861701965332, |
|
"learning_rate": 3.377293674067868e-05, |
|
"loss": 6.1893, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 16.338500209467952, |
|
"grad_norm": 5.24894905090332, |
|
"learning_rate": 3.3668412232928366e-05, |
|
"loss": 6.1984, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 16.44323418516967, |
|
"grad_norm": 4.470870494842529, |
|
"learning_rate": 3.3563678257226645e-05, |
|
"loss": 6.1958, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 16.547968160871388, |
|
"grad_norm": 3.699892282485962, |
|
"learning_rate": 3.345894428152493e-05, |
|
"loss": 6.1952, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 16.652702136573104, |
|
"grad_norm": 4.136711120605469, |
|
"learning_rate": 3.335421030582321e-05, |
|
"loss": 6.1896, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 16.75743611227482, |
|
"grad_norm": 4.904257297515869, |
|
"learning_rate": 3.324968579807289e-05, |
|
"loss": 6.1715, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 16.86217008797654, |
|
"grad_norm": 4.219280242919922, |
|
"learning_rate": 3.314495182237118e-05, |
|
"loss": 6.1829, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 16.966904063678257, |
|
"grad_norm": 4.426414489746094, |
|
"learning_rate": 3.3040217846669464e-05, |
|
"loss": 6.1782, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 17.071638039379973, |
|
"grad_norm": 4.792020797729492, |
|
"learning_rate": 3.2935483870967744e-05, |
|
"loss": 6.1675, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 17.176372015081693, |
|
"grad_norm": 3.9796903133392334, |
|
"learning_rate": 3.283095936321743e-05, |
|
"loss": 6.179, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 17.28110599078341, |
|
"grad_norm": 4.554388046264648, |
|
"learning_rate": 3.272622538751571e-05, |
|
"loss": 6.1756, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 17.38583996648513, |
|
"grad_norm": 4.024316787719727, |
|
"learning_rate": 3.262149141181399e-05, |
|
"loss": 6.177, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 17.490573942186845, |
|
"grad_norm": 4.059772968292236, |
|
"learning_rate": 3.2516757436112276e-05, |
|
"loss": 6.1818, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 17.59530791788856, |
|
"grad_norm": 4.296391487121582, |
|
"learning_rate": 3.241223292836196e-05, |
|
"loss": 6.1866, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 17.70004189359028, |
|
"grad_norm": 4.008220672607422, |
|
"learning_rate": 3.230749895266024e-05, |
|
"loss": 6.172, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 17.804775869291998, |
|
"grad_norm": 4.639082908630371, |
|
"learning_rate": 3.220276497695853e-05, |
|
"loss": 6.169, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 17.909509844993718, |
|
"grad_norm": 4.635848522186279, |
|
"learning_rate": 3.209803100125681e-05, |
|
"loss": 6.1721, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 18.014243820695434, |
|
"grad_norm": 4.662270545959473, |
|
"learning_rate": 3.199329702555509e-05, |
|
"loss": 6.1575, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 18.11897779639715, |
|
"grad_norm": 4.280701637268066, |
|
"learning_rate": 3.1888772517804775e-05, |
|
"loss": 6.1482, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 18.22371177209887, |
|
"grad_norm": 3.8602380752563477, |
|
"learning_rate": 3.178403854210306e-05, |
|
"loss": 6.1565, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 18.328445747800586, |
|
"grad_norm": 4.634263515472412, |
|
"learning_rate": 3.167930456640134e-05, |
|
"loss": 6.1587, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 18.433179723502302, |
|
"grad_norm": 4.115392208099365, |
|
"learning_rate": 3.157457059069963e-05, |
|
"loss": 6.1436, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 18.537913699204022, |
|
"grad_norm": 3.6665916442871094, |
|
"learning_rate": 3.146983661499791e-05, |
|
"loss": 6.1442, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 18.64264767490574, |
|
"grad_norm": 4.444345951080322, |
|
"learning_rate": 3.1365312107247594e-05, |
|
"loss": 6.1571, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 18.74738165060746, |
|
"grad_norm": 3.792792558670044, |
|
"learning_rate": 3.1260578131545873e-05, |
|
"loss": 6.1535, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 18.852115626309175, |
|
"grad_norm": 3.904019832611084, |
|
"learning_rate": 3.115584415584415e-05, |
|
"loss": 6.1501, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 18.95684960201089, |
|
"grad_norm": 4.531284332275391, |
|
"learning_rate": 3.105111018014244e-05, |
|
"loss": 6.1592, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 19.06158357771261, |
|
"grad_norm": 3.7976317405700684, |
|
"learning_rate": 3.0946376204440726e-05, |
|
"loss": 6.1474, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 19.166317553414327, |
|
"grad_norm": 3.8021469116210938, |
|
"learning_rate": 3.084185169669041e-05, |
|
"loss": 6.1408, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 19.271051529116047, |
|
"grad_norm": 4.194758892059326, |
|
"learning_rate": 3.073711772098869e-05, |
|
"loss": 6.1476, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 19.375785504817763, |
|
"grad_norm": 4.084668159484863, |
|
"learning_rate": 3.063238374528697e-05, |
|
"loss": 6.1443, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 19.48051948051948, |
|
"grad_norm": 4.383222579956055, |
|
"learning_rate": 3.052764976958525e-05, |
|
"loss": 6.1422, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 19.5852534562212, |
|
"grad_norm": 4.250995635986328, |
|
"learning_rate": 3.042312526183494e-05, |
|
"loss": 6.1375, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 19.689987431922916, |
|
"grad_norm": 4.78529691696167, |
|
"learning_rate": 3.0318391286133225e-05, |
|
"loss": 6.1368, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 19.794721407624632, |
|
"grad_norm": 3.4997754096984863, |
|
"learning_rate": 3.0213657310431504e-05, |
|
"loss": 6.1432, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 19.89945538332635, |
|
"grad_norm": 4.723648548126221, |
|
"learning_rate": 3.0108923334729787e-05, |
|
"loss": 6.1263, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 20.004189359028068, |
|
"grad_norm": 3.930859088897705, |
|
"learning_rate": 3.0004398826979474e-05, |
|
"loss": 6.1387, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 20.108923334729788, |
|
"grad_norm": 4.286599159240723, |
|
"learning_rate": 2.9899664851277754e-05, |
|
"loss": 6.1187, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 20.213657310431504, |
|
"grad_norm": 3.8475680351257324, |
|
"learning_rate": 2.9794930875576037e-05, |
|
"loss": 6.1312, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 20.31839128613322, |
|
"grad_norm": 4.844906806945801, |
|
"learning_rate": 2.9690196899874323e-05, |
|
"loss": 6.1457, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 20.42312526183494, |
|
"grad_norm": 4.691315174102783, |
|
"learning_rate": 2.958567239212401e-05, |
|
"loss": 6.1351, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 20.527859237536656, |
|
"grad_norm": 6.15250825881958, |
|
"learning_rate": 2.9480938416422286e-05, |
|
"loss": 6.1275, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 20.632593213238373, |
|
"grad_norm": 3.8872599601745605, |
|
"learning_rate": 2.9376204440720573e-05, |
|
"loss": 6.1275, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 20.737327188940093, |
|
"grad_norm": 4.541051864624023, |
|
"learning_rate": 2.9271470465018852e-05, |
|
"loss": 6.1472, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 20.84206116464181, |
|
"grad_norm": 4.556408405303955, |
|
"learning_rate": 2.9166736489317135e-05, |
|
"loss": 6.1369, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 20.94679514034353, |
|
"grad_norm": 4.5567498207092285, |
|
"learning_rate": 2.9062211981566822e-05, |
|
"loss": 6.1148, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 21.051529116045245, |
|
"grad_norm": 4.647518634796143, |
|
"learning_rate": 2.8957478005865102e-05, |
|
"loss": 6.1281, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 21.15626309174696, |
|
"grad_norm": 4.372421741485596, |
|
"learning_rate": 2.8852744030163388e-05, |
|
"loss": 6.1226, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 21.26099706744868, |
|
"grad_norm": 4.270533084869385, |
|
"learning_rate": 2.874801005446167e-05, |
|
"loss": 6.123, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 21.365731043150397, |
|
"grad_norm": 3.5596370697021484, |
|
"learning_rate": 2.8643485546711358e-05, |
|
"loss": 6.1402, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 21.470465018852117, |
|
"grad_norm": 5.230384826660156, |
|
"learning_rate": 2.8538751571009638e-05, |
|
"loss": 6.1199, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 21.575198994553833, |
|
"grad_norm": 3.9881417751312256, |
|
"learning_rate": 2.843401759530792e-05, |
|
"loss": 6.1244, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 21.67993297025555, |
|
"grad_norm": 4.617568016052246, |
|
"learning_rate": 2.83292836196062e-05, |
|
"loss": 6.1154, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 21.78466694595727, |
|
"grad_norm": 4.641009330749512, |
|
"learning_rate": 2.8224759111855887e-05, |
|
"loss": 6.113, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 21.889400921658986, |
|
"grad_norm": 4.005772113800049, |
|
"learning_rate": 2.812002513615417e-05, |
|
"loss": 6.1163, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 21.994134897360702, |
|
"grad_norm": 4.2611799240112305, |
|
"learning_rate": 2.801529116045245e-05, |
|
"loss": 6.1023, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 22.098868873062422, |
|
"grad_norm": 4.568357467651367, |
|
"learning_rate": 2.7910557184750736e-05, |
|
"loss": 6.1169, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 22.203602848764138, |
|
"grad_norm": 4.323103427886963, |
|
"learning_rate": 2.780603267700042e-05, |
|
"loss": 6.1226, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 22.308336824465858, |
|
"grad_norm": 4.507444381713867, |
|
"learning_rate": 2.77012987012987e-05, |
|
"loss": 6.1052, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 22.413070800167574, |
|
"grad_norm": 4.301244735717773, |
|
"learning_rate": 2.7596564725596985e-05, |
|
"loss": 6.0944, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 22.51780477586929, |
|
"grad_norm": 4.984853267669678, |
|
"learning_rate": 2.749183074989527e-05, |
|
"loss": 6.1125, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 22.62253875157101, |
|
"grad_norm": 4.682931423187256, |
|
"learning_rate": 2.7387096774193548e-05, |
|
"loss": 6.1158, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 22.727272727272727, |
|
"grad_norm": 4.494015693664551, |
|
"learning_rate": 2.7282572266443235e-05, |
|
"loss": 6.1035, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 22.832006702974446, |
|
"grad_norm": 3.880779981613159, |
|
"learning_rate": 2.717783829074152e-05, |
|
"loss": 6.1084, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 22.936740678676163, |
|
"grad_norm": 4.154653072357178, |
|
"learning_rate": 2.7073104315039798e-05, |
|
"loss": 6.0945, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 23.04147465437788, |
|
"grad_norm": 5.443271160125732, |
|
"learning_rate": 2.6968370339338084e-05, |
|
"loss": 6.1016, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 23.1462086300796, |
|
"grad_norm": 4.298133373260498, |
|
"learning_rate": 2.686384583158777e-05, |
|
"loss": 6.103, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 23.250942605781315, |
|
"grad_norm": 4.379884243011475, |
|
"learning_rate": 2.675911185588605e-05, |
|
"loss": 6.0814, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 23.35567658148303, |
|
"grad_norm": 6.175398349761963, |
|
"learning_rate": 2.6654377880184333e-05, |
|
"loss": 6.1088, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 23.46041055718475, |
|
"grad_norm": 4.121715068817139, |
|
"learning_rate": 2.6549643904482613e-05, |
|
"loss": 6.0966, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 23.565144532886467, |
|
"grad_norm": 5.040287494659424, |
|
"learning_rate": 2.6444909928780896e-05, |
|
"loss": 6.098, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 23.669878508588187, |
|
"grad_norm": 4.766879081726074, |
|
"learning_rate": 2.6340175953079182e-05, |
|
"loss": 6.0957, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 23.774612484289904, |
|
"grad_norm": 5.87930965423584, |
|
"learning_rate": 2.623565144532887e-05, |
|
"loss": 6.1089, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 23.87934645999162, |
|
"grad_norm": 5.318653583526611, |
|
"learning_rate": 2.613091746962715e-05, |
|
"loss": 6.0761, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 23.98408043569334, |
|
"grad_norm": 4.465319633483887, |
|
"learning_rate": 2.6026183493925432e-05, |
|
"loss": 6.0826, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 24.088814411395056, |
|
"grad_norm": 4.640571594238281, |
|
"learning_rate": 2.592144951822371e-05, |
|
"loss": 6.0805, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 24.193548387096776, |
|
"grad_norm": 4.252554416656494, |
|
"learning_rate": 2.5816925010473398e-05, |
|
"loss": 6.0701, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 24.298282362798492, |
|
"grad_norm": 4.704644203186035, |
|
"learning_rate": 2.571219103477168e-05, |
|
"loss": 6.0936, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 24.40301633850021, |
|
"grad_norm": 4.601324558258057, |
|
"learning_rate": 2.560745705906996e-05, |
|
"loss": 6.0758, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 24.507750314201928, |
|
"grad_norm": 4.380444526672363, |
|
"learning_rate": 2.5502723083368247e-05, |
|
"loss": 6.0871, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 24.612484289903644, |
|
"grad_norm": 4.119806289672852, |
|
"learning_rate": 2.5397989107666527e-05, |
|
"loss": 6.102, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 24.71721826560536, |
|
"grad_norm": 3.9712698459625244, |
|
"learning_rate": 2.5293464599916217e-05, |
|
"loss": 6.0999, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 24.82195224130708, |
|
"grad_norm": 5.146612167358398, |
|
"learning_rate": 2.5188730624214497e-05, |
|
"loss": 6.0947, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 24.926686217008797, |
|
"grad_norm": 4.406741142272949, |
|
"learning_rate": 2.508399664851278e-05, |
|
"loss": 6.0829, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 25.031420192710517, |
|
"grad_norm": 5.4739766120910645, |
|
"learning_rate": 2.497926267281106e-05, |
|
"loss": 6.0598, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 25.136154168412233, |
|
"grad_norm": 4.6231794357299805, |
|
"learning_rate": 2.4874738165060746e-05, |
|
"loss": 6.072, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 25.24088814411395, |
|
"grad_norm": 4.47750186920166, |
|
"learning_rate": 2.477000418935903e-05, |
|
"loss": 6.0664, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 25.34562211981567, |
|
"grad_norm": 5.023014068603516, |
|
"learning_rate": 2.4665270213657312e-05, |
|
"loss": 6.0894, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 25.450356095517385, |
|
"grad_norm": 5.687644004821777, |
|
"learning_rate": 2.4560536237955595e-05, |
|
"loss": 6.0936, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 25.555090071219105, |
|
"grad_norm": 4.534958362579346, |
|
"learning_rate": 2.4455802262253878e-05, |
|
"loss": 6.0794, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 25.65982404692082, |
|
"grad_norm": 5.563751697540283, |
|
"learning_rate": 2.435127775450356e-05, |
|
"loss": 6.081, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 25.764558022622538, |
|
"grad_norm": 4.613626956939697, |
|
"learning_rate": 2.4246543778801845e-05, |
|
"loss": 6.0982, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 25.869291998324258, |
|
"grad_norm": 4.645818710327148, |
|
"learning_rate": 2.4141809803100128e-05, |
|
"loss": 6.0665, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 25.974025974025974, |
|
"grad_norm": 4.9156928062438965, |
|
"learning_rate": 2.4037075827398407e-05, |
|
"loss": 6.0674, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 26.07875994972769, |
|
"grad_norm": 4.7342305183410645, |
|
"learning_rate": 2.3932551319648094e-05, |
|
"loss": 6.0741, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 26.18349392542941, |
|
"grad_norm": 4.607081413269043, |
|
"learning_rate": 2.3827817343946377e-05, |
|
"loss": 6.0626, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 26.288227901131126, |
|
"grad_norm": 4.820442199707031, |
|
"learning_rate": 2.372308336824466e-05, |
|
"loss": 6.0936, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 26.392961876832846, |
|
"grad_norm": 4.549975395202637, |
|
"learning_rate": 2.3618349392542943e-05, |
|
"loss": 6.0804, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 26.497695852534562, |
|
"grad_norm": 4.722150802612305, |
|
"learning_rate": 2.351382488479263e-05, |
|
"loss": 6.0555, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 26.60242982823628, |
|
"grad_norm": 4.2948408126831055, |
|
"learning_rate": 2.340909090909091e-05, |
|
"loss": 6.0626, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 26.707163803938, |
|
"grad_norm": 4.246878623962402, |
|
"learning_rate": 2.3304356933389193e-05, |
|
"loss": 6.0577, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 26.811897779639715, |
|
"grad_norm": 4.165809154510498, |
|
"learning_rate": 2.3199622957687476e-05, |
|
"loss": 6.0608, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 26.916631755341434, |
|
"grad_norm": 4.3159894943237305, |
|
"learning_rate": 2.309488898198576e-05, |
|
"loss": 6.0806, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 27.02136573104315, |
|
"grad_norm": 4.15300989151001, |
|
"learning_rate": 2.2990364474235442e-05, |
|
"loss": 6.0654, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 27.126099706744867, |
|
"grad_norm": 4.730154991149902, |
|
"learning_rate": 2.2885630498533725e-05, |
|
"loss": 6.0567, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 27.230833682446587, |
|
"grad_norm": 4.300974369049072, |
|
"learning_rate": 2.2780896522832008e-05, |
|
"loss": 6.0449, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 27.335567658148303, |
|
"grad_norm": 4.148283958435059, |
|
"learning_rate": 2.2676162547130288e-05, |
|
"loss": 6.0491, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 27.44030163385002, |
|
"grad_norm": 4.9924421310424805, |
|
"learning_rate": 2.2571638039379974e-05, |
|
"loss": 6.0491, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 27.54503560955174, |
|
"grad_norm": 5.713706016540527, |
|
"learning_rate": 2.246690406367826e-05, |
|
"loss": 6.0396, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 27.649769585253456, |
|
"grad_norm": 5.007369518280029, |
|
"learning_rate": 2.236217008797654e-05, |
|
"loss": 6.0378, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 27.754503560955175, |
|
"grad_norm": 4.500640392303467, |
|
"learning_rate": 2.2257436112274823e-05, |
|
"loss": 6.0313, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 27.85923753665689, |
|
"grad_norm": 4.709275722503662, |
|
"learning_rate": 2.2152702136573107e-05, |
|
"loss": 6.0278, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 27.963971512358608, |
|
"grad_norm": 4.891386032104492, |
|
"learning_rate": 2.204817762882279e-05, |
|
"loss": 6.0267, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 28.068705488060328, |
|
"grad_norm": 4.82666540145874, |
|
"learning_rate": 2.1943443653121073e-05, |
|
"loss": 5.986, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 28.173439463762044, |
|
"grad_norm": 4.489607810974121, |
|
"learning_rate": 2.1838709677419356e-05, |
|
"loss": 6.0209, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 28.278173439463764, |
|
"grad_norm": 4.719301700592041, |
|
"learning_rate": 2.173397570171764e-05, |
|
"loss": 5.9998, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 28.38290741516548, |
|
"grad_norm": 5.639565467834473, |
|
"learning_rate": 2.1629451193967322e-05, |
|
"loss": 5.9881, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 28.487641390867196, |
|
"grad_norm": 4.745512008666992, |
|
"learning_rate": 2.1524717218265605e-05, |
|
"loss": 6.0002, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 28.592375366568916, |
|
"grad_norm": 5.661725997924805, |
|
"learning_rate": 2.141998324256389e-05, |
|
"loss": 5.9967, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 28.697109342270632, |
|
"grad_norm": 5.3391194343566895, |
|
"learning_rate": 2.131524926686217e-05, |
|
"loss": 6.0024, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 28.80184331797235, |
|
"grad_norm": 5.1614089012146, |
|
"learning_rate": 2.1210724759111858e-05, |
|
"loss": 5.9992, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 28.90657729367407, |
|
"grad_norm": 5.429248332977295, |
|
"learning_rate": 2.110599078341014e-05, |
|
"loss": 5.9929, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 29.011311269375785, |
|
"grad_norm": 5.1270012855529785, |
|
"learning_rate": 2.100125680770842e-05, |
|
"loss": 5.9883, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 29.116045245077505, |
|
"grad_norm": 5.027891159057617, |
|
"learning_rate": 2.0896522832006704e-05, |
|
"loss": 5.963, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 29.22077922077922, |
|
"grad_norm": 5.712099552154541, |
|
"learning_rate": 2.079199832425639e-05, |
|
"loss": 5.9811, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 29.325513196480937, |
|
"grad_norm": 4.954220294952393, |
|
"learning_rate": 2.0687264348554674e-05, |
|
"loss": 5.9808, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 29.430247172182657, |
|
"grad_norm": 5.713419437408447, |
|
"learning_rate": 2.0582530372852953e-05, |
|
"loss": 5.9887, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 29.534981147884373, |
|
"grad_norm": 4.683711528778076, |
|
"learning_rate": 2.0477796397151236e-05, |
|
"loss": 5.9612, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 29.63971512358609, |
|
"grad_norm": 5.164538383483887, |
|
"learning_rate": 2.0373271889400923e-05, |
|
"loss": 5.993, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 29.74444909928781, |
|
"grad_norm": 5.386078357696533, |
|
"learning_rate": 2.0268537913699203e-05, |
|
"loss": 5.9735, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 29.849183074989526, |
|
"grad_norm": 4.4406418800354, |
|
"learning_rate": 2.016380393799749e-05, |
|
"loss": 5.9672, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 29.953917050691246, |
|
"grad_norm": 5.029815673828125, |
|
"learning_rate": 2.0059069962295772e-05, |
|
"loss": 5.961, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 30.058651026392962, |
|
"grad_norm": 4.666591167449951, |
|
"learning_rate": 1.9954335986594052e-05, |
|
"loss": 5.9505, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 30.163385002094678, |
|
"grad_norm": 6.975547790527344, |
|
"learning_rate": 1.9849602010892335e-05, |
|
"loss": 5.956, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 30.268118977796398, |
|
"grad_norm": 4.687684535980225, |
|
"learning_rate": 1.974507750314202e-05, |
|
"loss": 5.9475, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 30.372852953498114, |
|
"grad_norm": 5.594231605529785, |
|
"learning_rate": 1.96403435274403e-05, |
|
"loss": 5.9496, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 30.477586929199834, |
|
"grad_norm": 4.879722595214844, |
|
"learning_rate": 1.9535609551738584e-05, |
|
"loss": 5.9577, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 30.58232090490155, |
|
"grad_norm": 5.470447540283203, |
|
"learning_rate": 1.9430875576036867e-05, |
|
"loss": 5.9672, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 30.687054880603267, |
|
"grad_norm": 5.818385124206543, |
|
"learning_rate": 1.932614160033515e-05, |
|
"loss": 5.9501, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 30.791788856304986, |
|
"grad_norm": 5.907487392425537, |
|
"learning_rate": 1.9221617092584834e-05, |
|
"loss": 5.9458, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 30.896522832006703, |
|
"grad_norm": 4.739224433898926, |
|
"learning_rate": 1.911688311688312e-05, |
|
"loss": 5.935, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 31.00125680770842, |
|
"grad_norm": 4.57131814956665, |
|
"learning_rate": 1.90121491411814e-05, |
|
"loss": 5.945, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 31.10599078341014, |
|
"grad_norm": 5.128586769104004, |
|
"learning_rate": 1.8907415165479683e-05, |
|
"loss": 5.9494, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 31.210724759111855, |
|
"grad_norm": 4.871676921844482, |
|
"learning_rate": 1.880289065772937e-05, |
|
"loss": 5.9415, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 31.315458734813575, |
|
"grad_norm": 5.380068778991699, |
|
"learning_rate": 1.8698156682027652e-05, |
|
"loss": 5.939, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 31.42019271051529, |
|
"grad_norm": 5.430812835693359, |
|
"learning_rate": 1.8593422706325932e-05, |
|
"loss": 5.9276, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 31.524926686217007, |
|
"grad_norm": 4.7710442543029785, |
|
"learning_rate": 1.8488688730624215e-05, |
|
"loss": 5.9413, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 31.629660661918727, |
|
"grad_norm": 5.183919906616211, |
|
"learning_rate": 1.8383954754922498e-05, |
|
"loss": 5.9257, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 31.734394637620444, |
|
"grad_norm": 4.851598739624023, |
|
"learning_rate": 1.827943024717218e-05, |
|
"loss": 5.9251, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 31.839128613322163, |
|
"grad_norm": 4.835882663726807, |
|
"learning_rate": 1.8174696271470464e-05, |
|
"loss": 5.92, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 31.94386258902388, |
|
"grad_norm": 5.428823947906494, |
|
"learning_rate": 1.8069962295768748e-05, |
|
"loss": 5.9146, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 32.0485965647256, |
|
"grad_norm": 6.11329984664917, |
|
"learning_rate": 1.796522832006703e-05, |
|
"loss": 5.9179, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 32.15333054042731, |
|
"grad_norm": 4.836859226226807, |
|
"learning_rate": 1.7860703812316717e-05, |
|
"loss": 5.9189, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 32.25806451612903, |
|
"grad_norm": 4.598475456237793, |
|
"learning_rate": 1.7755969836615e-05, |
|
"loss": 5.9207, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 32.36279849183075, |
|
"grad_norm": 4.638394832611084, |
|
"learning_rate": 1.765123586091328e-05, |
|
"loss": 5.915, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 32.467532467532465, |
|
"grad_norm": 5.637279987335205, |
|
"learning_rate": 1.7546501885211563e-05, |
|
"loss": 5.9228, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 32.572266443234184, |
|
"grad_norm": 4.516068458557129, |
|
"learning_rate": 1.7441767909509846e-05, |
|
"loss": 5.9322, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 32.677000418935904, |
|
"grad_norm": 4.652084827423096, |
|
"learning_rate": 1.7337243401759533e-05, |
|
"loss": 5.9395, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 32.78173439463762, |
|
"grad_norm": 5.667607307434082, |
|
"learning_rate": 1.7232509426057812e-05, |
|
"loss": 5.9172, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 32.88646837033934, |
|
"grad_norm": 4.980391025543213, |
|
"learning_rate": 1.7127775450356095e-05, |
|
"loss": 5.934, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 32.99120234604106, |
|
"grad_norm": 4.39646053314209, |
|
"learning_rate": 1.702304147465438e-05, |
|
"loss": 5.914, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 33.095936321742776, |
|
"grad_norm": 5.263533115386963, |
|
"learning_rate": 1.6918516966904062e-05, |
|
"loss": 5.9094, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 33.20067029744449, |
|
"grad_norm": 4.661126136779785, |
|
"learning_rate": 1.6813782991202348e-05, |
|
"loss": 5.9189, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 33.30540427314621, |
|
"grad_norm": 4.935306549072266, |
|
"learning_rate": 1.670904901550063e-05, |
|
"loss": 5.8944, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 33.41013824884793, |
|
"grad_norm": 5.82065486907959, |
|
"learning_rate": 1.660431503979891e-05, |
|
"loss": 5.892, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 33.51487222454964, |
|
"grad_norm": 4.6220927238464355, |
|
"learning_rate": 1.6499581064097194e-05, |
|
"loss": 5.9022, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 33.61960620025136, |
|
"grad_norm": 5.109046936035156, |
|
"learning_rate": 1.639505655634688e-05, |
|
"loss": 5.8933, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 33.72434017595308, |
|
"grad_norm": 5.230437278747559, |
|
"learning_rate": 1.6290322580645164e-05, |
|
"loss": 5.9107, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 33.829074151654794, |
|
"grad_norm": 6.466080188751221, |
|
"learning_rate": 1.6185588604943443e-05, |
|
"loss": 5.9077, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 33.933808127356514, |
|
"grad_norm": 4.655428409576416, |
|
"learning_rate": 1.6080854629241726e-05, |
|
"loss": 5.9186, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 34.038542103058234, |
|
"grad_norm": 6.136354923248291, |
|
"learning_rate": 1.597612065354001e-05, |
|
"loss": 5.8815, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 34.143276078759946, |
|
"grad_norm": 5.668376445770264, |
|
"learning_rate": 1.587138667783829e-05, |
|
"loss": 5.8872, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 34.248010054461666, |
|
"grad_norm": 5.579314708709717, |
|
"learning_rate": 1.5766862170087976e-05, |
|
"loss": 5.8966, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 34.352744030163386, |
|
"grad_norm": 5.474893569946289, |
|
"learning_rate": 1.5662128194386262e-05, |
|
"loss": 5.8865, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 34.457478005865106, |
|
"grad_norm": 4.853377342224121, |
|
"learning_rate": 1.5557394218684542e-05, |
|
"loss": 5.8718, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 34.56221198156682, |
|
"grad_norm": 4.8684563636779785, |
|
"learning_rate": 1.5452660242982825e-05, |
|
"loss": 5.8896, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 34.66694595726854, |
|
"grad_norm": 4.851233959197998, |
|
"learning_rate": 1.5347926267281108e-05, |
|
"loss": 5.8908, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 34.77167993297026, |
|
"grad_norm": 6.647628307342529, |
|
"learning_rate": 1.524319229157939e-05, |
|
"loss": 5.8972, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 34.87641390867197, |
|
"grad_norm": 5.826745986938477, |
|
"learning_rate": 1.5138667783829074e-05, |
|
"loss": 5.8852, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 34.98114788437369, |
|
"grad_norm": 5.109675407409668, |
|
"learning_rate": 1.5033933808127357e-05, |
|
"loss": 5.8971, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 35.08588186007541, |
|
"grad_norm": 5.743017196655273, |
|
"learning_rate": 1.4929199832425639e-05, |
|
"loss": 5.8892, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 35.19061583577712, |
|
"grad_norm": 4.862270355224609, |
|
"learning_rate": 1.482446585672392e-05, |
|
"loss": 5.8976, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 35.29534981147884, |
|
"grad_norm": 5.532686233520508, |
|
"learning_rate": 1.4719941348973607e-05, |
|
"loss": 5.8716, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 35.40008378718056, |
|
"grad_norm": 5.498019695281982, |
|
"learning_rate": 1.4615207373271891e-05, |
|
"loss": 5.8768, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 35.504817762882276, |
|
"grad_norm": 4.7324042320251465, |
|
"learning_rate": 1.4510473397570173e-05, |
|
"loss": 5.8739, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 35.609551738583995, |
|
"grad_norm": 4.973413944244385, |
|
"learning_rate": 1.4405739421868456e-05, |
|
"loss": 5.8801, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 35.714285714285715, |
|
"grad_norm": 4.977658271789551, |
|
"learning_rate": 1.430121491411814e-05, |
|
"loss": 5.8644, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 35.819019689987435, |
|
"grad_norm": 5.551715850830078, |
|
"learning_rate": 1.4196480938416424e-05, |
|
"loss": 5.8789, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 35.92375366568915, |
|
"grad_norm": 5.135740756988525, |
|
"learning_rate": 1.4091746962714705e-05, |
|
"loss": 5.8862, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 36.02848764139087, |
|
"grad_norm": 5.068655967712402, |
|
"learning_rate": 1.3987012987012987e-05, |
|
"loss": 5.879, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 36.13322161709259, |
|
"grad_norm": 5.393857479095459, |
|
"learning_rate": 1.388227901131127e-05, |
|
"loss": 5.8544, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 36.2379555927943, |
|
"grad_norm": 5.854538440704346, |
|
"learning_rate": 1.3777545035609551e-05, |
|
"loss": 5.8824, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 36.34268956849602, |
|
"grad_norm": 5.566401481628418, |
|
"learning_rate": 1.3673020527859238e-05, |
|
"loss": 5.8587, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 36.44742354419774, |
|
"grad_norm": 6.091250896453857, |
|
"learning_rate": 1.3568286552157519e-05, |
|
"loss": 5.8624, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 36.55215751989945, |
|
"grad_norm": 4.826417922973633, |
|
"learning_rate": 1.3463552576455804e-05, |
|
"loss": 5.879, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 36.65689149560117, |
|
"grad_norm": 5.28770637512207, |
|
"learning_rate": 1.3358818600754087e-05, |
|
"loss": 5.8632, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 36.76162547130289, |
|
"grad_norm": 5.072086811065674, |
|
"learning_rate": 1.3254084625052368e-05, |
|
"loss": 5.8698, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 36.866359447004605, |
|
"grad_norm": 6.194067001342773, |
|
"learning_rate": 1.3149560117302053e-05, |
|
"loss": 5.8701, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 36.971093422706325, |
|
"grad_norm": 5.250491142272949, |
|
"learning_rate": 1.3044826141600336e-05, |
|
"loss": 5.855, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 37.075827398408045, |
|
"grad_norm": 4.9726080894470215, |
|
"learning_rate": 1.2940092165898618e-05, |
|
"loss": 5.8613, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 37.180561374109764, |
|
"grad_norm": 5.526548385620117, |
|
"learning_rate": 1.28353581901969e-05, |
|
"loss": 5.8519, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 37.28529534981148, |
|
"grad_norm": 5.5989861488342285, |
|
"learning_rate": 1.2730624214495182e-05, |
|
"loss": 5.8642, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 37.3900293255132, |
|
"grad_norm": 5.138686180114746, |
|
"learning_rate": 1.2625890238793465e-05, |
|
"loss": 5.852, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 37.49476330121492, |
|
"grad_norm": 5.0514326095581055, |
|
"learning_rate": 1.252115626309175e-05, |
|
"loss": 5.8484, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 37.59949727691663, |
|
"grad_norm": 4.9300360679626465, |
|
"learning_rate": 1.241642228739003e-05, |
|
"loss": 5.849, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 37.70423125261835, |
|
"grad_norm": 5.487224102020264, |
|
"learning_rate": 1.2311897779639716e-05, |
|
"loss": 5.8562, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 37.80896522832007, |
|
"grad_norm": 5.826539516448975, |
|
"learning_rate": 1.2207163803937999e-05, |
|
"loss": 5.8665, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 37.91369920402178, |
|
"grad_norm": 5.733819961547852, |
|
"learning_rate": 1.2102639296187684e-05, |
|
"loss": 5.8569, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 38.0184331797235, |
|
"grad_norm": 4.917960166931152, |
|
"learning_rate": 1.1997905320485967e-05, |
|
"loss": 5.8395, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 38.12316715542522, |
|
"grad_norm": 5.337119102478027, |
|
"learning_rate": 1.1893171344784248e-05, |
|
"loss": 5.854, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 38.227901131126934, |
|
"grad_norm": 5.299139022827148, |
|
"learning_rate": 1.178843736908253e-05, |
|
"loss": 5.8433, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 38.332635106828654, |
|
"grad_norm": 5.900153160095215, |
|
"learning_rate": 1.1683703393380813e-05, |
|
"loss": 5.8535, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 38.437369082530374, |
|
"grad_norm": 6.776584625244141, |
|
"learning_rate": 1.15791788856305e-05, |
|
"loss": 5.8454, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 38.542103058232094, |
|
"grad_norm": 6.258368015289307, |
|
"learning_rate": 1.1474444909928783e-05, |
|
"loss": 5.8354, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 38.64683703393381, |
|
"grad_norm": 5.288670539855957, |
|
"learning_rate": 1.1369710934227064e-05, |
|
"loss": 5.8458, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 38.751571009635526, |
|
"grad_norm": 5.596650123596191, |
|
"learning_rate": 1.1264976958525345e-05, |
|
"loss": 5.8387, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 38.856304985337246, |
|
"grad_norm": 5.121638774871826, |
|
"learning_rate": 1.1160452450775032e-05, |
|
"loss": 5.8268, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 38.96103896103896, |
|
"grad_norm": 4.5758442878723145, |
|
"learning_rate": 1.1055718475073313e-05, |
|
"loss": 5.83, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 39.06577293674068, |
|
"grad_norm": 5.161282539367676, |
|
"learning_rate": 1.0950984499371596e-05, |
|
"loss": 5.8544, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 39.1705069124424, |
|
"grad_norm": 4.628884315490723, |
|
"learning_rate": 1.084625052366988e-05, |
|
"loss": 5.8474, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 39.27524088814411, |
|
"grad_norm": 5.854598045349121, |
|
"learning_rate": 1.074151654796816e-05, |
|
"loss": 5.8501, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 39.37997486384583, |
|
"grad_norm": 5.315525054931641, |
|
"learning_rate": 1.0636782572266444e-05, |
|
"loss": 5.8265, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 39.48470883954755, |
|
"grad_norm": 6.078185081481934, |
|
"learning_rate": 1.0532048596564727e-05, |
|
"loss": 5.8419, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 39.589442815249264, |
|
"grad_norm": 5.223086357116699, |
|
"learning_rate": 1.0427524088814412e-05, |
|
"loss": 5.8197, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 39.69417679095098, |
|
"grad_norm": 5.235757827758789, |
|
"learning_rate": 1.0322790113112695e-05, |
|
"loss": 5.8245, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 39.7989107666527, |
|
"grad_norm": 5.124643325805664, |
|
"learning_rate": 1.0218056137410976e-05, |
|
"loss": 5.839, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 39.90364474235442, |
|
"grad_norm": 5.613321304321289, |
|
"learning_rate": 1.011332216170926e-05, |
|
"loss": 5.844, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 40.008378718056136, |
|
"grad_norm": 5.873430252075195, |
|
"learning_rate": 1.0008588186007542e-05, |
|
"loss": 5.837, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 40.113112693757856, |
|
"grad_norm": 5.089309215545654, |
|
"learning_rate": 9.903854210305824e-06, |
|
"loss": 5.8384, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 40.217846669459576, |
|
"grad_norm": 7.3569817543029785, |
|
"learning_rate": 9.79932970255551e-06, |
|
"loss": 5.8232, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 40.32258064516129, |
|
"grad_norm": 6.024489402770996, |
|
"learning_rate": 9.694595726853792e-06, |
|
"loss": 5.8292, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 40.42731462086301, |
|
"grad_norm": 5.7150983810424805, |
|
"learning_rate": 9.589861751152073e-06, |
|
"loss": 5.8614, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 40.53204859656473, |
|
"grad_norm": 4.717107772827148, |
|
"learning_rate": 9.485127775450356e-06, |
|
"loss": 5.8092, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 40.63678257226644, |
|
"grad_norm": 4.9722490310668945, |
|
"learning_rate": 9.380603267700043e-06, |
|
"loss": 5.8231, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 40.74151654796816, |
|
"grad_norm": 5.593094825744629, |
|
"learning_rate": 9.275869291998326e-06, |
|
"loss": 5.8339, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 40.84625052366988, |
|
"grad_norm": 5.731310844421387, |
|
"learning_rate": 9.171135316296607e-06, |
|
"loss": 5.8381, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 40.95098449937159, |
|
"grad_norm": 5.072065353393555, |
|
"learning_rate": 9.066401340594889e-06, |
|
"loss": 5.8367, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 41.05571847507331, |
|
"grad_norm": 5.219040870666504, |
|
"learning_rate": 8.961667364893172e-06, |
|
"loss": 5.8234, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 41.16045245077503, |
|
"grad_norm": 5.844238758087158, |
|
"learning_rate": 8.856933389191455e-06, |
|
"loss": 5.8347, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 41.26518642647675, |
|
"grad_norm": 6.088447093963623, |
|
"learning_rate": 8.752199413489736e-06, |
|
"loss": 5.8178, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 41.369920402178465, |
|
"grad_norm": 5.14108943939209, |
|
"learning_rate": 8.647465437788019e-06, |
|
"loss": 5.8248, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 41.474654377880185, |
|
"grad_norm": 5.424249172210693, |
|
"learning_rate": 8.542940930037704e-06, |
|
"loss": 5.8113, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 41.579388353581905, |
|
"grad_norm": 4.888121604919434, |
|
"learning_rate": 8.43841642228739e-06, |
|
"loss": 5.8111, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 41.68412232928362, |
|
"grad_norm": 4.9909515380859375, |
|
"learning_rate": 8.333682446585672e-06, |
|
"loss": 5.8276, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 41.78885630498534, |
|
"grad_norm": 5.032175540924072, |
|
"learning_rate": 8.228948470883955e-06, |
|
"loss": 5.8332, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 41.89359028068706, |
|
"grad_norm": 5.116880416870117, |
|
"learning_rate": 8.124214495182238e-06, |
|
"loss": 5.8233, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 41.99832425638877, |
|
"grad_norm": 5.235647678375244, |
|
"learning_rate": 8.019689987431923e-06, |
|
"loss": 5.8297, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 42.10305823209049, |
|
"grad_norm": 5.445380210876465, |
|
"learning_rate": 7.914956011730206e-06, |
|
"loss": 5.8159, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 42.20779220779221, |
|
"grad_norm": 4.979036331176758, |
|
"learning_rate": 7.810222036028488e-06, |
|
"loss": 5.809, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 42.31252618349392, |
|
"grad_norm": 5.359362602233887, |
|
"learning_rate": 7.70548806032677e-06, |
|
"loss": 5.8346, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 42.41726015919564, |
|
"grad_norm": 5.264519214630127, |
|
"learning_rate": 7.600754084625053e-06, |
|
"loss": 5.8089, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 42.52199413489736, |
|
"grad_norm": 5.985982894897461, |
|
"learning_rate": 7.496020108923335e-06, |
|
"loss": 5.8192, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 42.626728110599075, |
|
"grad_norm": 5.505626201629639, |
|
"learning_rate": 7.391286133221617e-06, |
|
"loss": 5.8095, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 42.731462086300795, |
|
"grad_norm": 5.069738388061523, |
|
"learning_rate": 7.286552157519899e-06, |
|
"loss": 5.8186, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 42.836196062002514, |
|
"grad_norm": 6.004745960235596, |
|
"learning_rate": 7.182027649769586e-06, |
|
"loss": 5.8136, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 42.940930037704234, |
|
"grad_norm": 6.299502372741699, |
|
"learning_rate": 7.077293674067868e-06, |
|
"loss": 5.8213, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 43.04566401340595, |
|
"grad_norm": 6.302718162536621, |
|
"learning_rate": 6.97255969836615e-06, |
|
"loss": 5.8075, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 43.15039798910767, |
|
"grad_norm": 5.921250343322754, |
|
"learning_rate": 6.867825722664433e-06, |
|
"loss": 5.8276, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 43.25513196480939, |
|
"grad_norm": 5.123110771179199, |
|
"learning_rate": 6.763091746962715e-06, |
|
"loss": 5.7965, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 43.3598659405111, |
|
"grad_norm": 5.187294006347656, |
|
"learning_rate": 6.658357771260998e-06, |
|
"loss": 5.8137, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 43.46459991621282, |
|
"grad_norm": 5.407510757446289, |
|
"learning_rate": 6.55362379555928e-06, |
|
"loss": 5.8305, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 43.56933389191454, |
|
"grad_norm": 5.892600059509277, |
|
"learning_rate": 6.449099287808966e-06, |
|
"loss": 5.8167, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 43.67406786761625, |
|
"grad_norm": 5.39382266998291, |
|
"learning_rate": 6.344365312107248e-06, |
|
"loss": 5.8185, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 43.77880184331797, |
|
"grad_norm": 5.608034133911133, |
|
"learning_rate": 6.23963133640553e-06, |
|
"loss": 5.8051, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 43.88353581901969, |
|
"grad_norm": 6.069722652435303, |
|
"learning_rate": 6.1348973607038125e-06, |
|
"loss": 5.8101, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 43.988269794721404, |
|
"grad_norm": 5.938599109649658, |
|
"learning_rate": 6.0301633850020955e-06, |
|
"loss": 5.807, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 44.093003770423124, |
|
"grad_norm": 5.808456897735596, |
|
"learning_rate": 5.925429409300378e-06, |
|
"loss": 5.8246, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 44.197737746124844, |
|
"grad_norm": 5.229996681213379, |
|
"learning_rate": 5.820904901550063e-06, |
|
"loss": 5.8029, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 44.302471721826564, |
|
"grad_norm": 5.295706748962402, |
|
"learning_rate": 5.716170925848346e-06, |
|
"loss": 5.8094, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 44.407205697528276, |
|
"grad_norm": 5.649194240570068, |
|
"learning_rate": 5.611436950146628e-06, |
|
"loss": 5.811, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 44.511939673229996, |
|
"grad_norm": 6.5928521156311035, |
|
"learning_rate": 5.50670297444491e-06, |
|
"loss": 5.7974, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 44.616673648931716, |
|
"grad_norm": 6.246605396270752, |
|
"learning_rate": 5.401968998743192e-06, |
|
"loss": 5.8011, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 44.72140762463343, |
|
"grad_norm": 5.312093734741211, |
|
"learning_rate": 5.2972350230414745e-06, |
|
"loss": 5.7819, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 44.82614160033515, |
|
"grad_norm": 5.348554611206055, |
|
"learning_rate": 5.19271051529116e-06, |
|
"loss": 5.8027, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 44.93087557603687, |
|
"grad_norm": 5.95352029800415, |
|
"learning_rate": 5.087976539589443e-06, |
|
"loss": 5.8046, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 45.03560955173858, |
|
"grad_norm": 5.978014945983887, |
|
"learning_rate": 4.983242563887726e-06, |
|
"loss": 5.8021, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 45.1403435274403, |
|
"grad_norm": 5.595849990844727, |
|
"learning_rate": 4.878508588186008e-06, |
|
"loss": 5.7996, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 45.24507750314202, |
|
"grad_norm": 5.570345401763916, |
|
"learning_rate": 4.77377461248429e-06, |
|
"loss": 5.7973, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 45.34981147884373, |
|
"grad_norm": 5.320748805999756, |
|
"learning_rate": 4.669040636782573e-06, |
|
"loss": 5.7886, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 45.45454545454545, |
|
"grad_norm": 4.676185607910156, |
|
"learning_rate": 4.564516129032258e-06, |
|
"loss": 5.7874, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 45.55927943024717, |
|
"grad_norm": 5.7768473625183105, |
|
"learning_rate": 4.45978215333054e-06, |
|
"loss": 5.7875, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 45.66401340594889, |
|
"grad_norm": 5.668895244598389, |
|
"learning_rate": 4.355048177628823e-06, |
|
"loss": 5.8121, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 45.768747381650606, |
|
"grad_norm": 5.033557891845703, |
|
"learning_rate": 4.2503142019271055e-06, |
|
"loss": 5.8137, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 45.873481357352325, |
|
"grad_norm": 6.15772819519043, |
|
"learning_rate": 4.145580226225388e-06, |
|
"loss": 5.8137, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 45.978215333054045, |
|
"grad_norm": 6.617910861968994, |
|
"learning_rate": 4.04084625052367e-06, |
|
"loss": 5.8079, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 46.08294930875576, |
|
"grad_norm": 5.210205554962158, |
|
"learning_rate": 3.936112274821952e-06, |
|
"loss": 5.8189, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 46.18768328445748, |
|
"grad_norm": 5.630945205688477, |
|
"learning_rate": 3.831587767071639e-06, |
|
"loss": 5.795, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 46.2924172601592, |
|
"grad_norm": 5.8690032958984375, |
|
"learning_rate": 3.7268537913699205e-06, |
|
"loss": 5.8019, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 46.39715123586091, |
|
"grad_norm": 5.787112712860107, |
|
"learning_rate": 3.6221198156682027e-06, |
|
"loss": 5.7901, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 46.50188521156263, |
|
"grad_norm": 5.568469524383545, |
|
"learning_rate": 3.5173858399664853e-06, |
|
"loss": 5.803, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 46.60661918726435, |
|
"grad_norm": 5.671326637268066, |
|
"learning_rate": 3.4126518642647675e-06, |
|
"loss": 5.7931, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 46.71135316296606, |
|
"grad_norm": 5.4085307121276855, |
|
"learning_rate": 3.30791788856305e-06, |
|
"loss": 5.7903, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 46.81608713866778, |
|
"grad_norm": 5.7440571784973145, |
|
"learning_rate": 3.203393380812736e-06, |
|
"loss": 5.7897, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 46.9208211143695, |
|
"grad_norm": 5.144542217254639, |
|
"learning_rate": 3.098659405111018e-06, |
|
"loss": 5.8145, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 47.02555509007122, |
|
"grad_norm": 5.842213153839111, |
|
"learning_rate": 2.9939254294093008e-06, |
|
"loss": 5.8046, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 47.130289065772935, |
|
"grad_norm": 6.161410331726074, |
|
"learning_rate": 2.8891914537075826e-06, |
|
"loss": 5.7965, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 47.235023041474655, |
|
"grad_norm": 6.173724174499512, |
|
"learning_rate": 2.784457478005865e-06, |
|
"loss": 5.7854, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 47.339757017176375, |
|
"grad_norm": 5.132796287536621, |
|
"learning_rate": 2.679932970255551e-06, |
|
"loss": 5.791, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 47.44449099287809, |
|
"grad_norm": 6.053417205810547, |
|
"learning_rate": 2.5751989945538332e-06, |
|
"loss": 5.7935, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 47.54922496857981, |
|
"grad_norm": 5.08466911315918, |
|
"learning_rate": 2.470465018852116e-06, |
|
"loss": 5.8006, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 47.65395894428153, |
|
"grad_norm": 6.060305595397949, |
|
"learning_rate": 2.365731043150398e-06, |
|
"loss": 5.7839, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 47.75869291998324, |
|
"grad_norm": 5.808520317077637, |
|
"learning_rate": 2.2609970674486806e-06, |
|
"loss": 5.7933, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 47.86342689568496, |
|
"grad_norm": 5.413971424102783, |
|
"learning_rate": 2.156472559698366e-06, |
|
"loss": 5.7985, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 47.96816087138668, |
|
"grad_norm": 6.4786529541015625, |
|
"learning_rate": 2.0517385839966487e-06, |
|
"loss": 5.8059, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 48.07289484708839, |
|
"grad_norm": 5.606147289276123, |
|
"learning_rate": 1.947004608294931e-06, |
|
"loss": 5.7934, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 48.17762882279011, |
|
"grad_norm": 5.827240467071533, |
|
"learning_rate": 1.8422706325932133e-06, |
|
"loss": 5.8007, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 48.28236279849183, |
|
"grad_norm": 5.678854465484619, |
|
"learning_rate": 1.7375366568914957e-06, |
|
"loss": 5.8038, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 48.38709677419355, |
|
"grad_norm": 5.42138671875, |
|
"learning_rate": 1.632802681189778e-06, |
|
"loss": 5.7976, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 48.491830749895264, |
|
"grad_norm": 6.03090238571167, |
|
"learning_rate": 1.5280687054880603e-06, |
|
"loss": 5.7874, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 48.596564725596984, |
|
"grad_norm": 4.660221099853516, |
|
"learning_rate": 1.4233347297863427e-06, |
|
"loss": 5.7742, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 48.701298701298704, |
|
"grad_norm": 6.032063961029053, |
|
"learning_rate": 1.3188102220360285e-06, |
|
"loss": 5.7866, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 48.80603267700042, |
|
"grad_norm": 6.296925067901611, |
|
"learning_rate": 1.2142857142857144e-06, |
|
"loss": 5.7922, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 48.91076665270214, |
|
"grad_norm": 5.2991437911987305, |
|
"learning_rate": 1.1095517385839968e-06, |
|
"loss": 5.7873, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 49.015500628403856, |
|
"grad_norm": 6.130777835845947, |
|
"learning_rate": 1.0048177628822792e-06, |
|
"loss": 5.8074, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 49.12023460410557, |
|
"grad_norm": 6.305094242095947, |
|
"learning_rate": 9.000837871805614e-07, |
|
"loss": 5.7847, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 49.22496857980729, |
|
"grad_norm": 6.156949996948242, |
|
"learning_rate": 7.953498114788438e-07, |
|
"loss": 5.7741, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 49.32970255550901, |
|
"grad_norm": 6.475966930389404, |
|
"learning_rate": 6.908253037285296e-07, |
|
"loss": 5.8083, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 49.43443653121072, |
|
"grad_norm": 5.687895774841309, |
|
"learning_rate": 5.86091328026812e-07, |
|
"loss": 5.7976, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 49.53917050691244, |
|
"grad_norm": 5.615401744842529, |
|
"learning_rate": 4.813573523250943e-07, |
|
"loss": 5.7907, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 49.64390448261416, |
|
"grad_norm": 6.177160263061523, |
|
"learning_rate": 3.7662337662337666e-07, |
|
"loss": 5.792, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 49.74863845831588, |
|
"grad_norm": 5.604287624359131, |
|
"learning_rate": 2.71889400921659e-07, |
|
"loss": 5.7837, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 49.853372434017594, |
|
"grad_norm": 4.826539039611816, |
|
"learning_rate": 1.673648931713448e-07, |
|
"loss": 5.7808, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 49.95810640971931, |
|
"grad_norm": 5.6649250984191895, |
|
"learning_rate": 6.263091746962715e-08, |
|
"loss": 5.7888, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 238700, |
|
"total_flos": 5161725447936000.0, |
|
"train_loss": 6.140905278960581, |
|
"train_runtime": 7883.0646, |
|
"train_samples_per_second": 484.444, |
|
"train_steps_per_second": 30.28 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 238700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5161725447936000.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|