|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 71610, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04189359028068706, |
|
"grad_norm": 1.1499052047729492, |
|
"learning_rate": 4.993017734953219e-05, |
|
"loss": 9.973, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08378718056137412, |
|
"grad_norm": 1.1211209297180176, |
|
"learning_rate": 4.986035469906438e-05, |
|
"loss": 9.315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12568077084206117, |
|
"grad_norm": 1.0281990766525269, |
|
"learning_rate": 4.979053204859657e-05, |
|
"loss": 8.8239, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16757436112274823, |
|
"grad_norm": 1.1534616947174072, |
|
"learning_rate": 4.972070939812876e-05, |
|
"loss": 8.4254, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20946795140343527, |
|
"grad_norm": 0.9906216859817505, |
|
"learning_rate": 4.9650886747660944e-05, |
|
"loss": 8.1225, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25136154168412234, |
|
"grad_norm": 0.9559820294380188, |
|
"learning_rate": 4.958106409719313e-05, |
|
"loss": 7.8557, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2932551319648094, |
|
"grad_norm": 0.7887147665023804, |
|
"learning_rate": 4.951124144672532e-05, |
|
"loss": 7.7282, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33514872224549647, |
|
"grad_norm": 0.6530473232269287, |
|
"learning_rate": 4.944141879625751e-05, |
|
"loss": 7.6586, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3770423125261835, |
|
"grad_norm": 0.6546779870986938, |
|
"learning_rate": 4.9371596145789694e-05, |
|
"loss": 7.6438, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.41893590280687054, |
|
"grad_norm": 0.5660322308540344, |
|
"learning_rate": 4.9301773495321885e-05, |
|
"loss": 7.6237, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4608294930875576, |
|
"grad_norm": 0.6094719171524048, |
|
"learning_rate": 4.9231950844854076e-05, |
|
"loss": 7.5937, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5027230833682447, |
|
"grad_norm": 0.6101346015930176, |
|
"learning_rate": 4.916212819438626e-05, |
|
"loss": 7.5661, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5446166736489317, |
|
"grad_norm": 0.5943477749824524, |
|
"learning_rate": 4.909230554391845e-05, |
|
"loss": 7.6007, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5865102639296188, |
|
"grad_norm": 0.6604776382446289, |
|
"learning_rate": 4.902248289345064e-05, |
|
"loss": 7.568, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6284038542103059, |
|
"grad_norm": 0.6151777505874634, |
|
"learning_rate": 4.8952660242982826e-05, |
|
"loss": 7.5521, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6702974444909929, |
|
"grad_norm": 0.6381381750106812, |
|
"learning_rate": 4.888283759251501e-05, |
|
"loss": 7.5478, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.7121910347716799, |
|
"grad_norm": 0.6552081108093262, |
|
"learning_rate": 4.88130149420472e-05, |
|
"loss": 7.5293, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.754084625052367, |
|
"grad_norm": 0.6973659992218018, |
|
"learning_rate": 4.874319229157939e-05, |
|
"loss": 7.4983, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.795978215333054, |
|
"grad_norm": 0.8130584955215454, |
|
"learning_rate": 4.8673369641111576e-05, |
|
"loss": 7.5213, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8378718056137411, |
|
"grad_norm": 0.8530446887016296, |
|
"learning_rate": 4.860354699064377e-05, |
|
"loss": 7.5105, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8797653958944281, |
|
"grad_norm": 0.8059477210044861, |
|
"learning_rate": 4.853372434017596e-05, |
|
"loss": 7.508, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.9216589861751152, |
|
"grad_norm": 0.7378331422805786, |
|
"learning_rate": 4.846390168970814e-05, |
|
"loss": 7.482, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9635525764558023, |
|
"grad_norm": 1.1823194026947021, |
|
"learning_rate": 4.839407903924033e-05, |
|
"loss": 7.4778, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.0054461667364893, |
|
"grad_norm": 0.8547298908233643, |
|
"learning_rate": 4.832425638877252e-05, |
|
"loss": 7.4529, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0473397570171763, |
|
"grad_norm": 0.8535734415054321, |
|
"learning_rate": 4.825443373830471e-05, |
|
"loss": 7.4374, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0892333472978635, |
|
"grad_norm": 0.994597852230072, |
|
"learning_rate": 4.818461108783689e-05, |
|
"loss": 7.4306, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.1311269375785504, |
|
"grad_norm": 1.2056490182876587, |
|
"learning_rate": 4.8114788437369084e-05, |
|
"loss": 7.4322, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.1730205278592376, |
|
"grad_norm": 1.2451157569885254, |
|
"learning_rate": 4.8044965786901275e-05, |
|
"loss": 7.4205, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2149141181399246, |
|
"grad_norm": 0.9964780211448669, |
|
"learning_rate": 4.797514313643346e-05, |
|
"loss": 7.4031, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.2568077084206117, |
|
"grad_norm": 0.8989804983139038, |
|
"learning_rate": 4.790532048596565e-05, |
|
"loss": 7.4043, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.2987012987012987, |
|
"grad_norm": 1.1330469846725464, |
|
"learning_rate": 4.783549783549784e-05, |
|
"loss": 7.4031, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3405948889819856, |
|
"grad_norm": 0.9531299471855164, |
|
"learning_rate": 4.7765675185030025e-05, |
|
"loss": 7.3866, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.3824884792626728, |
|
"grad_norm": 1.0342323780059814, |
|
"learning_rate": 4.7695852534562216e-05, |
|
"loss": 7.3477, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.42438206954336, |
|
"grad_norm": 1.0523111820220947, |
|
"learning_rate": 4.76260298840944e-05, |
|
"loss": 7.3726, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.466275659824047, |
|
"grad_norm": 1.298751711845398, |
|
"learning_rate": 4.755620723362659e-05, |
|
"loss": 7.3484, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.508169250104734, |
|
"grad_norm": 1.0065233707427979, |
|
"learning_rate": 4.7486384583158775e-05, |
|
"loss": 7.3567, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.550062840385421, |
|
"grad_norm": 1.2989579439163208, |
|
"learning_rate": 4.7416561932690966e-05, |
|
"loss": 7.3275, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.591956430666108, |
|
"grad_norm": 1.0343406200408936, |
|
"learning_rate": 4.734673928222316e-05, |
|
"loss": 7.3353, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.6338500209467952, |
|
"grad_norm": 0.9944115281105042, |
|
"learning_rate": 4.727691663175534e-05, |
|
"loss": 7.3304, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.6757436112274822, |
|
"grad_norm": 1.102974534034729, |
|
"learning_rate": 4.720709398128753e-05, |
|
"loss": 7.3128, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7176372015081691, |
|
"grad_norm": 1.112282156944275, |
|
"learning_rate": 4.713727133081972e-05, |
|
"loss": 7.333, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.7595307917888563, |
|
"grad_norm": 1.2143328189849854, |
|
"learning_rate": 4.7067448680351914e-05, |
|
"loss": 7.3342, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.8014243820695435, |
|
"grad_norm": 1.1656922101974487, |
|
"learning_rate": 4.69976260298841e-05, |
|
"loss": 7.2995, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.8433179723502304, |
|
"grad_norm": 1.2085694074630737, |
|
"learning_rate": 4.692780337941628e-05, |
|
"loss": 7.2939, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.8852115626309174, |
|
"grad_norm": 1.1366217136383057, |
|
"learning_rate": 4.685798072894847e-05, |
|
"loss": 7.3079, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.9271051529116046, |
|
"grad_norm": 1.5368098020553589, |
|
"learning_rate": 4.678815807848066e-05, |
|
"loss": 7.2682, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.9689987431922917, |
|
"grad_norm": 1.1548763513565063, |
|
"learning_rate": 4.671833542801285e-05, |
|
"loss": 7.2708, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.0108923334729787, |
|
"grad_norm": 1.1510928869247437, |
|
"learning_rate": 4.664851277754504e-05, |
|
"loss": 7.2794, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.0527859237536656, |
|
"grad_norm": 1.103461503982544, |
|
"learning_rate": 4.6578690127077224e-05, |
|
"loss": 7.2612, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.0946795140343526, |
|
"grad_norm": 1.2767215967178345, |
|
"learning_rate": 4.6508867476609414e-05, |
|
"loss": 7.2436, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.13657310431504, |
|
"grad_norm": 1.3062710762023926, |
|
"learning_rate": 4.6439044826141605e-05, |
|
"loss": 7.2162, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.178466694595727, |
|
"grad_norm": 1.2461299896240234, |
|
"learning_rate": 4.6369222175673796e-05, |
|
"loss": 7.2387, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.220360284876414, |
|
"grad_norm": 1.5427358150482178, |
|
"learning_rate": 4.629939952520598e-05, |
|
"loss": 7.2004, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.262253875157101, |
|
"grad_norm": 1.390331506729126, |
|
"learning_rate": 4.6229576874738165e-05, |
|
"loss": 7.2403, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.3041474654377883, |
|
"grad_norm": 1.4087032079696655, |
|
"learning_rate": 4.6159754224270356e-05, |
|
"loss": 7.2157, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.346041055718475, |
|
"grad_norm": 1.2417359352111816, |
|
"learning_rate": 4.608993157380254e-05, |
|
"loss": 7.2277, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.387934645999162, |
|
"grad_norm": 1.4267281293869019, |
|
"learning_rate": 4.602010892333473e-05, |
|
"loss": 7.1999, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.429828236279849, |
|
"grad_norm": 1.3897684812545776, |
|
"learning_rate": 4.595028627286692e-05, |
|
"loss": 7.2155, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.471721826560536, |
|
"grad_norm": 1.326821208000183, |
|
"learning_rate": 4.5880463622399106e-05, |
|
"loss": 7.1705, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.5136154168412235, |
|
"grad_norm": 1.2585749626159668, |
|
"learning_rate": 4.58106409719313e-05, |
|
"loss": 7.1787, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.5555090071219104, |
|
"grad_norm": 1.4856244325637817, |
|
"learning_rate": 4.574081832146349e-05, |
|
"loss": 7.1923, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.5974025974025974, |
|
"grad_norm": 1.2883421182632446, |
|
"learning_rate": 4.567099567099568e-05, |
|
"loss": 7.1776, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.6392961876832843, |
|
"grad_norm": 1.4935518503189087, |
|
"learning_rate": 4.560117302052786e-05, |
|
"loss": 7.1711, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.6811897779639713, |
|
"grad_norm": 1.3920152187347412, |
|
"learning_rate": 4.553135037006005e-05, |
|
"loss": 7.1292, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.7230833682446587, |
|
"grad_norm": 1.2802495956420898, |
|
"learning_rate": 4.546152771959224e-05, |
|
"loss": 7.1558, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.7649769585253456, |
|
"grad_norm": 1.4111789464950562, |
|
"learning_rate": 4.5392403295629106e-05, |
|
"loss": 7.172, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.8068705488060326, |
|
"grad_norm": 1.6390964984893799, |
|
"learning_rate": 4.53225806451613e-05, |
|
"loss": 7.1263, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.84876413908672, |
|
"grad_norm": 1.4132812023162842, |
|
"learning_rate": 4.525275799469348e-05, |
|
"loss": 7.1259, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.890657729367407, |
|
"grad_norm": 1.4943978786468506, |
|
"learning_rate": 4.518293534422567e-05, |
|
"loss": 7.1252, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.932551319648094, |
|
"grad_norm": 1.3022414445877075, |
|
"learning_rate": 4.5113112693757856e-05, |
|
"loss": 7.107, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.974444909928781, |
|
"grad_norm": 1.4270446300506592, |
|
"learning_rate": 4.504329004329004e-05, |
|
"loss": 7.1278, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.016338500209468, |
|
"grad_norm": 1.3672137260437012, |
|
"learning_rate": 4.497346739282223e-05, |
|
"loss": 7.1282, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.058232090490155, |
|
"grad_norm": 1.955368995666504, |
|
"learning_rate": 4.490364474235442e-05, |
|
"loss": 7.1123, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.100125680770842, |
|
"grad_norm": 1.3990498781204224, |
|
"learning_rate": 4.483382209188661e-05, |
|
"loss": 7.1117, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.142019271051529, |
|
"grad_norm": 1.6294671297073364, |
|
"learning_rate": 4.47639994414188e-05, |
|
"loss": 7.0721, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.183912861332216, |
|
"grad_norm": 1.3939063549041748, |
|
"learning_rate": 4.469417679095099e-05, |
|
"loss": 7.0599, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.225806451612903, |
|
"grad_norm": 1.918155312538147, |
|
"learning_rate": 4.462435414048318e-05, |
|
"loss": 7.0759, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.2677000418935904, |
|
"grad_norm": 1.3072093725204468, |
|
"learning_rate": 4.455453149001536e-05, |
|
"loss": 7.0784, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.3095936321742774, |
|
"grad_norm": 1.3794573545455933, |
|
"learning_rate": 4.4484708839547554e-05, |
|
"loss": 7.0726, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.3514872224549643, |
|
"grad_norm": 1.4223533868789673, |
|
"learning_rate": 4.441488618907974e-05, |
|
"loss": 7.0513, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.3933808127356513, |
|
"grad_norm": 1.695520043373108, |
|
"learning_rate": 4.434506353861192e-05, |
|
"loss": 7.0586, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.4352744030163387, |
|
"grad_norm": 2.290275812149048, |
|
"learning_rate": 4.4275240888144113e-05, |
|
"loss": 7.0429, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.4771679932970256, |
|
"grad_norm": 1.5322943925857544, |
|
"learning_rate": 4.4205418237676304e-05, |
|
"loss": 7.0196, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.5190615835777126, |
|
"grad_norm": 1.4767639636993408, |
|
"learning_rate": 4.4135595587208495e-05, |
|
"loss": 7.0643, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.5609551738583995, |
|
"grad_norm": 1.4343881607055664, |
|
"learning_rate": 4.406577293674068e-05, |
|
"loss": 7.0153, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.602848764139087, |
|
"grad_norm": 1.7641897201538086, |
|
"learning_rate": 4.399664851277755e-05, |
|
"loss": 7.013, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.644742354419774, |
|
"grad_norm": 1.9688279628753662, |
|
"learning_rate": 4.392682586230973e-05, |
|
"loss": 6.9985, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.686635944700461, |
|
"grad_norm": 1.7434871196746826, |
|
"learning_rate": 4.385700321184192e-05, |
|
"loss": 7.0142, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.728529534981148, |
|
"grad_norm": 1.550470232963562, |
|
"learning_rate": 4.378718056137411e-05, |
|
"loss": 6.9975, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.7704231252618348, |
|
"grad_norm": 1.759869933128357, |
|
"learning_rate": 4.37173579109063e-05, |
|
"loss": 6.9821, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.812316715542522, |
|
"grad_norm": 2.052905797958374, |
|
"learning_rate": 4.364753526043849e-05, |
|
"loss": 6.9917, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.854210305823209, |
|
"grad_norm": 1.8859872817993164, |
|
"learning_rate": 4.357771260997068e-05, |
|
"loss": 6.9934, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.896103896103896, |
|
"grad_norm": 1.8349354267120361, |
|
"learning_rate": 4.3507889959502863e-05, |
|
"loss": 6.9861, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.937997486384583, |
|
"grad_norm": 2.519893169403076, |
|
"learning_rate": 4.3438067309035054e-05, |
|
"loss": 6.9611, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.97989107666527, |
|
"grad_norm": 1.506072759628296, |
|
"learning_rate": 4.336824465856724e-05, |
|
"loss": 6.9688, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.021784666945957, |
|
"grad_norm": 1.5342004299163818, |
|
"learning_rate": 4.329842200809943e-05, |
|
"loss": 6.9582, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.063678257226645, |
|
"grad_norm": 1.6476861238479614, |
|
"learning_rate": 4.3228599357631614e-05, |
|
"loss": 6.9712, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.105571847507331, |
|
"grad_norm": 2.112595319747925, |
|
"learning_rate": 4.3158776707163805e-05, |
|
"loss": 6.9568, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.147465437788019, |
|
"grad_norm": 2.390101194381714, |
|
"learning_rate": 4.3088954056695996e-05, |
|
"loss": 6.9501, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.189359028068705, |
|
"grad_norm": 1.900177240371704, |
|
"learning_rate": 4.301913140622818e-05, |
|
"loss": 6.935, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.231252618349393, |
|
"grad_norm": 1.7032443284988403, |
|
"learning_rate": 4.294930875576037e-05, |
|
"loss": 6.9343, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 4.27314620863008, |
|
"grad_norm": 1.8393847942352295, |
|
"learning_rate": 4.287948610529256e-05, |
|
"loss": 6.8927, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 4.3150397989107665, |
|
"grad_norm": 2.046727180480957, |
|
"learning_rate": 4.280966345482475e-05, |
|
"loss": 6.9156, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 4.356933389191454, |
|
"grad_norm": 1.832216501235962, |
|
"learning_rate": 4.273984080435694e-05, |
|
"loss": 6.89, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 4.39882697947214, |
|
"grad_norm": 1.8682448863983154, |
|
"learning_rate": 4.267001815388912e-05, |
|
"loss": 6.8995, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.440720569752828, |
|
"grad_norm": 2.0732340812683105, |
|
"learning_rate": 4.260089372992599e-05, |
|
"loss": 6.9094, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 4.482614160033515, |
|
"grad_norm": 1.6016206741333008, |
|
"learning_rate": 4.253107107945818e-05, |
|
"loss": 6.913, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 4.524507750314202, |
|
"grad_norm": 2.063062906265259, |
|
"learning_rate": 4.246124842899037e-05, |
|
"loss": 6.8943, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 4.566401340594889, |
|
"grad_norm": 1.9563026428222656, |
|
"learning_rate": 4.2391425778522555e-05, |
|
"loss": 6.8986, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 4.6082949308755765, |
|
"grad_norm": 1.8872498273849487, |
|
"learning_rate": 4.2321603128054746e-05, |
|
"loss": 6.8883, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.650188521156263, |
|
"grad_norm": 2.1376144886016846, |
|
"learning_rate": 4.225178047758693e-05, |
|
"loss": 6.8716, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 4.69208211143695, |
|
"grad_norm": 1.938679575920105, |
|
"learning_rate": 4.218195782711912e-05, |
|
"loss": 6.8836, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 4.733975701717637, |
|
"grad_norm": 1.9372957944869995, |
|
"learning_rate": 4.2112135176651305e-05, |
|
"loss": 6.8925, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 4.775869291998324, |
|
"grad_norm": 2.716827630996704, |
|
"learning_rate": 4.2042312526183496e-05, |
|
"loss": 6.8284, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 4.817762882279011, |
|
"grad_norm": 1.942700743675232, |
|
"learning_rate": 4.197248987571568e-05, |
|
"loss": 6.8753, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.859656472559698, |
|
"grad_norm": 2.026385545730591, |
|
"learning_rate": 4.190266722524787e-05, |
|
"loss": 6.8707, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 4.901550062840386, |
|
"grad_norm": 1.7594517469406128, |
|
"learning_rate": 4.183284457478006e-05, |
|
"loss": 6.8427, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 4.943443653121072, |
|
"grad_norm": 1.8161801099777222, |
|
"learning_rate": 4.176302192431225e-05, |
|
"loss": 6.8519, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 4.9853372434017595, |
|
"grad_norm": 2.6034481525421143, |
|
"learning_rate": 4.169319927384444e-05, |
|
"loss": 6.8448, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 5.027230833682447, |
|
"grad_norm": 1.93776535987854, |
|
"learning_rate": 4.162337662337663e-05, |
|
"loss": 6.8071, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.0691244239631335, |
|
"grad_norm": 2.0754964351654053, |
|
"learning_rate": 4.155355397290881e-05, |
|
"loss": 6.8386, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 5.111018014243821, |
|
"grad_norm": 2.0640342235565186, |
|
"learning_rate": 4.1483731322440996e-05, |
|
"loss": 6.8402, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 5.152911604524507, |
|
"grad_norm": 1.8218064308166504, |
|
"learning_rate": 4.141390867197319e-05, |
|
"loss": 6.8153, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 5.194805194805195, |
|
"grad_norm": 2.0181634426116943, |
|
"learning_rate": 4.134408602150538e-05, |
|
"loss": 6.8104, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 5.236698785085882, |
|
"grad_norm": 2.5224316120147705, |
|
"learning_rate": 4.127426337103757e-05, |
|
"loss": 6.8355, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.278592375366569, |
|
"grad_norm": 3.1008002758026123, |
|
"learning_rate": 4.120513894707444e-05, |
|
"loss": 6.8384, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 5.320485965647256, |
|
"grad_norm": 1.8872394561767578, |
|
"learning_rate": 4.113531629660662e-05, |
|
"loss": 6.8087, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 5.362379555927943, |
|
"grad_norm": 2.109281063079834, |
|
"learning_rate": 4.1065493646138805e-05, |
|
"loss": 6.8161, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 5.40427314620863, |
|
"grad_norm": 1.7881128787994385, |
|
"learning_rate": 4.0995670995670996e-05, |
|
"loss": 6.8215, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 5.446166736489317, |
|
"grad_norm": 2.5179624557495117, |
|
"learning_rate": 4.092584834520319e-05, |
|
"loss": 6.7883, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.488060326770004, |
|
"grad_norm": 2.4349751472473145, |
|
"learning_rate": 4.085602569473537e-05, |
|
"loss": 6.792, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 5.529953917050691, |
|
"grad_norm": 2.011018991470337, |
|
"learning_rate": 4.078620304426756e-05, |
|
"loss": 6.7846, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 5.571847507331379, |
|
"grad_norm": 2.519958019256592, |
|
"learning_rate": 4.071638039379975e-05, |
|
"loss": 6.7887, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 5.613741097612065, |
|
"grad_norm": 1.9241886138916016, |
|
"learning_rate": 4.064655774333194e-05, |
|
"loss": 6.7662, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 5.655634687892753, |
|
"grad_norm": 1.8995391130447388, |
|
"learning_rate": 4.057673509286413e-05, |
|
"loss": 6.7672, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.697528278173439, |
|
"grad_norm": 2.1511363983154297, |
|
"learning_rate": 4.050691244239632e-05, |
|
"loss": 6.7867, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 5.7394218684541265, |
|
"grad_norm": 1.8995012044906616, |
|
"learning_rate": 4.04370897919285e-05, |
|
"loss": 6.7563, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 5.781315458734814, |
|
"grad_norm": 1.83163321018219, |
|
"learning_rate": 4.036726714146069e-05, |
|
"loss": 6.7848, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 5.8232090490155, |
|
"grad_norm": 2.2616159915924072, |
|
"learning_rate": 4.029744449099288e-05, |
|
"loss": 6.7896, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 5.865102639296188, |
|
"grad_norm": 2.0548572540283203, |
|
"learning_rate": 4.0228320067029746e-05, |
|
"loss": 6.7633, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.906996229576874, |
|
"grad_norm": 2.4749302864074707, |
|
"learning_rate": 4.015849741656194e-05, |
|
"loss": 6.7267, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 5.948889819857562, |
|
"grad_norm": 1.906648874282837, |
|
"learning_rate": 4.008867476609413e-05, |
|
"loss": 6.7645, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 5.990783410138249, |
|
"grad_norm": 2.0839619636535645, |
|
"learning_rate": 4.001885211562631e-05, |
|
"loss": 6.8082, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 6.032677000418936, |
|
"grad_norm": 2.1202664375305176, |
|
"learning_rate": 3.9949029465158496e-05, |
|
"loss": 6.7625, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 6.074570590699623, |
|
"grad_norm": 1.988951563835144, |
|
"learning_rate": 3.987920681469069e-05, |
|
"loss": 6.7413, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.11646418098031, |
|
"grad_norm": 2.4327659606933594, |
|
"learning_rate": 3.980938416422287e-05, |
|
"loss": 6.7123, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 6.158357771260997, |
|
"grad_norm": 2.07710599899292, |
|
"learning_rate": 3.973956151375506e-05, |
|
"loss": 6.7316, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 6.200251361541684, |
|
"grad_norm": 1.9640876054763794, |
|
"learning_rate": 3.966973886328725e-05, |
|
"loss": 6.752, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 6.242144951822371, |
|
"grad_norm": 2.3012888431549072, |
|
"learning_rate": 3.959991621281944e-05, |
|
"loss": 6.7188, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 6.284038542103058, |
|
"grad_norm": 2.0262773036956787, |
|
"learning_rate": 3.953009356235163e-05, |
|
"loss": 6.7255, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.325932132383746, |
|
"grad_norm": 1.8689815998077393, |
|
"learning_rate": 3.946027091188382e-05, |
|
"loss": 6.7132, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 6.367825722664432, |
|
"grad_norm": 2.188612937927246, |
|
"learning_rate": 3.939044826141601e-05, |
|
"loss": 6.7407, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 6.4097193129451195, |
|
"grad_norm": 2.0168368816375732, |
|
"learning_rate": 3.9320625610948195e-05, |
|
"loss": 6.7132, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 6.451612903225806, |
|
"grad_norm": 2.496889352798462, |
|
"learning_rate": 3.925080296048038e-05, |
|
"loss": 6.7003, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 6.4935064935064934, |
|
"grad_norm": 2.1601486206054688, |
|
"learning_rate": 3.918098031001257e-05, |
|
"loss": 6.7056, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.535400083787181, |
|
"grad_norm": 2.300112009048462, |
|
"learning_rate": 3.9111157659544754e-05, |
|
"loss": 6.7314, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 6.577293674067867, |
|
"grad_norm": 2.321880578994751, |
|
"learning_rate": 3.9041335009076945e-05, |
|
"loss": 6.7166, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 6.619187264348555, |
|
"grad_norm": 2.029465913772583, |
|
"learning_rate": 3.8971512358609136e-05, |
|
"loss": 6.6908, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 6.661080854629242, |
|
"grad_norm": 2.258577585220337, |
|
"learning_rate": 3.890168970814133e-05, |
|
"loss": 6.7359, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 6.702974444909929, |
|
"grad_norm": 2.3579437732696533, |
|
"learning_rate": 3.883186705767351e-05, |
|
"loss": 6.7021, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.744868035190616, |
|
"grad_norm": 2.236828565597534, |
|
"learning_rate": 3.87620444072057e-05, |
|
"loss": 6.6897, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 6.786761625471303, |
|
"grad_norm": 2.6255593299865723, |
|
"learning_rate": 3.869222175673789e-05, |
|
"loss": 6.6899, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 6.82865521575199, |
|
"grad_norm": 2.297067880630493, |
|
"learning_rate": 3.862239910627008e-05, |
|
"loss": 6.7058, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 6.870548806032677, |
|
"grad_norm": 2.440605640411377, |
|
"learning_rate": 3.8553274682306945e-05, |
|
"loss": 6.6559, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 6.912442396313364, |
|
"grad_norm": 2.0427000522613525, |
|
"learning_rate": 3.848345203183913e-05, |
|
"loss": 6.6799, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.954335986594051, |
|
"grad_norm": 2.0323081016540527, |
|
"learning_rate": 3.841362938137132e-05, |
|
"loss": 6.6863, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 6.996229576874738, |
|
"grad_norm": 3.407731533050537, |
|
"learning_rate": 3.834380673090351e-05, |
|
"loss": 6.6767, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 7.038123167155425, |
|
"grad_norm": 2.112870931625366, |
|
"learning_rate": 3.8273984080435695e-05, |
|
"loss": 6.682, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 7.080016757436113, |
|
"grad_norm": 2.710810422897339, |
|
"learning_rate": 3.8204161429967886e-05, |
|
"loss": 6.7046, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 7.121910347716799, |
|
"grad_norm": 2.0754942893981934, |
|
"learning_rate": 3.813433877950007e-05, |
|
"loss": 6.6511, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.1638039379974865, |
|
"grad_norm": 3.1009552478790283, |
|
"learning_rate": 3.8064516129032254e-05, |
|
"loss": 6.666, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 7.205697528278174, |
|
"grad_norm": 2.1582441329956055, |
|
"learning_rate": 3.7994693478564445e-05, |
|
"loss": 6.6574, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 7.24759111855886, |
|
"grad_norm": 2.680147647857666, |
|
"learning_rate": 3.7924870828096636e-05, |
|
"loss": 6.6814, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 7.289484708839548, |
|
"grad_norm": 2.0264320373535156, |
|
"learning_rate": 3.785504817762883e-05, |
|
"loss": 6.668, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 7.331378299120234, |
|
"grad_norm": 2.032093048095703, |
|
"learning_rate": 3.778522552716101e-05, |
|
"loss": 6.6603, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.373271889400922, |
|
"grad_norm": 2.4837894439697266, |
|
"learning_rate": 3.77154028766932e-05, |
|
"loss": 6.6817, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 7.415165479681609, |
|
"grad_norm": 2.70166015625, |
|
"learning_rate": 3.764558022622539e-05, |
|
"loss": 6.6657, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 7.457059069962296, |
|
"grad_norm": 2.3508477210998535, |
|
"learning_rate": 3.757575757575758e-05, |
|
"loss": 6.6314, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 7.498952660242983, |
|
"grad_norm": 2.450437307357788, |
|
"learning_rate": 3.750593492528977e-05, |
|
"loss": 6.6551, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 7.5408462505236695, |
|
"grad_norm": 1.9939864873886108, |
|
"learning_rate": 3.743611227482195e-05, |
|
"loss": 6.6128, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.582739840804357, |
|
"grad_norm": 2.470285177230835, |
|
"learning_rate": 3.736628962435414e-05, |
|
"loss": 6.6126, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 7.624633431085044, |
|
"grad_norm": 2.5651469230651855, |
|
"learning_rate": 3.729646697388633e-05, |
|
"loss": 6.6694, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 7.666527021365731, |
|
"grad_norm": 2.361785650253296, |
|
"learning_rate": 3.722664432341852e-05, |
|
"loss": 6.6349, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 7.708420611646418, |
|
"grad_norm": 2.371994972229004, |
|
"learning_rate": 3.715682167295071e-05, |
|
"loss": 6.6483, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 7.750314201927106, |
|
"grad_norm": 2.862107038497925, |
|
"learning_rate": 3.708769724898758e-05, |
|
"loss": 6.634, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 7.792207792207792, |
|
"grad_norm": 2.815486192703247, |
|
"learning_rate": 3.701787459851976e-05, |
|
"loss": 6.6324, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 7.8341013824884795, |
|
"grad_norm": 1.930017352104187, |
|
"learning_rate": 3.6948051948051945e-05, |
|
"loss": 6.6275, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 7.875994972769166, |
|
"grad_norm": 3.1758625507354736, |
|
"learning_rate": 3.6878229297584136e-05, |
|
"loss": 6.6529, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 7.9178885630498534, |
|
"grad_norm": 2.1219429969787598, |
|
"learning_rate": 3.680840664711633e-05, |
|
"loss": 6.6085, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 7.95978215333054, |
|
"grad_norm": 2.1965785026550293, |
|
"learning_rate": 3.673858399664851e-05, |
|
"loss": 6.6206, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.001675743611228, |
|
"grad_norm": 2.489473581314087, |
|
"learning_rate": 3.66687613461807e-05, |
|
"loss": 6.6089, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 8.043569333891915, |
|
"grad_norm": 2.3411850929260254, |
|
"learning_rate": 3.659893869571289e-05, |
|
"loss": 6.6286, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 8.085462924172601, |
|
"grad_norm": 2.32071590423584, |
|
"learning_rate": 3.6529116045245084e-05, |
|
"loss": 6.5984, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 8.12735651445329, |
|
"grad_norm": 2.402956247329712, |
|
"learning_rate": 3.645929339477727e-05, |
|
"loss": 6.5952, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 8.169250104733976, |
|
"grad_norm": 2.6951029300689697, |
|
"learning_rate": 3.638947074430946e-05, |
|
"loss": 6.6106, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 8.211143695014663, |
|
"grad_norm": 2.807187080383301, |
|
"learning_rate": 3.6319648093841643e-05, |
|
"loss": 6.6109, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 8.253037285295349, |
|
"grad_norm": 2.798614025115967, |
|
"learning_rate": 3.624982544337383e-05, |
|
"loss": 6.6052, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 8.294930875576037, |
|
"grad_norm": 4.015589237213135, |
|
"learning_rate": 3.618000279290602e-05, |
|
"loss": 6.5995, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 8.336824465856724, |
|
"grad_norm": 2.6923959255218506, |
|
"learning_rate": 3.611018014243821e-05, |
|
"loss": 6.5855, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 8.37871805613741, |
|
"grad_norm": 2.112994909286499, |
|
"learning_rate": 3.6040357491970394e-05, |
|
"loss": 6.5968, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.420611646418099, |
|
"grad_norm": 2.8196451663970947, |
|
"learning_rate": 3.5970534841502585e-05, |
|
"loss": 6.5977, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 8.462505236698785, |
|
"grad_norm": 2.2421326637268066, |
|
"learning_rate": 3.5900712191034776e-05, |
|
"loss": 6.5846, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 8.504398826979472, |
|
"grad_norm": 2.634634256362915, |
|
"learning_rate": 3.583088954056697e-05, |
|
"loss": 6.5955, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 8.54629241726016, |
|
"grad_norm": 2.101125955581665, |
|
"learning_rate": 3.576106689009915e-05, |
|
"loss": 6.6013, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 8.588186007540846, |
|
"grad_norm": 2.719330072402954, |
|
"learning_rate": 3.569194246613601e-05, |
|
"loss": 6.5668, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.630079597821533, |
|
"grad_norm": 2.283790349960327, |
|
"learning_rate": 3.56221198156682e-05, |
|
"loss": 6.6107, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 8.671973188102221, |
|
"grad_norm": 2.1805171966552734, |
|
"learning_rate": 3.5552297165200393e-05, |
|
"loss": 6.5875, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 8.713866778382908, |
|
"grad_norm": 2.6632487773895264, |
|
"learning_rate": 3.5482474514732584e-05, |
|
"loss": 6.613, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 8.755760368663594, |
|
"grad_norm": 2.3296337127685547, |
|
"learning_rate": 3.541265186426477e-05, |
|
"loss": 6.5628, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 8.79765395894428, |
|
"grad_norm": 2.8429343700408936, |
|
"learning_rate": 3.534282921379696e-05, |
|
"loss": 6.5823, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.839547549224969, |
|
"grad_norm": 2.4361233711242676, |
|
"learning_rate": 3.527300656332915e-05, |
|
"loss": 6.5853, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 8.881441139505656, |
|
"grad_norm": 2.5633111000061035, |
|
"learning_rate": 3.5203183912861335e-05, |
|
"loss": 6.5979, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 8.923334729786342, |
|
"grad_norm": 2.350463628768921, |
|
"learning_rate": 3.513336126239352e-05, |
|
"loss": 6.5744, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 8.96522832006703, |
|
"grad_norm": 2.456291675567627, |
|
"learning_rate": 3.506353861192571e-05, |
|
"loss": 6.57, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 9.007121910347717, |
|
"grad_norm": 2.401036262512207, |
|
"learning_rate": 3.49937159614579e-05, |
|
"loss": 6.5614, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 9.049015500628403, |
|
"grad_norm": 2.5537233352661133, |
|
"learning_rate": 3.4923893310990085e-05, |
|
"loss": 6.5836, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 9.090909090909092, |
|
"grad_norm": 2.6386375427246094, |
|
"learning_rate": 3.4854070660522276e-05, |
|
"loss": 6.6178, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 9.132802681189778, |
|
"grad_norm": 2.508533477783203, |
|
"learning_rate": 3.478424801005447e-05, |
|
"loss": 6.5761, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 9.174696271470465, |
|
"grad_norm": 3.1510419845581055, |
|
"learning_rate": 3.471442535958665e-05, |
|
"loss": 6.558, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 9.216589861751151, |
|
"grad_norm": 2.6325526237487793, |
|
"learning_rate": 3.464460270911884e-05, |
|
"loss": 6.5661, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.25848345203184, |
|
"grad_norm": 2.9870827198028564, |
|
"learning_rate": 3.457478005865103e-05, |
|
"loss": 6.5392, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 9.300377042312526, |
|
"grad_norm": 2.4924209117889404, |
|
"learning_rate": 3.450495740818322e-05, |
|
"loss": 6.547, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 9.342270632593213, |
|
"grad_norm": 2.3227298259735107, |
|
"learning_rate": 3.44351347577154e-05, |
|
"loss": 6.5306, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 9.3841642228739, |
|
"grad_norm": 2.867182731628418, |
|
"learning_rate": 3.436531210724759e-05, |
|
"loss": 6.5628, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 9.426057813154587, |
|
"grad_norm": 2.2619149684906006, |
|
"learning_rate": 3.429548945677978e-05, |
|
"loss": 6.5192, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.467951403435274, |
|
"grad_norm": 2.232321262359619, |
|
"learning_rate": 3.422636503281665e-05, |
|
"loss": 6.56, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 9.509844993715962, |
|
"grad_norm": 2.4485862255096436, |
|
"learning_rate": 3.4156542382348835e-05, |
|
"loss": 6.557, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 9.551738583996649, |
|
"grad_norm": 2.4476943016052246, |
|
"learning_rate": 3.4086719731881026e-05, |
|
"loss": 6.5314, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 9.593632174277335, |
|
"grad_norm": 2.491731643676758, |
|
"learning_rate": 3.401689708141321e-05, |
|
"loss": 6.4952, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 9.635525764558023, |
|
"grad_norm": 2.6474783420562744, |
|
"learning_rate": 3.39470744309454e-05, |
|
"loss": 6.5499, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.67741935483871, |
|
"grad_norm": 2.5691514015197754, |
|
"learning_rate": 3.3877251780477585e-05, |
|
"loss": 6.5417, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 9.719312945119396, |
|
"grad_norm": 2.601832151412964, |
|
"learning_rate": 3.3807429130009776e-05, |
|
"loss": 6.5584, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 9.761206535400083, |
|
"grad_norm": 3.481239080429077, |
|
"learning_rate": 3.373760647954197e-05, |
|
"loss": 6.5403, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 9.803100125680771, |
|
"grad_norm": 3.0747485160827637, |
|
"learning_rate": 3.366778382907415e-05, |
|
"loss": 6.5751, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 9.844993715961458, |
|
"grad_norm": 2.2310988903045654, |
|
"learning_rate": 3.359796117860634e-05, |
|
"loss": 6.5046, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 9.886887306242144, |
|
"grad_norm": 2.4555273056030273, |
|
"learning_rate": 3.352813852813853e-05, |
|
"loss": 6.5544, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 9.928780896522833, |
|
"grad_norm": 3.1235666275024414, |
|
"learning_rate": 3.345831587767072e-05, |
|
"loss": 6.5396, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 9.970674486803519, |
|
"grad_norm": 2.2766611576080322, |
|
"learning_rate": 3.338849322720291e-05, |
|
"loss": 6.5542, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 10.012568077084206, |
|
"grad_norm": 3.0408995151519775, |
|
"learning_rate": 3.331867057673509e-05, |
|
"loss": 6.4978, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 10.054461667364894, |
|
"grad_norm": 2.8702831268310547, |
|
"learning_rate": 3.3248847926267283e-05, |
|
"loss": 6.5264, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 10.09635525764558, |
|
"grad_norm": 2.9117937088012695, |
|
"learning_rate": 3.317902527579947e-05, |
|
"loss": 6.5028, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 10.138248847926267, |
|
"grad_norm": 2.925631046295166, |
|
"learning_rate": 3.310920262533166e-05, |
|
"loss": 6.5143, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 10.180142438206955, |
|
"grad_norm": 2.6605536937713623, |
|
"learning_rate": 3.303937997486385e-05, |
|
"loss": 6.5394, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 10.222036028487642, |
|
"grad_norm": 2.31357479095459, |
|
"learning_rate": 3.2969557324396034e-05, |
|
"loss": 6.5224, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 10.263929618768328, |
|
"grad_norm": 2.6544747352600098, |
|
"learning_rate": 3.2899734673928225e-05, |
|
"loss": 6.5035, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 10.305823209049015, |
|
"grad_norm": 2.5945372581481934, |
|
"learning_rate": 3.2830610249965085e-05, |
|
"loss": 6.4977, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 10.347716799329703, |
|
"grad_norm": 3.120873212814331, |
|
"learning_rate": 3.2760787599497276e-05, |
|
"loss": 6.5399, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 10.38961038961039, |
|
"grad_norm": 2.705008029937744, |
|
"learning_rate": 3.269096494902947e-05, |
|
"loss": 6.4938, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 10.431503979891076, |
|
"grad_norm": 2.2395503520965576, |
|
"learning_rate": 3.262114229856166e-05, |
|
"loss": 6.4854, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 10.473397570171764, |
|
"grad_norm": 2.5891764163970947, |
|
"learning_rate": 3.255131964809384e-05, |
|
"loss": 6.5107, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.51529116045245, |
|
"grad_norm": 3.115931749343872, |
|
"learning_rate": 3.248149699762603e-05, |
|
"loss": 6.5389, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 10.557184750733137, |
|
"grad_norm": 2.264437675476074, |
|
"learning_rate": 3.2411674347158224e-05, |
|
"loss": 6.51, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 10.599078341013826, |
|
"grad_norm": 3.449631690979004, |
|
"learning_rate": 3.234185169669041e-05, |
|
"loss": 6.5161, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 10.640971931294512, |
|
"grad_norm": 2.478337526321411, |
|
"learning_rate": 3.227202904622259e-05, |
|
"loss": 6.5019, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 10.682865521575199, |
|
"grad_norm": 3.2756478786468506, |
|
"learning_rate": 3.2202206395754784e-05, |
|
"loss": 6.4869, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.724759111855885, |
|
"grad_norm": 2.7576985359191895, |
|
"learning_rate": 3.213238374528697e-05, |
|
"loss": 6.5206, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 10.766652702136573, |
|
"grad_norm": 2.200963020324707, |
|
"learning_rate": 3.206256109481916e-05, |
|
"loss": 6.48, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 10.80854629241726, |
|
"grad_norm": 2.7358744144439697, |
|
"learning_rate": 3.199273844435135e-05, |
|
"loss": 6.5126, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 10.850439882697946, |
|
"grad_norm": 2.7179319858551025, |
|
"learning_rate": 3.192291579388354e-05, |
|
"loss": 6.4699, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 10.892333472978635, |
|
"grad_norm": 2.811340808868408, |
|
"learning_rate": 3.1853093143415725e-05, |
|
"loss": 6.5056, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 10.934227063259321, |
|
"grad_norm": 3.010690450668335, |
|
"learning_rate": 3.1783270492947916e-05, |
|
"loss": 6.5103, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 10.976120653540008, |
|
"grad_norm": 3.213487148284912, |
|
"learning_rate": 3.171344784248011e-05, |
|
"loss": 6.4874, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 11.018014243820696, |
|
"grad_norm": 2.5710039138793945, |
|
"learning_rate": 3.164362519201229e-05, |
|
"loss": 6.4919, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 11.059907834101383, |
|
"grad_norm": 2.6933746337890625, |
|
"learning_rate": 3.1573802541544475e-05, |
|
"loss": 6.5284, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 11.101801424382069, |
|
"grad_norm": 3.775012254714966, |
|
"learning_rate": 3.1503979891076666e-05, |
|
"loss": 6.4894, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 11.143695014662757, |
|
"grad_norm": 3.2401301860809326, |
|
"learning_rate": 3.1434855467113534e-05, |
|
"loss": 6.4721, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 11.185588604943444, |
|
"grad_norm": 2.642794132232666, |
|
"learning_rate": 3.1365032816645725e-05, |
|
"loss": 6.4797, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 11.22748219522413, |
|
"grad_norm": 3.191567897796631, |
|
"learning_rate": 3.129521016617791e-05, |
|
"loss": 6.5022, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 11.269375785504817, |
|
"grad_norm": 2.816554307937622, |
|
"learning_rate": 3.12253875157101e-05, |
|
"loss": 6.4853, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 11.311269375785505, |
|
"grad_norm": 2.8666136264801025, |
|
"learning_rate": 3.1155564865242284e-05, |
|
"loss": 6.4839, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 11.353162966066192, |
|
"grad_norm": 2.9831254482269287, |
|
"learning_rate": 3.1085742214774475e-05, |
|
"loss": 6.5082, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 11.395056556346878, |
|
"grad_norm": 2.7065083980560303, |
|
"learning_rate": 3.101591956430666e-05, |
|
"loss": 6.4412, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 11.436950146627566, |
|
"grad_norm": 2.5580694675445557, |
|
"learning_rate": 3.094609691383885e-05, |
|
"loss": 6.4849, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 11.478843736908253, |
|
"grad_norm": 2.571390390396118, |
|
"learning_rate": 3.087627426337104e-05, |
|
"loss": 6.4689, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 11.52073732718894, |
|
"grad_norm": 2.835906982421875, |
|
"learning_rate": 3.0806451612903225e-05, |
|
"loss": 6.4887, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.562630917469628, |
|
"grad_norm": 3.1355161666870117, |
|
"learning_rate": 3.0736628962435416e-05, |
|
"loss": 6.4568, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 11.604524507750314, |
|
"grad_norm": 3.0155599117279053, |
|
"learning_rate": 3.066680631196761e-05, |
|
"loss": 6.4607, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 11.646418098031, |
|
"grad_norm": 2.6346957683563232, |
|
"learning_rate": 3.059698366149979e-05, |
|
"loss": 6.4706, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 11.688311688311689, |
|
"grad_norm": 2.4353625774383545, |
|
"learning_rate": 3.052716101103198e-05, |
|
"loss": 6.482, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 11.730205278592376, |
|
"grad_norm": 3.29835844039917, |
|
"learning_rate": 3.045733836056417e-05, |
|
"loss": 6.4625, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 11.772098868873062, |
|
"grad_norm": 2.233579158782959, |
|
"learning_rate": 3.038751571009636e-05, |
|
"loss": 6.4727, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 11.813992459153749, |
|
"grad_norm": 2.5708439350128174, |
|
"learning_rate": 3.0317693059628545e-05, |
|
"loss": 6.4751, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 11.855886049434437, |
|
"grad_norm": 2.29488205909729, |
|
"learning_rate": 3.0247870409160732e-05, |
|
"loss": 6.4599, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 11.897779639715123, |
|
"grad_norm": 2.858208179473877, |
|
"learning_rate": 3.0178047758692923e-05, |
|
"loss": 6.469, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 11.93967322999581, |
|
"grad_norm": 2.854923725128174, |
|
"learning_rate": 3.0108225108225107e-05, |
|
"loss": 6.4995, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 11.981566820276498, |
|
"grad_norm": 2.590484857559204, |
|
"learning_rate": 3.003910068426198e-05, |
|
"loss": 6.4508, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 12.023460410557185, |
|
"grad_norm": 3.3479676246643066, |
|
"learning_rate": 2.9969278033794163e-05, |
|
"loss": 6.4581, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 12.065354000837871, |
|
"grad_norm": 2.7855923175811768, |
|
"learning_rate": 2.9899455383326354e-05, |
|
"loss": 6.4744, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 12.10724759111856, |
|
"grad_norm": 3.2668962478637695, |
|
"learning_rate": 2.982963273285854e-05, |
|
"loss": 6.4731, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 12.149141181399246, |
|
"grad_norm": 2.850735664367676, |
|
"learning_rate": 2.9759810082390725e-05, |
|
"loss": 6.4788, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 12.191034771679933, |
|
"grad_norm": 2.9676952362060547, |
|
"learning_rate": 2.9689987431922916e-05, |
|
"loss": 6.4525, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 12.23292836196062, |
|
"grad_norm": 2.604408025741577, |
|
"learning_rate": 2.9620164781455107e-05, |
|
"loss": 6.4564, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 12.274821952241307, |
|
"grad_norm": 2.974653482437134, |
|
"learning_rate": 2.9550342130987295e-05, |
|
"loss": 6.463, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 12.316715542521994, |
|
"grad_norm": 3.372664213180542, |
|
"learning_rate": 2.9480519480519482e-05, |
|
"loss": 6.464, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 12.35860913280268, |
|
"grad_norm": 2.6891355514526367, |
|
"learning_rate": 2.941069683005167e-05, |
|
"loss": 6.4674, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 12.400502723083369, |
|
"grad_norm": 2.964113473892212, |
|
"learning_rate": 2.934087417958386e-05, |
|
"loss": 6.4539, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 12.442396313364055, |
|
"grad_norm": 2.7328097820281982, |
|
"learning_rate": 2.9271051529116045e-05, |
|
"loss": 6.4224, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 12.484289903644742, |
|
"grad_norm": 2.6205203533172607, |
|
"learning_rate": 2.9201228878648236e-05, |
|
"loss": 6.4266, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 12.52618349392543, |
|
"grad_norm": 3.681053400039673, |
|
"learning_rate": 2.9131406228180424e-05, |
|
"loss": 6.4549, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 12.568077084206116, |
|
"grad_norm": 2.9732627868652344, |
|
"learning_rate": 2.9061583577712608e-05, |
|
"loss": 6.4466, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.609970674486803, |
|
"grad_norm": 3.47816801071167, |
|
"learning_rate": 2.89917609272448e-05, |
|
"loss": 6.4408, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 12.651864264767491, |
|
"grad_norm": 2.70326566696167, |
|
"learning_rate": 2.892193827677699e-05, |
|
"loss": 6.4444, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 12.693757855048178, |
|
"grad_norm": 2.9219532012939453, |
|
"learning_rate": 2.8852115626309177e-05, |
|
"loss": 6.4183, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 12.735651445328864, |
|
"grad_norm": 2.8546571731567383, |
|
"learning_rate": 2.878229297584136e-05, |
|
"loss": 6.4399, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 12.777545035609553, |
|
"grad_norm": 2.95047926902771, |
|
"learning_rate": 2.8712470325373552e-05, |
|
"loss": 6.4396, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 12.819438625890239, |
|
"grad_norm": 3.397934675216675, |
|
"learning_rate": 2.8643345901410416e-05, |
|
"loss": 6.438, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 12.861332216170926, |
|
"grad_norm": 2.625852346420288, |
|
"learning_rate": 2.8573523250942607e-05, |
|
"loss": 6.4363, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 12.903225806451612, |
|
"grad_norm": 2.5299527645111084, |
|
"learning_rate": 2.85037006004748e-05, |
|
"loss": 6.3952, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 12.9451193967323, |
|
"grad_norm": 2.6445415019989014, |
|
"learning_rate": 2.8433877950006983e-05, |
|
"loss": 6.4559, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 12.987012987012987, |
|
"grad_norm": 2.9675769805908203, |
|
"learning_rate": 2.836405529953917e-05, |
|
"loss": 6.4447, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 13.028906577293673, |
|
"grad_norm": 2.607391119003296, |
|
"learning_rate": 2.829423264907136e-05, |
|
"loss": 6.446, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 13.070800167574362, |
|
"grad_norm": 3.196765661239624, |
|
"learning_rate": 2.8224409998603545e-05, |
|
"loss": 6.4336, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 13.112693757855048, |
|
"grad_norm": 5.778535842895508, |
|
"learning_rate": 2.8154587348135736e-05, |
|
"loss": 6.4339, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 13.154587348135735, |
|
"grad_norm": 3.0479419231414795, |
|
"learning_rate": 2.8084764697667927e-05, |
|
"loss": 6.4147, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 13.196480938416423, |
|
"grad_norm": 2.6787302494049072, |
|
"learning_rate": 2.8014942047200115e-05, |
|
"loss": 6.4312, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 13.23837452869711, |
|
"grad_norm": 2.7929670810699463, |
|
"learning_rate": 2.79451193967323e-05, |
|
"loss": 6.4224, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 13.280268118977796, |
|
"grad_norm": 2.722101926803589, |
|
"learning_rate": 2.787529674626449e-05, |
|
"loss": 6.4247, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 13.322161709258484, |
|
"grad_norm": 3.295348644256592, |
|
"learning_rate": 2.780547409579668e-05, |
|
"loss": 6.4435, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 13.36405529953917, |
|
"grad_norm": 2.5780696868896484, |
|
"learning_rate": 2.7735651445328865e-05, |
|
"loss": 6.406, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 13.405948889819857, |
|
"grad_norm": 2.955299139022827, |
|
"learning_rate": 2.7665828794861053e-05, |
|
"loss": 6.4633, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 13.447842480100544, |
|
"grad_norm": 3.8027708530426025, |
|
"learning_rate": 2.7596006144393244e-05, |
|
"loss": 6.4445, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 13.489736070381232, |
|
"grad_norm": 2.6895995140075684, |
|
"learning_rate": 2.7526183493925428e-05, |
|
"loss": 6.4015, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 13.531629660661919, |
|
"grad_norm": 2.6936516761779785, |
|
"learning_rate": 2.745636084345762e-05, |
|
"loss": 6.4211, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 13.573523250942605, |
|
"grad_norm": 2.948420763015747, |
|
"learning_rate": 2.738653819298981e-05, |
|
"loss": 6.4042, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 13.615416841223293, |
|
"grad_norm": 2.763885974884033, |
|
"learning_rate": 2.7316715542521997e-05, |
|
"loss": 6.393, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.65731043150398, |
|
"grad_norm": 3.1601672172546387, |
|
"learning_rate": 2.724759111855886e-05, |
|
"loss": 6.4398, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 13.699204021784666, |
|
"grad_norm": 2.4161715507507324, |
|
"learning_rate": 2.7177768468091052e-05, |
|
"loss": 6.401, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 13.741097612065355, |
|
"grad_norm": 3.0796055793762207, |
|
"learning_rate": 2.7107945817623236e-05, |
|
"loss": 6.4265, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 13.782991202346041, |
|
"grad_norm": 3.6223697662353516, |
|
"learning_rate": 2.7038123167155427e-05, |
|
"loss": 6.4075, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 13.824884792626728, |
|
"grad_norm": 2.6991615295410156, |
|
"learning_rate": 2.696830051668762e-05, |
|
"loss": 6.3912, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 13.866778382907416, |
|
"grad_norm": 3.1701860427856445, |
|
"learning_rate": 2.6898477866219803e-05, |
|
"loss": 6.4173, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 13.908671973188103, |
|
"grad_norm": 2.915432929992676, |
|
"learning_rate": 2.682865521575199e-05, |
|
"loss": 6.4179, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 13.950565563468789, |
|
"grad_norm": 3.155080795288086, |
|
"learning_rate": 2.675883256528418e-05, |
|
"loss": 6.3895, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 13.992459153749476, |
|
"grad_norm": 3.3861114978790283, |
|
"learning_rate": 2.6689009914816365e-05, |
|
"loss": 6.4279, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 14.034352744030164, |
|
"grad_norm": 3.301805019378662, |
|
"learning_rate": 2.6619187264348556e-05, |
|
"loss": 6.4072, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 14.07624633431085, |
|
"grad_norm": 3.305147171020508, |
|
"learning_rate": 2.6549364613880744e-05, |
|
"loss": 6.3949, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 14.118139924591537, |
|
"grad_norm": 2.7602477073669434, |
|
"learning_rate": 2.6479541963412935e-05, |
|
"loss": 6.4048, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 14.160033514872225, |
|
"grad_norm": 2.5257952213287354, |
|
"learning_rate": 2.640971931294512e-05, |
|
"loss": 6.4033, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 14.201927105152912, |
|
"grad_norm": 2.4649853706359863, |
|
"learning_rate": 2.633989666247731e-05, |
|
"loss": 6.374, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 14.243820695433598, |
|
"grad_norm": 2.7136335372924805, |
|
"learning_rate": 2.6270074012009497e-05, |
|
"loss": 6.3993, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 2.801712989807129, |
|
"learning_rate": 2.6200251361541685e-05, |
|
"loss": 6.4059, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 14.327607875994973, |
|
"grad_norm": 2.7054030895233154, |
|
"learning_rate": 2.6130428711073873e-05, |
|
"loss": 6.431, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 14.36950146627566, |
|
"grad_norm": 2.653932809829712, |
|
"learning_rate": 2.6060606060606063e-05, |
|
"loss": 6.4035, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 14.411395056556348, |
|
"grad_norm": 2.5450570583343506, |
|
"learning_rate": 2.5990783410138248e-05, |
|
"loss": 6.417, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 14.453288646837034, |
|
"grad_norm": 2.9578003883361816, |
|
"learning_rate": 2.592096075967044e-05, |
|
"loss": 6.4087, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 14.49518223711772, |
|
"grad_norm": 2.9408493041992188, |
|
"learning_rate": 2.5851836335707303e-05, |
|
"loss": 6.3936, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 14.537075827398407, |
|
"grad_norm": 2.756441116333008, |
|
"learning_rate": 2.5782013685239494e-05, |
|
"loss": 6.404, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 14.578969417679096, |
|
"grad_norm": 3.685004711151123, |
|
"learning_rate": 2.571219103477168e-05, |
|
"loss": 6.3932, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 14.620863007959782, |
|
"grad_norm": 2.670825719833374, |
|
"learning_rate": 2.5642368384303872e-05, |
|
"loss": 6.3839, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 14.662756598240469, |
|
"grad_norm": 3.0986082553863525, |
|
"learning_rate": 2.5572545733836056e-05, |
|
"loss": 6.3782, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 14.704650188521157, |
|
"grad_norm": 3.003432273864746, |
|
"learning_rate": 2.5502723083368247e-05, |
|
"loss": 6.3775, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 14.746543778801843, |
|
"grad_norm": 2.752516269683838, |
|
"learning_rate": 2.5432900432900435e-05, |
|
"loss": 6.3731, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 14.78843736908253, |
|
"grad_norm": 2.7697649002075195, |
|
"learning_rate": 2.536307778243262e-05, |
|
"loss": 6.3701, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 14.830330959363218, |
|
"grad_norm": 3.0245521068573, |
|
"learning_rate": 2.529325513196481e-05, |
|
"loss": 6.3916, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 14.872224549643905, |
|
"grad_norm": 3.1849350929260254, |
|
"learning_rate": 2.5223432481497e-05, |
|
"loss": 6.3993, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 14.914118139924591, |
|
"grad_norm": 3.6655123233795166, |
|
"learning_rate": 2.5153609831029185e-05, |
|
"loss": 6.3791, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 14.95601173020528, |
|
"grad_norm": 3.2252790927886963, |
|
"learning_rate": 2.5083787180561376e-05, |
|
"loss": 6.3865, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 14.997905320485966, |
|
"grad_norm": 2.8366169929504395, |
|
"learning_rate": 2.5013964530093564e-05, |
|
"loss": 6.3897, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 15.039798910766653, |
|
"grad_norm": 2.757725715637207, |
|
"learning_rate": 2.494414187962575e-05, |
|
"loss": 6.376, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 15.081692501047339, |
|
"grad_norm": 3.1640422344207764, |
|
"learning_rate": 2.4874319229157942e-05, |
|
"loss": 6.3796, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 15.123586091328027, |
|
"grad_norm": 2.849719285964966, |
|
"learning_rate": 2.480449657869013e-05, |
|
"loss": 6.3765, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 15.165479681608714, |
|
"grad_norm": 2.7223923206329346, |
|
"learning_rate": 2.4734673928222314e-05, |
|
"loss": 6.3953, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 15.2073732718894, |
|
"grad_norm": 3.173750162124634, |
|
"learning_rate": 2.4664851277754505e-05, |
|
"loss": 6.3724, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 15.249266862170089, |
|
"grad_norm": 3.054779529571533, |
|
"learning_rate": 2.4595028627286692e-05, |
|
"loss": 6.3764, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 15.291160452450775, |
|
"grad_norm": 3.277862071990967, |
|
"learning_rate": 2.4525205976818883e-05, |
|
"loss": 6.3583, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 15.333054042731462, |
|
"grad_norm": 2.9208297729492188, |
|
"learning_rate": 2.4456081552855748e-05, |
|
"loss": 6.3878, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 15.37494763301215, |
|
"grad_norm": 2.5356411933898926, |
|
"learning_rate": 2.4386258902387935e-05, |
|
"loss": 6.3705, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 15.416841223292836, |
|
"grad_norm": 2.8953468799591064, |
|
"learning_rate": 2.4316436251920126e-05, |
|
"loss": 6.3947, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 15.458734813573523, |
|
"grad_norm": 2.9166266918182373, |
|
"learning_rate": 2.424661360145231e-05, |
|
"loss": 6.3809, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 15.50062840385421, |
|
"grad_norm": 3.4554710388183594, |
|
"learning_rate": 2.41767909509845e-05, |
|
"loss": 6.3746, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 15.542521994134898, |
|
"grad_norm": 3.7208077907562256, |
|
"learning_rate": 2.410696830051669e-05, |
|
"loss": 6.3758, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 15.584415584415584, |
|
"grad_norm": 3.3161842823028564, |
|
"learning_rate": 2.4037145650048876e-05, |
|
"loss": 6.3744, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 15.62630917469627, |
|
"grad_norm": 2.4062047004699707, |
|
"learning_rate": 2.3967322999581064e-05, |
|
"loss": 6.381, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 15.668202764976959, |
|
"grad_norm": 3.1894476413726807, |
|
"learning_rate": 2.389750034911325e-05, |
|
"loss": 6.3895, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 15.710096355257646, |
|
"grad_norm": 2.9203104972839355, |
|
"learning_rate": 2.3827677698645442e-05, |
|
"loss": 6.363, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 15.751989945538332, |
|
"grad_norm": 3.000694513320923, |
|
"learning_rate": 2.375785504817763e-05, |
|
"loss": 6.3837, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 15.79388353581902, |
|
"grad_norm": 2.838684558868408, |
|
"learning_rate": 2.368803239770982e-05, |
|
"loss": 6.3859, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 15.835777126099707, |
|
"grad_norm": 2.648862361907959, |
|
"learning_rate": 2.3618209747242005e-05, |
|
"loss": 6.3411, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 15.877670716380393, |
|
"grad_norm": 3.5438232421875, |
|
"learning_rate": 2.3548387096774193e-05, |
|
"loss": 6.3627, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 15.91956430666108, |
|
"grad_norm": 2.8182501792907715, |
|
"learning_rate": 2.3478564446306384e-05, |
|
"loss": 6.3731, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 15.961457896941768, |
|
"grad_norm": 3.3253772258758545, |
|
"learning_rate": 2.340874179583857e-05, |
|
"loss": 6.396, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 16.003351487222456, |
|
"grad_norm": 3.668926954269409, |
|
"learning_rate": 2.3338919145370762e-05, |
|
"loss": 6.3437, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 16.045245077503143, |
|
"grad_norm": 3.028989315032959, |
|
"learning_rate": 2.3269096494902946e-05, |
|
"loss": 6.3837, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 16.08713866778383, |
|
"grad_norm": 3.220702648162842, |
|
"learning_rate": 2.3199273844435134e-05, |
|
"loss": 6.3609, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 16.129032258064516, |
|
"grad_norm": 3.1788036823272705, |
|
"learning_rate": 2.3129451193967325e-05, |
|
"loss": 6.3723, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 16.170925848345203, |
|
"grad_norm": 3.351151466369629, |
|
"learning_rate": 2.306032677000419e-05, |
|
"loss": 6.3731, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 16.21281943862589, |
|
"grad_norm": 2.933992862701416, |
|
"learning_rate": 2.299050411953638e-05, |
|
"loss": 6.3654, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 16.25471302890658, |
|
"grad_norm": 4.2123589515686035, |
|
"learning_rate": 2.2920681469068568e-05, |
|
"loss": 6.3364, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 16.296606619187266, |
|
"grad_norm": 2.9287397861480713, |
|
"learning_rate": 2.2850858818600755e-05, |
|
"loss": 6.3643, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 16.338500209467952, |
|
"grad_norm": 2.6518173217773438, |
|
"learning_rate": 2.2781036168132943e-05, |
|
"loss": 6.3538, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 16.38039379974864, |
|
"grad_norm": 3.490497589111328, |
|
"learning_rate": 2.271121351766513e-05, |
|
"loss": 6.365, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 16.422287390029325, |
|
"grad_norm": 3.090874195098877, |
|
"learning_rate": 2.264139086719732e-05, |
|
"loss": 6.3513, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 16.46418098031001, |
|
"grad_norm": 2.793083429336548, |
|
"learning_rate": 2.257156821672951e-05, |
|
"loss": 6.3815, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 16.506074570590698, |
|
"grad_norm": 2.656334638595581, |
|
"learning_rate": 2.2501745566261696e-05, |
|
"loss": 6.3677, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 16.547968160871388, |
|
"grad_norm": 2.950857162475586, |
|
"learning_rate": 2.2431922915793884e-05, |
|
"loss": 6.3601, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 16.589861751152075, |
|
"grad_norm": 2.948397636413574, |
|
"learning_rate": 2.236210026532607e-05, |
|
"loss": 6.3633, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 16.63175534143276, |
|
"grad_norm": 3.759934902191162, |
|
"learning_rate": 2.2292277614858262e-05, |
|
"loss": 6.3664, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 16.673648931713448, |
|
"grad_norm": 2.6607794761657715, |
|
"learning_rate": 2.222245496439045e-05, |
|
"loss": 6.3659, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 16.715542521994134, |
|
"grad_norm": 3.2569267749786377, |
|
"learning_rate": 2.2152632313922638e-05, |
|
"loss": 6.3477, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 16.75743611227482, |
|
"grad_norm": 3.1701977252960205, |
|
"learning_rate": 2.2082809663454825e-05, |
|
"loss": 6.3466, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 16.79932970255551, |
|
"grad_norm": 2.8855369091033936, |
|
"learning_rate": 2.2012987012987013e-05, |
|
"loss": 6.3774, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 16.841223292836197, |
|
"grad_norm": 2.8468215465545654, |
|
"learning_rate": 2.1943164362519204e-05, |
|
"loss": 6.3388, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 16.883116883116884, |
|
"grad_norm": 3.3314404487609863, |
|
"learning_rate": 2.187334171205139e-05, |
|
"loss": 6.3658, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 16.92501047339757, |
|
"grad_norm": 3.023106336593628, |
|
"learning_rate": 2.180351906158358e-05, |
|
"loss": 6.3443, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 16.966904063678257, |
|
"grad_norm": 3.2845230102539062, |
|
"learning_rate": 2.1733696411115766e-05, |
|
"loss": 6.3785, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 17.008797653958943, |
|
"grad_norm": 2.805790424346924, |
|
"learning_rate": 2.166457198715263e-05, |
|
"loss": 6.3792, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 17.05069124423963, |
|
"grad_norm": 2.893737554550171, |
|
"learning_rate": 2.159474933668482e-05, |
|
"loss": 6.3138, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 17.09258483452032, |
|
"grad_norm": 3.238863945007324, |
|
"learning_rate": 2.1525624912721686e-05, |
|
"loss": 6.3686, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 17.134478424801006, |
|
"grad_norm": 3.403582811355591, |
|
"learning_rate": 2.1455802262253877e-05, |
|
"loss": 6.3312, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 17.176372015081693, |
|
"grad_norm": 2.963287353515625, |
|
"learning_rate": 2.1385979611786064e-05, |
|
"loss": 6.3515, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 17.21826560536238, |
|
"grad_norm": 3.867340087890625, |
|
"learning_rate": 2.1316156961318255e-05, |
|
"loss": 6.3566, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 17.260159195643066, |
|
"grad_norm": 2.841190814971924, |
|
"learning_rate": 2.124633431085044e-05, |
|
"loss": 6.3308, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 17.302052785923753, |
|
"grad_norm": 2.872523307800293, |
|
"learning_rate": 2.1176511660382627e-05, |
|
"loss": 6.3433, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 17.34394637620444, |
|
"grad_norm": 3.156465530395508, |
|
"learning_rate": 2.1106689009914818e-05, |
|
"loss": 6.3779, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 17.38583996648513, |
|
"grad_norm": 3.5904667377471924, |
|
"learning_rate": 2.1036866359447005e-05, |
|
"loss": 6.3402, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 17.427733556765816, |
|
"grad_norm": 3.5753939151763916, |
|
"learning_rate": 2.0967043708979196e-05, |
|
"loss": 6.3572, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 17.469627147046502, |
|
"grad_norm": 3.129514217376709, |
|
"learning_rate": 2.089722105851138e-05, |
|
"loss": 6.3302, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 17.51152073732719, |
|
"grad_norm": 2.988732099533081, |
|
"learning_rate": 2.0827398408043568e-05, |
|
"loss": 6.3807, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 17.553414327607875, |
|
"grad_norm": 2.857875108718872, |
|
"learning_rate": 2.075757575757576e-05, |
|
"loss": 6.3519, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 17.59530791788856, |
|
"grad_norm": 4.023842811584473, |
|
"learning_rate": 2.0687753107107947e-05, |
|
"loss": 6.3467, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 17.63720150816925, |
|
"grad_norm": 3.049686908721924, |
|
"learning_rate": 2.0617930456640137e-05, |
|
"loss": 6.3306, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 17.679095098449938, |
|
"grad_norm": 3.3211073875427246, |
|
"learning_rate": 2.054810780617232e-05, |
|
"loss": 6.3611, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 17.720988688730625, |
|
"grad_norm": 3.064138174057007, |
|
"learning_rate": 2.047828515570451e-05, |
|
"loss": 6.3217, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 17.76288227901131, |
|
"grad_norm": 2.7812724113464355, |
|
"learning_rate": 2.04084625052367e-05, |
|
"loss": 6.3131, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 17.804775869291998, |
|
"grad_norm": 2.5516164302825928, |
|
"learning_rate": 2.0338639854768888e-05, |
|
"loss": 6.3428, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 17.846669459572684, |
|
"grad_norm": 2.9599711894989014, |
|
"learning_rate": 2.026881720430108e-05, |
|
"loss": 6.3545, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 17.88856304985337, |
|
"grad_norm": 2.8674137592315674, |
|
"learning_rate": 2.0198994553833263e-05, |
|
"loss": 6.3302, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 17.93045664013406, |
|
"grad_norm": 3.3227078914642334, |
|
"learning_rate": 2.012917190336545e-05, |
|
"loss": 6.3278, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 17.972350230414747, |
|
"grad_norm": 3.080399751663208, |
|
"learning_rate": 2.005934925289764e-05, |
|
"loss": 6.3206, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 18.014243820695434, |
|
"grad_norm": 4.004719257354736, |
|
"learning_rate": 1.998952660242983e-05, |
|
"loss": 6.3407, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 18.05613741097612, |
|
"grad_norm": 2.8186423778533936, |
|
"learning_rate": 1.991970395196202e-05, |
|
"loss": 6.3136, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 18.098031001256807, |
|
"grad_norm": 2.81748104095459, |
|
"learning_rate": 1.9849881301494204e-05, |
|
"loss": 6.3353, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 18.139924591537493, |
|
"grad_norm": 2.9991416931152344, |
|
"learning_rate": 1.9780058651026395e-05, |
|
"loss": 6.3194, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 18.181818181818183, |
|
"grad_norm": 3.4876794815063477, |
|
"learning_rate": 1.9710236000558583e-05, |
|
"loss": 6.3293, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 18.22371177209887, |
|
"grad_norm": 3.0756711959838867, |
|
"learning_rate": 1.964041335009077e-05, |
|
"loss": 6.341, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 18.265605362379556, |
|
"grad_norm": 3.171670436859131, |
|
"learning_rate": 1.9570590699622958e-05, |
|
"loss": 6.3075, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 18.307498952660243, |
|
"grad_norm": 3.3317439556121826, |
|
"learning_rate": 1.9500768049155145e-05, |
|
"loss": 6.3436, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 18.34939254294093, |
|
"grad_norm": 2.924349308013916, |
|
"learning_rate": 1.9430945398687336e-05, |
|
"loss": 6.3217, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 18.391286133221616, |
|
"grad_norm": 3.247955560684204, |
|
"learning_rate": 1.9361122748219524e-05, |
|
"loss": 6.3324, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 18.433179723502302, |
|
"grad_norm": 3.340263843536377, |
|
"learning_rate": 1.929130009775171e-05, |
|
"loss": 6.2993, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 18.475073313782993, |
|
"grad_norm": 2.973019599914551, |
|
"learning_rate": 1.92214774472839e-05, |
|
"loss": 6.3292, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 18.51696690406368, |
|
"grad_norm": 3.5055582523345947, |
|
"learning_rate": 1.9151654796816086e-05, |
|
"loss": 6.3175, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 18.558860494344366, |
|
"grad_norm": 2.9543776512145996, |
|
"learning_rate": 1.9081832146348277e-05, |
|
"loss": 6.3206, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 18.600754084625052, |
|
"grad_norm": 2.790940284729004, |
|
"learning_rate": 1.9012009495880465e-05, |
|
"loss": 6.3383, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 18.64264767490574, |
|
"grad_norm": 3.419908285140991, |
|
"learning_rate": 1.8942186845412653e-05, |
|
"loss": 6.3329, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 18.684541265186425, |
|
"grad_norm": 3.3396215438842773, |
|
"learning_rate": 1.887236419494484e-05, |
|
"loss": 6.312, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 18.726434855467115, |
|
"grad_norm": 2.6713643074035645, |
|
"learning_rate": 1.8802541544477028e-05, |
|
"loss": 6.315, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 18.7683284457478, |
|
"grad_norm": 3.2764880657196045, |
|
"learning_rate": 1.8733417120513895e-05, |
|
"loss": 6.3311, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 18.810222036028488, |
|
"grad_norm": 3.602581739425659, |
|
"learning_rate": 1.8663594470046083e-05, |
|
"loss": 6.327, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 18.852115626309175, |
|
"grad_norm": 3.052971124649048, |
|
"learning_rate": 1.8593771819578274e-05, |
|
"loss": 6.2911, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 18.89400921658986, |
|
"grad_norm": 3.0912699699401855, |
|
"learning_rate": 1.852394916911046e-05, |
|
"loss": 6.3057, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 18.935902806870548, |
|
"grad_norm": 2.631545305252075, |
|
"learning_rate": 1.845412651864265e-05, |
|
"loss": 6.3381, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 18.977796397151234, |
|
"grad_norm": 3.8213324546813965, |
|
"learning_rate": 1.8384303868174836e-05, |
|
"loss": 6.3123, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 19.019689987431924, |
|
"grad_norm": 3.3717353343963623, |
|
"learning_rate": 1.8314481217707024e-05, |
|
"loss": 6.3194, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 19.06158357771261, |
|
"grad_norm": 2.831409215927124, |
|
"learning_rate": 1.8244658567239215e-05, |
|
"loss": 6.3383, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 19.103477167993297, |
|
"grad_norm": 2.915093183517456, |
|
"learning_rate": 1.8174835916771403e-05, |
|
"loss": 6.3208, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 19.145370758273984, |
|
"grad_norm": 3.1236917972564697, |
|
"learning_rate": 1.810501326630359e-05, |
|
"loss": 6.3089, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 19.18726434855467, |
|
"grad_norm": 3.2876298427581787, |
|
"learning_rate": 1.8035190615835778e-05, |
|
"loss": 6.2975, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 19.229157938835357, |
|
"grad_norm": 2.6437103748321533, |
|
"learning_rate": 1.7965367965367965e-05, |
|
"loss": 6.3341, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 19.271051529116047, |
|
"grad_norm": 2.9252028465270996, |
|
"learning_rate": 1.7895545314900156e-05, |
|
"loss": 6.3404, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 19.312945119396733, |
|
"grad_norm": 3.4250340461730957, |
|
"learning_rate": 1.7825722664432344e-05, |
|
"loss": 6.3072, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 19.35483870967742, |
|
"grad_norm": 3.1287946701049805, |
|
"learning_rate": 1.775590001396453e-05, |
|
"loss": 6.3022, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 19.396732299958106, |
|
"grad_norm": 3.4577419757843018, |
|
"learning_rate": 1.76867755900014e-05, |
|
"loss": 6.2938, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 19.438625890238793, |
|
"grad_norm": 3.7131240367889404, |
|
"learning_rate": 1.7616952939533586e-05, |
|
"loss": 6.3088, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 19.48051948051948, |
|
"grad_norm": 3.6799802780151367, |
|
"learning_rate": 1.7547130289065774e-05, |
|
"loss": 6.3326, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 19.522413070800166, |
|
"grad_norm": 2.834351062774658, |
|
"learning_rate": 1.747730763859796e-05, |
|
"loss": 6.2952, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 19.564306661080856, |
|
"grad_norm": 3.0629451274871826, |
|
"learning_rate": 1.7407484988130152e-05, |
|
"loss": 6.3185, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 19.606200251361543, |
|
"grad_norm": 3.4801712036132812, |
|
"learning_rate": 1.733766233766234e-05, |
|
"loss": 6.3003, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 19.64809384164223, |
|
"grad_norm": 2.8250389099121094, |
|
"learning_rate": 1.7267839687194524e-05, |
|
"loss": 6.3033, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 19.689987431922916, |
|
"grad_norm": 3.5964672565460205, |
|
"learning_rate": 1.7198017036726715e-05, |
|
"loss": 6.293, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 19.731881022203602, |
|
"grad_norm": 2.7947146892547607, |
|
"learning_rate": 1.7128194386258903e-05, |
|
"loss": 6.2884, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 19.77377461248429, |
|
"grad_norm": 3.0473551750183105, |
|
"learning_rate": 1.7058371735791094e-05, |
|
"loss": 6.312, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 19.81566820276498, |
|
"grad_norm": 3.1810736656188965, |
|
"learning_rate": 1.698854908532328e-05, |
|
"loss": 6.3102, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 19.857561793045665, |
|
"grad_norm": 3.0046746730804443, |
|
"learning_rate": 1.6918726434855465e-05, |
|
"loss": 6.3115, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 19.89945538332635, |
|
"grad_norm": 2.6985220909118652, |
|
"learning_rate": 1.6848903784387656e-05, |
|
"loss": 6.3132, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 19.941348973607038, |
|
"grad_norm": 2.958906650543213, |
|
"learning_rate": 1.6779081133919844e-05, |
|
"loss": 6.3024, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 19.983242563887725, |
|
"grad_norm": 3.5484089851379395, |
|
"learning_rate": 1.6709258483452035e-05, |
|
"loss": 6.2989, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 20.02513615416841, |
|
"grad_norm": 4.328272342681885, |
|
"learning_rate": 1.6639435832984222e-05, |
|
"loss": 6.3162, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 20.067029744449098, |
|
"grad_norm": 3.0396926403045654, |
|
"learning_rate": 1.6569613182516407e-05, |
|
"loss": 6.3004, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 20.108923334729788, |
|
"grad_norm": 3.328972339630127, |
|
"learning_rate": 1.6499790532048598e-05, |
|
"loss": 6.2855, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 20.150816925010474, |
|
"grad_norm": 3.301114320755005, |
|
"learning_rate": 1.6429967881580785e-05, |
|
"loss": 6.2874, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 20.19271051529116, |
|
"grad_norm": 3.297041177749634, |
|
"learning_rate": 1.6360145231112976e-05, |
|
"loss": 6.3089, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 20.234604105571847, |
|
"grad_norm": 2.9122605323791504, |
|
"learning_rate": 1.6290322580645164e-05, |
|
"loss": 6.3157, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 20.276497695852534, |
|
"grad_norm": 2.8182084560394287, |
|
"learning_rate": 1.6220499930177348e-05, |
|
"loss": 6.3118, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 20.31839128613322, |
|
"grad_norm": 3.8560192584991455, |
|
"learning_rate": 1.615067727970954e-05, |
|
"loss": 6.2858, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 20.36028487641391, |
|
"grad_norm": 2.457240581512451, |
|
"learning_rate": 1.6080854629241726e-05, |
|
"loss": 6.3077, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 20.402178466694597, |
|
"grad_norm": 3.5376362800598145, |
|
"learning_rate": 1.6011031978773917e-05, |
|
"loss": 6.2892, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 20.444072056975283, |
|
"grad_norm": 3.3489222526550293, |
|
"learning_rate": 1.59412093283061e-05, |
|
"loss": 6.2973, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 20.48596564725597, |
|
"grad_norm": 3.600166082382202, |
|
"learning_rate": 1.587138667783829e-05, |
|
"loss": 6.31, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 20.527859237536656, |
|
"grad_norm": 3.255598783493042, |
|
"learning_rate": 1.580156402737048e-05, |
|
"loss": 6.2389, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 20.569752827817343, |
|
"grad_norm": 3.166994094848633, |
|
"learning_rate": 1.5731741376902668e-05, |
|
"loss": 6.303, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 20.61164641809803, |
|
"grad_norm": 3.615269184112549, |
|
"learning_rate": 1.566191872643486e-05, |
|
"loss": 6.281, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 20.65354000837872, |
|
"grad_norm": 3.1495063304901123, |
|
"learning_rate": 1.5592096075967043e-05, |
|
"loss": 6.2666, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 20.695433598659406, |
|
"grad_norm": 2.9170730113983154, |
|
"learning_rate": 1.552227342549923e-05, |
|
"loss": 6.2738, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 20.737327188940093, |
|
"grad_norm": 3.0922224521636963, |
|
"learning_rate": 1.545245077503142e-05, |
|
"loss": 6.2805, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 20.77922077922078, |
|
"grad_norm": 3.088012933731079, |
|
"learning_rate": 1.538262812456361e-05, |
|
"loss": 6.2906, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 20.821114369501466, |
|
"grad_norm": 2.939486503601074, |
|
"learning_rate": 1.53128054740958e-05, |
|
"loss": 6.2636, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 20.863007959782152, |
|
"grad_norm": 3.597949743270874, |
|
"learning_rate": 1.5242982823627986e-05, |
|
"loss": 6.2745, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 20.90490155006284, |
|
"grad_norm": 3.4760777950286865, |
|
"learning_rate": 1.5173160173160175e-05, |
|
"loss": 6.2702, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 20.94679514034353, |
|
"grad_norm": 3.04856014251709, |
|
"learning_rate": 1.5103337522692362e-05, |
|
"loss": 6.2841, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 20.988688730624215, |
|
"grad_norm": 2.849895477294922, |
|
"learning_rate": 1.503351487222455e-05, |
|
"loss": 6.2814, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 21.0305823209049, |
|
"grad_norm": 3.1246280670166016, |
|
"learning_rate": 1.496369222175674e-05, |
|
"loss": 6.2754, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 21.072475911185588, |
|
"grad_norm": 3.303846836090088, |
|
"learning_rate": 1.4894567797793605e-05, |
|
"loss": 6.2661, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 21.114369501466275, |
|
"grad_norm": 3.5818755626678467, |
|
"learning_rate": 1.4824745147325794e-05, |
|
"loss": 6.2804, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 21.15626309174696, |
|
"grad_norm": 3.0695786476135254, |
|
"learning_rate": 1.4754922496857982e-05, |
|
"loss": 6.284, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 21.19815668202765, |
|
"grad_norm": 3.6067614555358887, |
|
"learning_rate": 1.4685099846390168e-05, |
|
"loss": 6.2863, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 21.240050272308338, |
|
"grad_norm": 3.2230417728424072, |
|
"learning_rate": 1.4615277195922359e-05, |
|
"loss": 6.287, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 21.281943862589024, |
|
"grad_norm": 3.059466600418091, |
|
"learning_rate": 1.4545454545454545e-05, |
|
"loss": 6.2442, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 21.32383745286971, |
|
"grad_norm": 3.7770040035247803, |
|
"learning_rate": 1.4475631894986736e-05, |
|
"loss": 6.2612, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 21.365731043150397, |
|
"grad_norm": 3.3269879817962646, |
|
"learning_rate": 1.4405809244518923e-05, |
|
"loss": 6.2985, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 21.407624633431084, |
|
"grad_norm": 2.649940252304077, |
|
"learning_rate": 1.4335986594051109e-05, |
|
"loss": 6.2343, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 21.44951822371177, |
|
"grad_norm": 3.4042983055114746, |
|
"learning_rate": 1.42661639435833e-05, |
|
"loss": 6.2701, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 21.49141181399246, |
|
"grad_norm": 3.1958000659942627, |
|
"learning_rate": 1.4196341293115486e-05, |
|
"loss": 6.2866, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 21.533305404273147, |
|
"grad_norm": 3.6010313034057617, |
|
"learning_rate": 1.4126518642647677e-05, |
|
"loss": 6.2683, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 21.575198994553833, |
|
"grad_norm": 3.429414749145508, |
|
"learning_rate": 1.4056695992179864e-05, |
|
"loss": 6.2408, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 21.61709258483452, |
|
"grad_norm": 3.069561004638672, |
|
"learning_rate": 1.3986873341712054e-05, |
|
"loss": 6.2641, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 21.658986175115206, |
|
"grad_norm": 3.575247287750244, |
|
"learning_rate": 1.3917050691244241e-05, |
|
"loss": 6.2722, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 21.700879765395893, |
|
"grad_norm": 3.033505439758301, |
|
"learning_rate": 1.3847228040776427e-05, |
|
"loss": 6.2424, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 21.742773355676583, |
|
"grad_norm": 3.287740707397461, |
|
"learning_rate": 1.3777405390308618e-05, |
|
"loss": 6.2516, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 21.78466694595727, |
|
"grad_norm": 3.0363028049468994, |
|
"learning_rate": 1.3707582739840804e-05, |
|
"loss": 6.2641, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 21.826560536237956, |
|
"grad_norm": 3.1549689769744873, |
|
"learning_rate": 1.3637760089372995e-05, |
|
"loss": 6.2335, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 21.868454126518643, |
|
"grad_norm": 3.8512282371520996, |
|
"learning_rate": 1.3567937438905182e-05, |
|
"loss": 6.2729, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 21.91034771679933, |
|
"grad_norm": 4.0751824378967285, |
|
"learning_rate": 1.3498813014942047e-05, |
|
"loss": 6.2397, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 21.952241307080016, |
|
"grad_norm": 3.375235080718994, |
|
"learning_rate": 1.3428990364474236e-05, |
|
"loss": 6.2316, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 21.994134897360702, |
|
"grad_norm": 3.093156337738037, |
|
"learning_rate": 1.3359167714006423e-05, |
|
"loss": 6.2468, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 22.036028487641392, |
|
"grad_norm": 3.729182243347168, |
|
"learning_rate": 1.3289345063538614e-05, |
|
"loss": 6.2366, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 22.07792207792208, |
|
"grad_norm": 3.4075732231140137, |
|
"learning_rate": 1.32195224130708e-05, |
|
"loss": 6.2693, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 22.119815668202765, |
|
"grad_norm": 2.9553005695343018, |
|
"learning_rate": 1.3149699762602988e-05, |
|
"loss": 6.2592, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 22.16170925848345, |
|
"grad_norm": 3.094538688659668, |
|
"learning_rate": 1.3079877112135177e-05, |
|
"loss": 6.26, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 22.203602848764138, |
|
"grad_norm": 3.907914161682129, |
|
"learning_rate": 1.3010054461667365e-05, |
|
"loss": 6.2711, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 22.245496439044825, |
|
"grad_norm": 3.7182159423828125, |
|
"learning_rate": 1.2940231811199554e-05, |
|
"loss": 6.2713, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 22.287390029325515, |
|
"grad_norm": 2.8652303218841553, |
|
"learning_rate": 1.2870409160731741e-05, |
|
"loss": 6.2325, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 22.3292836196062, |
|
"grad_norm": 3.190359592437744, |
|
"learning_rate": 1.2800586510263929e-05, |
|
"loss": 6.2563, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 22.371177209886888, |
|
"grad_norm": 3.372394561767578, |
|
"learning_rate": 1.2730763859796118e-05, |
|
"loss": 6.2489, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 22.413070800167574, |
|
"grad_norm": 3.340397596359253, |
|
"learning_rate": 1.2660941209328306e-05, |
|
"loss": 6.2147, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 22.45496439044826, |
|
"grad_norm": 3.1127400398254395, |
|
"learning_rate": 1.2591118558860495e-05, |
|
"loss": 6.2588, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 22.496857980728947, |
|
"grad_norm": 4.315746307373047, |
|
"learning_rate": 1.2521295908392683e-05, |
|
"loss": 6.2641, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 22.538751571009634, |
|
"grad_norm": 3.204827070236206, |
|
"learning_rate": 1.2451473257924872e-05, |
|
"loss": 6.2506, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 22.580645161290324, |
|
"grad_norm": 3.653074026107788, |
|
"learning_rate": 1.238165060745706e-05, |
|
"loss": 6.2512, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 22.62253875157101, |
|
"grad_norm": 3.8693697452545166, |
|
"learning_rate": 1.2311827956989249e-05, |
|
"loss": 6.2515, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 22.664432341851697, |
|
"grad_norm": 3.9418985843658447, |
|
"learning_rate": 1.2242005306521436e-05, |
|
"loss": 6.2522, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 22.706325932132383, |
|
"grad_norm": 3.328951358795166, |
|
"learning_rate": 1.2172182656053624e-05, |
|
"loss": 6.2244, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 22.74821952241307, |
|
"grad_norm": 3.251552104949951, |
|
"learning_rate": 1.210305823209049e-05, |
|
"loss": 6.2413, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 22.790113112693756, |
|
"grad_norm": 3.0756313800811768, |
|
"learning_rate": 1.2033235581622679e-05, |
|
"loss": 6.2343, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 22.832006702974446, |
|
"grad_norm": 3.174830913543701, |
|
"learning_rate": 1.1963412931154867e-05, |
|
"loss": 6.2445, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 22.873900293255133, |
|
"grad_norm": 2.831454038619995, |
|
"learning_rate": 1.1893590280687056e-05, |
|
"loss": 6.2457, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 22.91579388353582, |
|
"grad_norm": 3.3783247470855713, |
|
"learning_rate": 1.1823767630219245e-05, |
|
"loss": 6.2202, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 22.957687473816506, |
|
"grad_norm": 3.4505226612091064, |
|
"learning_rate": 1.1753944979751431e-05, |
|
"loss": 6.2329, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 22.999581064097192, |
|
"grad_norm": 4.203530311584473, |
|
"learning_rate": 1.168412232928362e-05, |
|
"loss": 6.2464, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 23.04147465437788, |
|
"grad_norm": 3.295198678970337, |
|
"learning_rate": 1.1614299678815808e-05, |
|
"loss": 6.2163, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 23.083368244658566, |
|
"grad_norm": 3.6795082092285156, |
|
"learning_rate": 1.1544477028347997e-05, |
|
"loss": 6.2108, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 23.125261834939256, |
|
"grad_norm": 3.7577404975891113, |
|
"learning_rate": 1.1474654377880186e-05, |
|
"loss": 6.2406, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 23.167155425219942, |
|
"grad_norm": 4.524641036987305, |
|
"learning_rate": 1.1404831727412372e-05, |
|
"loss": 6.2449, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 23.20904901550063, |
|
"grad_norm": 3.3049490451812744, |
|
"learning_rate": 1.1335009076944561e-05, |
|
"loss": 6.202, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 23.250942605781315, |
|
"grad_norm": 3.6244115829467773, |
|
"learning_rate": 1.1265186426476749e-05, |
|
"loss": 6.2214, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 23.292836196062, |
|
"grad_norm": 3.1158556938171387, |
|
"learning_rate": 1.1195363776008938e-05, |
|
"loss": 6.2247, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 23.334729786342688, |
|
"grad_norm": 3.208771228790283, |
|
"learning_rate": 1.1125541125541126e-05, |
|
"loss": 6.2416, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 23.376623376623378, |
|
"grad_norm": 4.181106090545654, |
|
"learning_rate": 1.1055718475073313e-05, |
|
"loss": 6.2343, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 23.418516966904065, |
|
"grad_norm": 2.8972866535186768, |
|
"learning_rate": 1.0985895824605503e-05, |
|
"loss": 6.2186, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 23.46041055718475, |
|
"grad_norm": 3.1691384315490723, |
|
"learning_rate": 1.091607317413769e-05, |
|
"loss": 6.2328, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 23.502304147465438, |
|
"grad_norm": 3.214346408843994, |
|
"learning_rate": 1.084625052366988e-05, |
|
"loss": 6.2356, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 23.544197737746124, |
|
"grad_norm": 3.0547690391540527, |
|
"learning_rate": 1.0776427873202067e-05, |
|
"loss": 6.2245, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 23.58609132802681, |
|
"grad_norm": 3.6090760231018066, |
|
"learning_rate": 1.0707303449238935e-05, |
|
"loss": 6.2634, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 23.627984918307497, |
|
"grad_norm": 3.210068702697754, |
|
"learning_rate": 1.0637480798771122e-05, |
|
"loss": 6.2126, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 23.669878508588187, |
|
"grad_norm": 3.872507095336914, |
|
"learning_rate": 1.056765814830331e-05, |
|
"loss": 6.2286, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 23.711772098868874, |
|
"grad_norm": 4.503695011138916, |
|
"learning_rate": 1.0497835497835499e-05, |
|
"loss": 6.2156, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 23.75366568914956, |
|
"grad_norm": 3.963315486907959, |
|
"learning_rate": 1.0428012847367686e-05, |
|
"loss": 6.2247, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 23.795559279430247, |
|
"grad_norm": 3.4394917488098145, |
|
"learning_rate": 1.0358190196899876e-05, |
|
"loss": 6.234, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 23.837452869710933, |
|
"grad_norm": 3.403167724609375, |
|
"learning_rate": 1.0288367546432063e-05, |
|
"loss": 6.2045, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 23.87934645999162, |
|
"grad_norm": 2.8274378776550293, |
|
"learning_rate": 1.0218544895964251e-05, |
|
"loss": 6.2121, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 23.92124005027231, |
|
"grad_norm": 3.277188301086426, |
|
"learning_rate": 1.0148722245496438e-05, |
|
"loss": 6.222, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 23.963133640552996, |
|
"grad_norm": 3.0735063552856445, |
|
"learning_rate": 1.0078899595028628e-05, |
|
"loss": 6.2257, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 24.005027230833683, |
|
"grad_norm": 3.6680026054382324, |
|
"learning_rate": 1.0009076944560817e-05, |
|
"loss": 6.2131, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 24.04692082111437, |
|
"grad_norm": 3.134713888168335, |
|
"learning_rate": 9.939254294093005e-06, |
|
"loss": 6.2241, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 24.088814411395056, |
|
"grad_norm": 2.9466712474823, |
|
"learning_rate": 9.869431643625192e-06, |
|
"loss": 6.2158, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 24.130708001675742, |
|
"grad_norm": 3.468949794769287, |
|
"learning_rate": 9.79960899315738e-06, |
|
"loss": 6.1793, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 24.17260159195643, |
|
"grad_norm": 3.5487060546875, |
|
"learning_rate": 9.729786342689569e-06, |
|
"loss": 6.2218, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 24.21449518223712, |
|
"grad_norm": 4.345893383026123, |
|
"learning_rate": 9.659963692221758e-06, |
|
"loss": 6.2023, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 24.256388772517806, |
|
"grad_norm": 2.9016401767730713, |
|
"learning_rate": 9.590141041753946e-06, |
|
"loss": 6.23, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 24.298282362798492, |
|
"grad_norm": 4.17023229598999, |
|
"learning_rate": 9.520318391286135e-06, |
|
"loss": 6.2114, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 24.34017595307918, |
|
"grad_norm": 3.322115421295166, |
|
"learning_rate": 9.45049574081832e-06, |
|
"loss": 6.204, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 24.382069543359865, |
|
"grad_norm": 3.709805488586426, |
|
"learning_rate": 9.38067309035051e-06, |
|
"loss": 6.2087, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 24.42396313364055, |
|
"grad_norm": 3.225588798522949, |
|
"learning_rate": 9.311548666387376e-06, |
|
"loss": 6.2436, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 24.46585672392124, |
|
"grad_norm": 3.1229472160339355, |
|
"learning_rate": 9.241726015919565e-06, |
|
"loss": 6.2253, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 24.507750314201928, |
|
"grad_norm": 3.4445230960845947, |
|
"learning_rate": 9.171903365451753e-06, |
|
"loss": 6.2254, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 24.549643904482615, |
|
"grad_norm": 4.2796807289123535, |
|
"learning_rate": 9.102080714983942e-06, |
|
"loss": 6.221, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 24.5915374947633, |
|
"grad_norm": 3.2323966026306152, |
|
"learning_rate": 9.03225806451613e-06, |
|
"loss": 6.228, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 24.633431085043988, |
|
"grad_norm": 4.064596652984619, |
|
"learning_rate": 8.962435414048317e-06, |
|
"loss": 6.2363, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 24.675324675324674, |
|
"grad_norm": 3.068544864654541, |
|
"learning_rate": 8.893310990085183e-06, |
|
"loss": 6.2508, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 24.71721826560536, |
|
"grad_norm": 2.6201155185699463, |
|
"learning_rate": 8.823488339617372e-06, |
|
"loss": 6.2193, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 24.75911185588605, |
|
"grad_norm": 4.960629463195801, |
|
"learning_rate": 8.753665689149562e-06, |
|
"loss": 6.1999, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 24.801005446166737, |
|
"grad_norm": 3.191586971282959, |
|
"learning_rate": 8.683843038681749e-06, |
|
"loss": 6.2203, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 24.842899036447424, |
|
"grad_norm": 3.224745512008667, |
|
"learning_rate": 8.614020388213937e-06, |
|
"loss": 6.212, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 24.88479262672811, |
|
"grad_norm": 3.450741767883301, |
|
"learning_rate": 8.544197737746124e-06, |
|
"loss": 6.2386, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 24.926686217008797, |
|
"grad_norm": 4.297729969024658, |
|
"learning_rate": 8.474375087278313e-06, |
|
"loss": 6.2088, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 24.968579807289483, |
|
"grad_norm": 3.376110553741455, |
|
"learning_rate": 8.404552436810503e-06, |
|
"loss": 6.2176, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 25.010473397570173, |
|
"grad_norm": 3.0211358070373535, |
|
"learning_rate": 8.33472978634269e-06, |
|
"loss": 6.1906, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 25.05236698785086, |
|
"grad_norm": 2.8490803241729736, |
|
"learning_rate": 8.264907135874878e-06, |
|
"loss": 6.2, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 25.094260578131546, |
|
"grad_norm": 3.0233705043792725, |
|
"learning_rate": 8.195084485407065e-06, |
|
"loss": 6.1886, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 25.136154168412233, |
|
"grad_norm": 3.7582995891571045, |
|
"learning_rate": 8.125261834939255e-06, |
|
"loss": 6.2064, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 25.17804775869292, |
|
"grad_norm": 3.128079891204834, |
|
"learning_rate": 8.055439184471442e-06, |
|
"loss": 6.2264, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 25.219941348973606, |
|
"grad_norm": 3.1808972358703613, |
|
"learning_rate": 7.985616534003632e-06, |
|
"loss": 6.2149, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 25.261834939254292, |
|
"grad_norm": 3.2326996326446533, |
|
"learning_rate": 7.91579388353582e-06, |
|
"loss": 6.2142, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 25.303728529534983, |
|
"grad_norm": 3.267465114593506, |
|
"learning_rate": 7.845971233068007e-06, |
|
"loss": 6.2439, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 25.34562211981567, |
|
"grad_norm": 3.691075563430786, |
|
"learning_rate": 7.776148582600196e-06, |
|
"loss": 6.2178, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 25.387515710096356, |
|
"grad_norm": 3.290562152862549, |
|
"learning_rate": 7.706325932132383e-06, |
|
"loss": 6.2165, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 25.429409300377042, |
|
"grad_norm": 4.553886413574219, |
|
"learning_rate": 7.636503281664573e-06, |
|
"loss": 6.2165, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 25.47130289065773, |
|
"grad_norm": 4.013444423675537, |
|
"learning_rate": 7.566680631196761e-06, |
|
"loss": 6.2122, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 25.513196480938415, |
|
"grad_norm": 4.044810771942139, |
|
"learning_rate": 7.496857980728948e-06, |
|
"loss": 6.2533, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 25.555090071219105, |
|
"grad_norm": 3.788613796234131, |
|
"learning_rate": 7.427035330261137e-06, |
|
"loss": 6.2039, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 25.59698366149979, |
|
"grad_norm": 3.317281484603882, |
|
"learning_rate": 7.3572126797933255e-06, |
|
"loss": 6.2228, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 25.638877251780478, |
|
"grad_norm": 3.4238085746765137, |
|
"learning_rate": 7.287390029325514e-06, |
|
"loss": 6.1979, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 25.680770842061165, |
|
"grad_norm": 3.1558725833892822, |
|
"learning_rate": 7.217567378857702e-06, |
|
"loss": 6.2044, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 25.72266443234185, |
|
"grad_norm": 2.939328670501709, |
|
"learning_rate": 7.147744728389889e-06, |
|
"loss": 6.2312, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 25.764558022622538, |
|
"grad_norm": 4.0037455558776855, |
|
"learning_rate": 7.0779220779220775e-06, |
|
"loss": 6.228, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 25.806451612903224, |
|
"grad_norm": 4.4582343101501465, |
|
"learning_rate": 7.008099427454267e-06, |
|
"loss": 6.2132, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 25.848345203183914, |
|
"grad_norm": 3.006201982498169, |
|
"learning_rate": 6.938276776986455e-06, |
|
"loss": 6.2242, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 25.8902387934646, |
|
"grad_norm": 3.6898059844970703, |
|
"learning_rate": 6.8684541265186436e-06, |
|
"loss": 6.2134, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 25.932132383745287, |
|
"grad_norm": 3.3489785194396973, |
|
"learning_rate": 6.798631476050832e-06, |
|
"loss": 6.2042, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 25.974025974025974, |
|
"grad_norm": 3.2489922046661377, |
|
"learning_rate": 6.729507052087698e-06, |
|
"loss": 6.2212, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 26.01591956430666, |
|
"grad_norm": 4.022356033325195, |
|
"learning_rate": 6.659684401619885e-06, |
|
"loss": 6.2423, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 26.057813154587347, |
|
"grad_norm": 4.803937911987305, |
|
"learning_rate": 6.589861751152074e-06, |
|
"loss": 6.2319, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 26.099706744868037, |
|
"grad_norm": 3.7283337116241455, |
|
"learning_rate": 6.520039100684262e-06, |
|
"loss": 6.1924, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 26.141600335148723, |
|
"grad_norm": 3.817946672439575, |
|
"learning_rate": 6.450216450216451e-06, |
|
"loss": 6.2039, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 26.18349392542941, |
|
"grad_norm": 3.4621963500976562, |
|
"learning_rate": 6.380393799748639e-06, |
|
"loss": 6.214, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 26.225387515710096, |
|
"grad_norm": 4.458475112915039, |
|
"learning_rate": 6.310571149280827e-06, |
|
"loss": 6.2327, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 26.267281105990783, |
|
"grad_norm": 3.1324493885040283, |
|
"learning_rate": 6.240748498813015e-06, |
|
"loss": 6.2518, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 26.30917469627147, |
|
"grad_norm": 3.410626173019409, |
|
"learning_rate": 6.1709258483452034e-06, |
|
"loss": 6.2054, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 26.351068286552156, |
|
"grad_norm": 3.221602201461792, |
|
"learning_rate": 6.101103197877392e-06, |
|
"loss": 6.2297, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 26.392961876832846, |
|
"grad_norm": 3.1413893699645996, |
|
"learning_rate": 6.031280547409579e-06, |
|
"loss": 6.2134, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 26.434855467113533, |
|
"grad_norm": 3.3834433555603027, |
|
"learning_rate": 5.961457896941768e-06, |
|
"loss": 6.167, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 26.47674905739422, |
|
"grad_norm": 3.016921281814575, |
|
"learning_rate": 5.891635246473957e-06, |
|
"loss": 6.2181, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 26.518642647674906, |
|
"grad_norm": 3.4190244674682617, |
|
"learning_rate": 5.821812596006145e-06, |
|
"loss": 6.172, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 26.560536237955592, |
|
"grad_norm": 3.519742488861084, |
|
"learning_rate": 5.751989945538333e-06, |
|
"loss": 6.2144, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 26.60242982823628, |
|
"grad_norm": 3.083923101425171, |
|
"learning_rate": 5.682167295070521e-06, |
|
"loss": 6.204, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 26.64432341851697, |
|
"grad_norm": 3.8977878093719482, |
|
"learning_rate": 5.612344644602709e-06, |
|
"loss": 6.1759, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 26.686217008797655, |
|
"grad_norm": 3.5598249435424805, |
|
"learning_rate": 5.5425219941348974e-06, |
|
"loss": 6.2233, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 26.72811059907834, |
|
"grad_norm": 3.6333513259887695, |
|
"learning_rate": 5.472699343667086e-06, |
|
"loss": 6.2133, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 26.770004189359028, |
|
"grad_norm": 3.2468085289001465, |
|
"learning_rate": 5.402876693199274e-06, |
|
"loss": 6.2081, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 26.811897779639715, |
|
"grad_norm": 3.6896772384643555, |
|
"learning_rate": 5.333054042731463e-06, |
|
"loss": 6.1935, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 26.8537913699204, |
|
"grad_norm": 3.263144016265869, |
|
"learning_rate": 5.26323139226365e-06, |
|
"loss": 6.2127, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 26.895684960201088, |
|
"grad_norm": 3.2848362922668457, |
|
"learning_rate": 5.193408741795839e-06, |
|
"loss": 6.2074, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 26.937578550481778, |
|
"grad_norm": 3.675541639328003, |
|
"learning_rate": 5.123586091328027e-06, |
|
"loss": 6.2015, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 26.979472140762464, |
|
"grad_norm": 3.413780689239502, |
|
"learning_rate": 5.0537634408602155e-06, |
|
"loss": 6.2218, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 27.02136573104315, |
|
"grad_norm": 4.108157634735107, |
|
"learning_rate": 4.983940790392404e-06, |
|
"loss": 6.212, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 27.063259321323837, |
|
"grad_norm": 3.7690155506134033, |
|
"learning_rate": 4.9141181399245915e-06, |
|
"loss": 6.22, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 27.105152911604524, |
|
"grad_norm": 3.379786491394043, |
|
"learning_rate": 4.84429548945678e-06, |
|
"loss": 6.2334, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 27.14704650188521, |
|
"grad_norm": 3.5175390243530273, |
|
"learning_rate": 4.774472838988968e-06, |
|
"loss": 6.1932, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 27.1889400921659, |
|
"grad_norm": 2.8454129695892334, |
|
"learning_rate": 4.704650188521157e-06, |
|
"loss": 6.2087, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 27.230833682446587, |
|
"grad_norm": 3.4630961418151855, |
|
"learning_rate": 4.634827538053345e-06, |
|
"loss": 6.2142, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 27.272727272727273, |
|
"grad_norm": 3.339860677719116, |
|
"learning_rate": 4.565004887585533e-06, |
|
"loss": 6.1772, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 27.31462086300796, |
|
"grad_norm": 3.0743260383605957, |
|
"learning_rate": 4.495182237117721e-06, |
|
"loss": 6.2044, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 27.356514453288646, |
|
"grad_norm": 3.2576496601104736, |
|
"learning_rate": 4.4253595866499095e-06, |
|
"loss": 6.2191, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 27.398408043569333, |
|
"grad_norm": 3.326819896697998, |
|
"learning_rate": 4.355536936182097e-06, |
|
"loss": 6.1762, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 27.44030163385002, |
|
"grad_norm": 3.4447667598724365, |
|
"learning_rate": 4.285714285714286e-06, |
|
"loss": 6.1823, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 27.48219522413071, |
|
"grad_norm": 3.4771687984466553, |
|
"learning_rate": 4.215891635246475e-06, |
|
"loss": 6.228, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 27.524088814411396, |
|
"grad_norm": 3.3457424640655518, |
|
"learning_rate": 4.146068984778662e-06, |
|
"loss": 6.1651, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 27.565982404692082, |
|
"grad_norm": 3.006155490875244, |
|
"learning_rate": 4.076246334310851e-06, |
|
"loss": 6.2026, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 27.60787599497277, |
|
"grad_norm": 4.228708744049072, |
|
"learning_rate": 4.006423683843038e-06, |
|
"loss": 6.1923, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 27.649769585253456, |
|
"grad_norm": 3.4744226932525635, |
|
"learning_rate": 3.937299259879905e-06, |
|
"loss": 6.1891, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 27.691663175534142, |
|
"grad_norm": 3.8300633430480957, |
|
"learning_rate": 3.867476609412093e-06, |
|
"loss": 6.2237, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 27.733556765814832, |
|
"grad_norm": 2.9689528942108154, |
|
"learning_rate": 3.7976539589442818e-06, |
|
"loss": 6.217, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 27.77545035609552, |
|
"grad_norm": 3.1309947967529297, |
|
"learning_rate": 3.7278313084764698e-06, |
|
"loss": 6.2061, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 27.817343946376205, |
|
"grad_norm": 3.4571166038513184, |
|
"learning_rate": 3.658008658008658e-06, |
|
"loss": 6.1863, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 27.85923753665689, |
|
"grad_norm": 3.354229211807251, |
|
"learning_rate": 3.5881860075408466e-06, |
|
"loss": 6.1996, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 27.901131126937578, |
|
"grad_norm": 3.745568037033081, |
|
"learning_rate": 3.5183633570730346e-06, |
|
"loss": 6.1839, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 27.943024717218265, |
|
"grad_norm": 3.356715440750122, |
|
"learning_rate": 3.448540706605223e-06, |
|
"loss": 6.2048, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 27.98491830749895, |
|
"grad_norm": 2.964492082595825, |
|
"learning_rate": 3.378718056137411e-06, |
|
"loss": 6.228, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 28.02681189777964, |
|
"grad_norm": 3.336606502532959, |
|
"learning_rate": 3.3088954056695994e-06, |
|
"loss": 6.1953, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 28.068705488060328, |
|
"grad_norm": 3.264971971511841, |
|
"learning_rate": 3.239072755201788e-06, |
|
"loss": 6.1783, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 28.110599078341014, |
|
"grad_norm": 3.4968082904815674, |
|
"learning_rate": 3.169250104733976e-06, |
|
"loss": 6.2117, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 28.1524926686217, |
|
"grad_norm": 3.4082252979278564, |
|
"learning_rate": 3.099427454266164e-06, |
|
"loss": 6.2278, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 28.194386258902387, |
|
"grad_norm": 3.52056884765625, |
|
"learning_rate": 3.029604803798352e-06, |
|
"loss": 6.2037, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 28.236279849183074, |
|
"grad_norm": 3.6062779426574707, |
|
"learning_rate": 2.9597821533305406e-06, |
|
"loss": 6.1952, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 28.278173439463764, |
|
"grad_norm": 3.158705472946167, |
|
"learning_rate": 2.889959502862729e-06, |
|
"loss": 6.2067, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 28.32006702974445, |
|
"grad_norm": 3.6732075214385986, |
|
"learning_rate": 2.820136852394917e-06, |
|
"loss": 6.1752, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 28.361960620025137, |
|
"grad_norm": 2.842560291290283, |
|
"learning_rate": 2.7503142019271054e-06, |
|
"loss": 6.1823, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 28.403854210305823, |
|
"grad_norm": 3.412233591079712, |
|
"learning_rate": 2.6804915514592934e-06, |
|
"loss": 6.1997, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 28.44574780058651, |
|
"grad_norm": 2.8313143253326416, |
|
"learning_rate": 2.610668900991482e-06, |
|
"loss": 6.187, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 28.487641390867196, |
|
"grad_norm": 3.122307300567627, |
|
"learning_rate": 2.541544477028348e-06, |
|
"loss": 6.1977, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 28.529534981147883, |
|
"grad_norm": 3.4732697010040283, |
|
"learning_rate": 2.4717218265605365e-06, |
|
"loss": 6.1913, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 3.3936917781829834, |
|
"learning_rate": 2.4018991760927245e-06, |
|
"loss": 6.2216, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 28.61332216170926, |
|
"grad_norm": 3.2980170249938965, |
|
"learning_rate": 2.332076525624913e-06, |
|
"loss": 6.1989, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 28.655215751989946, |
|
"grad_norm": 4.099823951721191, |
|
"learning_rate": 2.2622538751571013e-06, |
|
"loss": 6.1755, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 28.697109342270632, |
|
"grad_norm": 3.7930960655212402, |
|
"learning_rate": 2.1924312246892893e-06, |
|
"loss": 6.2155, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 28.73900293255132, |
|
"grad_norm": 3.620065212249756, |
|
"learning_rate": 2.1226085742214777e-06, |
|
"loss": 6.2041, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 28.780896522832006, |
|
"grad_norm": 3.3451735973358154, |
|
"learning_rate": 2.0527859237536657e-06, |
|
"loss": 6.2095, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 28.822790113112696, |
|
"grad_norm": 4.007857799530029, |
|
"learning_rate": 1.982963273285854e-06, |
|
"loss": 6.2283, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 28.864683703393382, |
|
"grad_norm": 4.236888885498047, |
|
"learning_rate": 1.9131406228180425e-06, |
|
"loss": 6.1799, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 28.90657729367407, |
|
"grad_norm": 3.222273111343384, |
|
"learning_rate": 1.8433179723502305e-06, |
|
"loss": 6.2008, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 28.948470883954755, |
|
"grad_norm": 3.8649580478668213, |
|
"learning_rate": 1.7734953218824187e-06, |
|
"loss": 6.194, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 28.99036447423544, |
|
"grad_norm": 3.9630191326141357, |
|
"learning_rate": 1.7036726714146071e-06, |
|
"loss": 6.2083, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 29.032258064516128, |
|
"grad_norm": 3.9617035388946533, |
|
"learning_rate": 1.6338500209467953e-06, |
|
"loss": 6.2197, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 29.074151654796815, |
|
"grad_norm": 3.4647514820098877, |
|
"learning_rate": 1.5640273704789835e-06, |
|
"loss": 6.1919, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 29.116045245077505, |
|
"grad_norm": 3.7548468112945557, |
|
"learning_rate": 1.4942047200111717e-06, |
|
"loss": 6.171, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 29.15793883535819, |
|
"grad_norm": 3.4267735481262207, |
|
"learning_rate": 1.42438206954336e-06, |
|
"loss": 6.2031, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 29.199832425638878, |
|
"grad_norm": 3.166888952255249, |
|
"learning_rate": 1.3545594190755483e-06, |
|
"loss": 6.1937, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 29.241726015919564, |
|
"grad_norm": 2.9794344902038574, |
|
"learning_rate": 1.2847367686077363e-06, |
|
"loss": 6.2068, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 29.28361960620025, |
|
"grad_norm": 3.056293249130249, |
|
"learning_rate": 1.2149141181399247e-06, |
|
"loss": 6.1652, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 29.325513196480937, |
|
"grad_norm": 3.851149320602417, |
|
"learning_rate": 1.145789694176791e-06, |
|
"loss": 6.2129, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 29.367406786761624, |
|
"grad_norm": 3.670929193496704, |
|
"learning_rate": 1.0759670437089792e-06, |
|
"loss": 6.2068, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 29.409300377042314, |
|
"grad_norm": 3.3581252098083496, |
|
"learning_rate": 1.0061443932411674e-06, |
|
"loss": 6.2208, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 29.451193967323, |
|
"grad_norm": 3.7551257610321045, |
|
"learning_rate": 9.363217427733557e-07, |
|
"loss": 6.179, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 29.493087557603687, |
|
"grad_norm": 2.9767682552337646, |
|
"learning_rate": 8.664990923055439e-07, |
|
"loss": 6.2157, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 29.534981147884373, |
|
"grad_norm": 3.3218774795532227, |
|
"learning_rate": 7.966764418377322e-07, |
|
"loss": 6.1773, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 29.57687473816506, |
|
"grad_norm": 4.360437870025635, |
|
"learning_rate": 7.268537913699204e-07, |
|
"loss": 6.1985, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 29.618768328445746, |
|
"grad_norm": 3.544264078140259, |
|
"learning_rate": 6.570311409021087e-07, |
|
"loss": 6.1973, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 29.660661918726436, |
|
"grad_norm": 3.7416069507598877, |
|
"learning_rate": 5.872084904342969e-07, |
|
"loss": 6.2031, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 29.702555509007123, |
|
"grad_norm": 3.0346035957336426, |
|
"learning_rate": 5.173858399664851e-07, |
|
"loss": 6.2123, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 29.74444909928781, |
|
"grad_norm": 3.2308425903320312, |
|
"learning_rate": 4.4756318949867344e-07, |
|
"loss": 6.2106, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 29.786342689568496, |
|
"grad_norm": 3.0109570026397705, |
|
"learning_rate": 3.7774053903086163e-07, |
|
"loss": 6.213, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 29.828236279849182, |
|
"grad_norm": 3.733609199523926, |
|
"learning_rate": 3.079178885630499e-07, |
|
"loss": 6.1984, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 29.87012987012987, |
|
"grad_norm": 3.5430541038513184, |
|
"learning_rate": 2.3809523809523814e-07, |
|
"loss": 6.183, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 29.912023460410555, |
|
"grad_norm": 3.1964950561523438, |
|
"learning_rate": 1.6827258762742634e-07, |
|
"loss": 6.1817, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 29.953917050691246, |
|
"grad_norm": 3.6197755336761475, |
|
"learning_rate": 9.844993715961458e-08, |
|
"loss": 6.1907, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 29.995810640971932, |
|
"grad_norm": 3.035473346710205, |
|
"learning_rate": 2.862728669180282e-08, |
|
"loss": 6.1677, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 71610, |
|
"total_flos": 781486986700800.0, |
|
"train_loss": 6.528281962779993, |
|
"train_runtime": 3948.1737, |
|
"train_samples_per_second": 580.354, |
|
"train_steps_per_second": 18.137 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 71610, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 5000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 781486986700800.0, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|