|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999850411368736, |
|
"eval_steps": 500, |
|
"global_step": 3342, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002991772625280479, |
|
"grad_norm": 24.73757525746042, |
|
"learning_rate": 2.985074626865672e-08, |
|
"loss": 1.4467, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0014958863126402393, |
|
"grad_norm": 25.170481305263397, |
|
"learning_rate": 1.4925373134328358e-07, |
|
"loss": 1.418, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0029917726252804786, |
|
"grad_norm": 17.671227057151118, |
|
"learning_rate": 2.9850746268656716e-07, |
|
"loss": 1.4125, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004487658937920718, |
|
"grad_norm": 8.909500017199079, |
|
"learning_rate": 4.4776119402985074e-07, |
|
"loss": 1.3226, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.005983545250560957, |
|
"grad_norm": 11.280714871339804, |
|
"learning_rate": 5.970149253731343e-07, |
|
"loss": 1.1982, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0074794315632011965, |
|
"grad_norm": 8.398369640505729, |
|
"learning_rate": 7.462686567164179e-07, |
|
"loss": 1.0909, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.008975317875841436, |
|
"grad_norm": 2.9571179549426505, |
|
"learning_rate": 8.955223880597015e-07, |
|
"loss": 0.9951, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010471204188481676, |
|
"grad_norm": 2.8894837770977224, |
|
"learning_rate": 1.044776119402985e-06, |
|
"loss": 0.9586, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.011967090501121914, |
|
"grad_norm": 2.324424480950711, |
|
"learning_rate": 1.1940298507462686e-06, |
|
"loss": 0.9305, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.013462976813762155, |
|
"grad_norm": 2.2619500315834866, |
|
"learning_rate": 1.3432835820895524e-06, |
|
"loss": 0.91, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.014958863126402393, |
|
"grad_norm": 2.3542199193134663, |
|
"learning_rate": 1.4925373134328358e-06, |
|
"loss": 0.8925, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016454749439042633, |
|
"grad_norm": 2.1942393280485444, |
|
"learning_rate": 1.6417910447761196e-06, |
|
"loss": 0.8768, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01795063575168287, |
|
"grad_norm": 2.1616387387297245, |
|
"learning_rate": 1.791044776119403e-06, |
|
"loss": 0.8723, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01944652206432311, |
|
"grad_norm": 2.2022102428237957, |
|
"learning_rate": 1.9402985074626867e-06, |
|
"loss": 0.8639, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.020942408376963352, |
|
"grad_norm": 2.1621065249124585, |
|
"learning_rate": 2.08955223880597e-06, |
|
"loss": 0.8699, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02243829468960359, |
|
"grad_norm": 2.3233760195216147, |
|
"learning_rate": 2.238805970149254e-06, |
|
"loss": 0.8603, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02393418100224383, |
|
"grad_norm": 2.266487924942459, |
|
"learning_rate": 2.3880597014925373e-06, |
|
"loss": 0.8537, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.025430067314884067, |
|
"grad_norm": 2.2873594748046076, |
|
"learning_rate": 2.537313432835821e-06, |
|
"loss": 0.8487, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.02692595362752431, |
|
"grad_norm": 2.283742446960398, |
|
"learning_rate": 2.686567164179105e-06, |
|
"loss": 0.8413, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.028421839940164548, |
|
"grad_norm": 2.1774038405712486, |
|
"learning_rate": 2.835820895522388e-06, |
|
"loss": 0.8281, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.029917726252804786, |
|
"grad_norm": 2.342263870606748, |
|
"learning_rate": 2.9850746268656716e-06, |
|
"loss": 0.8305, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.031413612565445025, |
|
"grad_norm": 2.385361324640983, |
|
"learning_rate": 3.1343283582089558e-06, |
|
"loss": 0.8168, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.032909498878085267, |
|
"grad_norm": 2.500114420278517, |
|
"learning_rate": 3.283582089552239e-06, |
|
"loss": 0.8207, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0344053851907255, |
|
"grad_norm": 2.4374602854285286, |
|
"learning_rate": 3.4328358208955225e-06, |
|
"loss": 0.8244, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.03590127150336574, |
|
"grad_norm": 2.44198563300893, |
|
"learning_rate": 3.582089552238806e-06, |
|
"loss": 0.8199, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.037397157816005985, |
|
"grad_norm": 2.2760332882107157, |
|
"learning_rate": 3.73134328358209e-06, |
|
"loss": 0.8216, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03889304412864622, |
|
"grad_norm": 2.508508841712968, |
|
"learning_rate": 3.8805970149253735e-06, |
|
"loss": 0.8014, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04038893044128646, |
|
"grad_norm": 2.3029396530303066, |
|
"learning_rate": 4.029850746268657e-06, |
|
"loss": 0.7989, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.041884816753926704, |
|
"grad_norm": 2.3322348191256594, |
|
"learning_rate": 4.17910447761194e-06, |
|
"loss": 0.7964, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04338070306656694, |
|
"grad_norm": 2.331622885776369, |
|
"learning_rate": 4.3283582089552236e-06, |
|
"loss": 0.8013, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.04487658937920718, |
|
"grad_norm": 2.288860960559162, |
|
"learning_rate": 4.477611940298508e-06, |
|
"loss": 0.8045, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04637247569184742, |
|
"grad_norm": 2.4508630826235778, |
|
"learning_rate": 4.626865671641791e-06, |
|
"loss": 0.7898, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.04786836200448766, |
|
"grad_norm": 2.3184684975983045, |
|
"learning_rate": 4.7761194029850745e-06, |
|
"loss": 0.7937, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0493642483171279, |
|
"grad_norm": 2.1613921165346826, |
|
"learning_rate": 4.925373134328359e-06, |
|
"loss": 0.7911, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.050860134629768135, |
|
"grad_norm": 2.453275213296358, |
|
"learning_rate": 5.074626865671642e-06, |
|
"loss": 0.7857, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05235602094240838, |
|
"grad_norm": 2.3284898790399353, |
|
"learning_rate": 5.2238805970149255e-06, |
|
"loss": 0.7793, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.05385190725504862, |
|
"grad_norm": 2.3201172049873686, |
|
"learning_rate": 5.37313432835821e-06, |
|
"loss": 0.7779, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.055347793567688854, |
|
"grad_norm": 2.357711081062365, |
|
"learning_rate": 5.522388059701493e-06, |
|
"loss": 0.7847, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.056843679880329095, |
|
"grad_norm": 2.7611374374886037, |
|
"learning_rate": 5.671641791044776e-06, |
|
"loss": 0.7706, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05833956619296934, |
|
"grad_norm": 2.6271055846372513, |
|
"learning_rate": 5.820895522388061e-06, |
|
"loss": 0.7607, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.05983545250560957, |
|
"grad_norm": 2.376880891581398, |
|
"learning_rate": 5.970149253731343e-06, |
|
"loss": 0.7671, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.061331338818249814, |
|
"grad_norm": 2.3835041311189022, |
|
"learning_rate": 6.119402985074627e-06, |
|
"loss": 0.7586, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.06282722513089005, |
|
"grad_norm": 2.462749716678564, |
|
"learning_rate": 6.2686567164179116e-06, |
|
"loss": 0.7758, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.0643231114435303, |
|
"grad_norm": 2.4077847282275866, |
|
"learning_rate": 6.417910447761194e-06, |
|
"loss": 0.7638, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.06581899775617053, |
|
"grad_norm": 2.3747888389216616, |
|
"learning_rate": 6.567164179104478e-06, |
|
"loss": 0.761, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06731488406881077, |
|
"grad_norm": 2.493092964041521, |
|
"learning_rate": 6.7164179104477625e-06, |
|
"loss": 0.7574, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.068810770381451, |
|
"grad_norm": 2.6477317119354984, |
|
"learning_rate": 6.865671641791045e-06, |
|
"loss": 0.7498, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07030665669409125, |
|
"grad_norm": 2.563921275934862, |
|
"learning_rate": 7.014925373134329e-06, |
|
"loss": 0.761, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.07180254300673149, |
|
"grad_norm": 2.4406043810417257, |
|
"learning_rate": 7.164179104477612e-06, |
|
"loss": 0.7423, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07329842931937172, |
|
"grad_norm": 2.2639780494151034, |
|
"learning_rate": 7.313432835820896e-06, |
|
"loss": 0.7478, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.07479431563201197, |
|
"grad_norm": 2.6132105098393628, |
|
"learning_rate": 7.46268656716418e-06, |
|
"loss": 0.7522, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0762902019446522, |
|
"grad_norm": 2.5688006092201365, |
|
"learning_rate": 7.611940298507463e-06, |
|
"loss": 0.7409, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.07778608825729244, |
|
"grad_norm": 2.4383140799178564, |
|
"learning_rate": 7.761194029850747e-06, |
|
"loss": 0.7449, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07928197456993269, |
|
"grad_norm": 2.3404023048993365, |
|
"learning_rate": 7.91044776119403e-06, |
|
"loss": 0.7306, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.08077786088257292, |
|
"grad_norm": 2.435305377918958, |
|
"learning_rate": 8.059701492537314e-06, |
|
"loss": 0.7464, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.08227374719521316, |
|
"grad_norm": 2.9299430373461433, |
|
"learning_rate": 8.208955223880599e-06, |
|
"loss": 0.7279, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08376963350785341, |
|
"grad_norm": 2.387726009024013, |
|
"learning_rate": 8.35820895522388e-06, |
|
"loss": 0.7388, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08526551982049364, |
|
"grad_norm": 2.268041368580347, |
|
"learning_rate": 8.507462686567165e-06, |
|
"loss": 0.7407, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.08676140613313388, |
|
"grad_norm": 2.408844552763582, |
|
"learning_rate": 8.656716417910447e-06, |
|
"loss": 0.7222, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08825729244577413, |
|
"grad_norm": 2.401282808607445, |
|
"learning_rate": 8.805970149253732e-06, |
|
"loss": 0.7265, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.08975317875841436, |
|
"grad_norm": 2.4691438286047425, |
|
"learning_rate": 8.955223880597016e-06, |
|
"loss": 0.7239, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0912490650710546, |
|
"grad_norm": 2.3268484057723673, |
|
"learning_rate": 9.104477611940299e-06, |
|
"loss": 0.7227, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.09274495138369485, |
|
"grad_norm": 2.402308618181149, |
|
"learning_rate": 9.253731343283582e-06, |
|
"loss": 0.7244, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09424083769633508, |
|
"grad_norm": 2.3361311735184604, |
|
"learning_rate": 9.402985074626867e-06, |
|
"loss": 0.7263, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.09573672400897532, |
|
"grad_norm": 2.335173501165771, |
|
"learning_rate": 9.552238805970149e-06, |
|
"loss": 0.7215, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09723261032161555, |
|
"grad_norm": 2.730322757042367, |
|
"learning_rate": 9.701492537313434e-06, |
|
"loss": 0.7332, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.0987284966342558, |
|
"grad_norm": 2.2835032077275312, |
|
"learning_rate": 9.850746268656717e-06, |
|
"loss": 0.7115, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.10022438294689603, |
|
"grad_norm": 2.4782326787594338, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7142, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.10172026925953627, |
|
"grad_norm": 2.4128411535499454, |
|
"learning_rate": 9.999931779967976e-06, |
|
"loss": 0.7108, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.10321615557217652, |
|
"grad_norm": 2.230247650641441, |
|
"learning_rate": 9.99972712173349e-06, |
|
"loss": 0.7037, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.10471204188481675, |
|
"grad_norm": 2.4375274487235363, |
|
"learning_rate": 9.999386030881264e-06, |
|
"loss": 0.7052, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.10620792819745699, |
|
"grad_norm": 2.497148482639146, |
|
"learning_rate": 9.998908516718984e-06, |
|
"loss": 0.723, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.10770381451009724, |
|
"grad_norm": 2.7572554578379793, |
|
"learning_rate": 9.998294592277064e-06, |
|
"loss": 0.7089, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.10919970082273747, |
|
"grad_norm": 2.4811541468806175, |
|
"learning_rate": 9.997544274308282e-06, |
|
"loss": 0.7049, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.11069558713537771, |
|
"grad_norm": 2.3612905719078556, |
|
"learning_rate": 9.996657583287326e-06, |
|
"loss": 0.7112, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.11219147344801796, |
|
"grad_norm": 2.3269094749093346, |
|
"learning_rate": 9.995634543410231e-06, |
|
"loss": 0.7091, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.11368735976065819, |
|
"grad_norm": 2.1856434021067543, |
|
"learning_rate": 9.99447518259372e-06, |
|
"loss": 0.6945, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.11518324607329843, |
|
"grad_norm": 2.2818233887085677, |
|
"learning_rate": 9.99317953247445e-06, |
|
"loss": 0.695, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.11667913238593867, |
|
"grad_norm": 2.5644750677779267, |
|
"learning_rate": 9.991747628408138e-06, |
|
"loss": 0.6878, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11817501869857891, |
|
"grad_norm": 2.485293875776813, |
|
"learning_rate": 9.990179509468595e-06, |
|
"loss": 0.6987, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.11967090501121914, |
|
"grad_norm": 2.291043542367887, |
|
"learning_rate": 9.988475218446676e-06, |
|
"loss": 0.6898, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1211667913238594, |
|
"grad_norm": 2.4232767050288726, |
|
"learning_rate": 9.986634801849093e-06, |
|
"loss": 0.6963, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.12266267763649963, |
|
"grad_norm": 2.4211336275289512, |
|
"learning_rate": 9.984658309897161e-06, |
|
"loss": 0.6793, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.12415856394913986, |
|
"grad_norm": 2.5029979080579734, |
|
"learning_rate": 9.982545796525416e-06, |
|
"loss": 0.6773, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.1256544502617801, |
|
"grad_norm": 2.4668720551969487, |
|
"learning_rate": 9.980297319380148e-06, |
|
"loss": 0.6741, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.12715033657442035, |
|
"grad_norm": 2.1324057406570796, |
|
"learning_rate": 9.977912939817833e-06, |
|
"loss": 0.6717, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.1286462228870606, |
|
"grad_norm": 2.216513918034811, |
|
"learning_rate": 9.97539272290345e-06, |
|
"loss": 0.664, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.13014210919970082, |
|
"grad_norm": 2.3891946944700346, |
|
"learning_rate": 9.97273673740871e-06, |
|
"loss": 0.6779, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.13163799551234107, |
|
"grad_norm": 2.107297355911597, |
|
"learning_rate": 9.96994505581018e-06, |
|
"loss": 0.663, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.13313388182498131, |
|
"grad_norm": 2.2741240440107666, |
|
"learning_rate": 9.967017754287303e-06, |
|
"loss": 0.6628, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.13462976813762154, |
|
"grad_norm": 2.231118541487464, |
|
"learning_rate": 9.963954912720319e-06, |
|
"loss": 0.6805, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.13612565445026178, |
|
"grad_norm": 2.327411278722037, |
|
"learning_rate": 9.960756614688089e-06, |
|
"loss": 0.6572, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.137621540762902, |
|
"grad_norm": 2.2727646648145097, |
|
"learning_rate": 9.957422947465814e-06, |
|
"loss": 0.6682, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13911742707554225, |
|
"grad_norm": 2.43427967377174, |
|
"learning_rate": 9.953954002022643e-06, |
|
"loss": 0.658, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.1406133133881825, |
|
"grad_norm": 2.203173002529278, |
|
"learning_rate": 9.950349873019204e-06, |
|
"loss": 0.6513, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.14210919970082272, |
|
"grad_norm": 2.159064147239943, |
|
"learning_rate": 9.946610658805018e-06, |
|
"loss": 0.6597, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.14360508601346297, |
|
"grad_norm": 2.2802374368293186, |
|
"learning_rate": 9.94273646141581e-06, |
|
"loss": 0.6642, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.14510097232610322, |
|
"grad_norm": 2.321550706239028, |
|
"learning_rate": 9.938727386570727e-06, |
|
"loss": 0.6525, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.14659685863874344, |
|
"grad_norm": 2.3398188402263105, |
|
"learning_rate": 9.934583543669454e-06, |
|
"loss": 0.6583, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1480927449513837, |
|
"grad_norm": 2.1439110014914524, |
|
"learning_rate": 9.93030504578923e-06, |
|
"loss": 0.6413, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.14958863126402394, |
|
"grad_norm": 2.2275265346511377, |
|
"learning_rate": 9.925892009681762e-06, |
|
"loss": 0.6529, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15108451757666416, |
|
"grad_norm": 2.3496939081419637, |
|
"learning_rate": 9.921344555770033e-06, |
|
"loss": 0.6437, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.1525804038893044, |
|
"grad_norm": 2.238484219281493, |
|
"learning_rate": 9.916662808145023e-06, |
|
"loss": 0.6452, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.15407629020194466, |
|
"grad_norm": 2.78908558811821, |
|
"learning_rate": 9.911846894562325e-06, |
|
"loss": 0.6436, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.15557217651458488, |
|
"grad_norm": 2.320928708686177, |
|
"learning_rate": 9.906896946438646e-06, |
|
"loss": 0.6336, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.15706806282722513, |
|
"grad_norm": 2.2586199846671686, |
|
"learning_rate": 9.901813098848238e-06, |
|
"loss": 0.6338, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.15856394913986538, |
|
"grad_norm": 2.3116521162760217, |
|
"learning_rate": 9.896595490519196e-06, |
|
"loss": 0.6414, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1600598354525056, |
|
"grad_norm": 2.1311643830360767, |
|
"learning_rate": 9.891244263829685e-06, |
|
"loss": 0.64, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.16155572176514585, |
|
"grad_norm": 2.3201652793369605, |
|
"learning_rate": 9.885759564804045e-06, |
|
"loss": 0.6197, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1630516080777861, |
|
"grad_norm": 2.1802123067545134, |
|
"learning_rate": 9.880141543108816e-06, |
|
"loss": 0.6354, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.16454749439042632, |
|
"grad_norm": 2.3111352831943086, |
|
"learning_rate": 9.874390352048646e-06, |
|
"loss": 0.6422, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.16604338070306657, |
|
"grad_norm": 2.3857931202103524, |
|
"learning_rate": 9.868506148562107e-06, |
|
"loss": 0.6255, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.16753926701570682, |
|
"grad_norm": 2.3118891681947518, |
|
"learning_rate": 9.862489093217422e-06, |
|
"loss": 0.6123, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.16903515332834704, |
|
"grad_norm": 2.3891897641974165, |
|
"learning_rate": 9.856339350208073e-06, |
|
"loss": 0.6426, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.1705310396409873, |
|
"grad_norm": 2.305906878734901, |
|
"learning_rate": 9.850057087348328e-06, |
|
"loss": 0.6199, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.17202692595362754, |
|
"grad_norm": 2.1960382748129432, |
|
"learning_rate": 9.843642476068654e-06, |
|
"loss": 0.6095, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.17352281226626776, |
|
"grad_norm": 2.0839495395902534, |
|
"learning_rate": 9.837095691411047e-06, |
|
"loss": 0.6131, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.175018698578908, |
|
"grad_norm": 2.4685394970589685, |
|
"learning_rate": 9.83041691202425e-06, |
|
"loss": 0.6257, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.17651458489154825, |
|
"grad_norm": 2.8548483464223957, |
|
"learning_rate": 9.82360632015888e-06, |
|
"loss": 0.5935, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.17801047120418848, |
|
"grad_norm": 2.5687866778693347, |
|
"learning_rate": 9.816664101662458e-06, |
|
"loss": 0.6176, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.17950635751682872, |
|
"grad_norm": 2.1643123544103497, |
|
"learning_rate": 9.809590445974328e-06, |
|
"loss": 0.6236, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.18100224382946897, |
|
"grad_norm": 2.1920911452788023, |
|
"learning_rate": 9.802385546120498e-06, |
|
"loss": 0.6149, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.1824981301421092, |
|
"grad_norm": 2.1719167623114046, |
|
"learning_rate": 9.795049598708369e-06, |
|
"loss": 0.6165, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.18399401645474944, |
|
"grad_norm": 2.045624267196742, |
|
"learning_rate": 9.787582803921366e-06, |
|
"loss": 0.6056, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.1854899027673897, |
|
"grad_norm": 2.1670193890658105, |
|
"learning_rate": 9.77998536551348e-06, |
|
"loss": 0.583, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1869857890800299, |
|
"grad_norm": 2.143005021612413, |
|
"learning_rate": 9.77225749080371e-06, |
|
"loss": 0.6025, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.18848167539267016, |
|
"grad_norm": 2.2897606994593733, |
|
"learning_rate": 9.764399390670401e-06, |
|
"loss": 0.6044, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.18997756170531038, |
|
"grad_norm": 2.1407407791372304, |
|
"learning_rate": 9.756411279545486e-06, |
|
"loss": 0.6028, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.19147344801795063, |
|
"grad_norm": 2.1400040414477512, |
|
"learning_rate": 9.748293375408647e-06, |
|
"loss": 0.6008, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.19296933433059088, |
|
"grad_norm": 2.3487555741055646, |
|
"learning_rate": 9.740045899781353e-06, |
|
"loss": 0.5905, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.1944652206432311, |
|
"grad_norm": 2.211663714643132, |
|
"learning_rate": 9.731669077720828e-06, |
|
"loss": 0.5834, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.19596110695587135, |
|
"grad_norm": 2.188161715718423, |
|
"learning_rate": 9.723163137813898e-06, |
|
"loss": 0.5855, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.1974569932685116, |
|
"grad_norm": 2.133955120338045, |
|
"learning_rate": 9.714528312170762e-06, |
|
"loss": 0.5944, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.19895287958115182, |
|
"grad_norm": 2.2340780975578527, |
|
"learning_rate": 9.705764836418648e-06, |
|
"loss": 0.583, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.20044876589379207, |
|
"grad_norm": 2.3292781920189936, |
|
"learning_rate": 9.696872949695399e-06, |
|
"loss": 0.5827, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.20194465220643232, |
|
"grad_norm": 2.3176955302107647, |
|
"learning_rate": 9.687852894642932e-06, |
|
"loss": 0.584, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.20344053851907254, |
|
"grad_norm": 2.2410986216187863, |
|
"learning_rate": 9.678704917400628e-06, |
|
"loss": 0.5702, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2049364248317128, |
|
"grad_norm": 2.2113552696479766, |
|
"learning_rate": 9.669429267598603e-06, |
|
"loss": 0.5656, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.20643231114435304, |
|
"grad_norm": 2.1894234586204613, |
|
"learning_rate": 9.660026198350906e-06, |
|
"loss": 0.5688, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.20792819745699326, |
|
"grad_norm": 2.2894157314528183, |
|
"learning_rate": 9.650495966248618e-06, |
|
"loss": 0.5563, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.2094240837696335, |
|
"grad_norm": 2.2231586059805863, |
|
"learning_rate": 9.64083883135283e-06, |
|
"loss": 0.5642, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.21091997008227376, |
|
"grad_norm": 2.227615707267463, |
|
"learning_rate": 9.631055057187564e-06, |
|
"loss": 0.5788, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.21241585639491398, |
|
"grad_norm": 2.155741018622304, |
|
"learning_rate": 9.621144910732573e-06, |
|
"loss": 0.5634, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.21391174270755423, |
|
"grad_norm": 2.396343334926677, |
|
"learning_rate": 9.611108662416064e-06, |
|
"loss": 0.5655, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.21540762902019447, |
|
"grad_norm": 2.331449791458783, |
|
"learning_rate": 9.600946586107306e-06, |
|
"loss": 0.5739, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.2169035153328347, |
|
"grad_norm": 2.2507152546219924, |
|
"learning_rate": 9.590658959109168e-06, |
|
"loss": 0.5768, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.21839940164547494, |
|
"grad_norm": 2.164980578292193, |
|
"learning_rate": 9.58024606215055e-06, |
|
"loss": 0.5517, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2198952879581152, |
|
"grad_norm": 2.2186056393230484, |
|
"learning_rate": 9.569708179378716e-06, |
|
"loss": 0.5773, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.22139117427075541, |
|
"grad_norm": 2.1412265933937245, |
|
"learning_rate": 9.559045598351544e-06, |
|
"loss": 0.5597, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.22288706058339566, |
|
"grad_norm": 2.113998854082962, |
|
"learning_rate": 9.548258610029684e-06, |
|
"loss": 0.5602, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.2243829468960359, |
|
"grad_norm": 2.1066935794719823, |
|
"learning_rate": 9.537347508768613e-06, |
|
"loss": 0.553, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.22587883320867613, |
|
"grad_norm": 2.1269652854319285, |
|
"learning_rate": 9.526312592310597e-06, |
|
"loss": 0.5462, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.22737471952131638, |
|
"grad_norm": 2.1421869014604966, |
|
"learning_rate": 9.515154161776584e-06, |
|
"loss": 0.5508, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.22887060583395663, |
|
"grad_norm": 2.116284198421969, |
|
"learning_rate": 9.503872521657964e-06, |
|
"loss": 0.549, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.23036649214659685, |
|
"grad_norm": 2.0774732327342673, |
|
"learning_rate": 9.49246797980828e-06, |
|
"loss": 0.5485, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2318623784592371, |
|
"grad_norm": 2.276120847003367, |
|
"learning_rate": 9.480940847434814e-06, |
|
"loss": 0.5553, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.23335826477187735, |
|
"grad_norm": 2.1356056201671882, |
|
"learning_rate": 9.469291439090104e-06, |
|
"loss": 0.5465, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.23485415108451757, |
|
"grad_norm": 2.048373811826588, |
|
"learning_rate": 9.457520072663353e-06, |
|
"loss": 0.5396, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.23635003739715782, |
|
"grad_norm": 2.2466734007706397, |
|
"learning_rate": 9.445627069371758e-06, |
|
"loss": 0.5688, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.23784592370979807, |
|
"grad_norm": 2.3976619549715292, |
|
"learning_rate": 9.433612753751748e-06, |
|
"loss": 0.5496, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.2393418100224383, |
|
"grad_norm": 2.0982203268057793, |
|
"learning_rate": 9.421477453650118e-06, |
|
"loss": 0.5482, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.24083769633507854, |
|
"grad_norm": 2.1926594347223936, |
|
"learning_rate": 9.409221500215096e-06, |
|
"loss": 0.5281, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.2423335826477188, |
|
"grad_norm": 2.046500172753204, |
|
"learning_rate": 9.396845227887295e-06, |
|
"loss": 0.5495, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.243829468960359, |
|
"grad_norm": 2.116270403530158, |
|
"learning_rate": 9.38434897439059e-06, |
|
"loss": 0.5333, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.24532535527299926, |
|
"grad_norm": 2.1427393113292026, |
|
"learning_rate": 9.371733080722911e-06, |
|
"loss": 0.5314, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.24682124158563948, |
|
"grad_norm": 2.2287931226941766, |
|
"learning_rate": 9.358997891146924e-06, |
|
"loss": 0.5389, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.24831712789827973, |
|
"grad_norm": 2.183511996335904, |
|
"learning_rate": 9.346143753180646e-06, |
|
"loss": 0.5332, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.24981301421091998, |
|
"grad_norm": 2.1563125330336077, |
|
"learning_rate": 9.333171017587956e-06, |
|
"loss": 0.5278, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.2513089005235602, |
|
"grad_norm": 2.384672087516804, |
|
"learning_rate": 9.320080038369032e-06, |
|
"loss": 0.5321, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.25280478683620045, |
|
"grad_norm": 2.2250998536771154, |
|
"learning_rate": 9.30687117275068e-06, |
|
"loss": 0.5237, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.2543006731488407, |
|
"grad_norm": 2.3295538202244237, |
|
"learning_rate": 9.293544781176598e-06, |
|
"loss": 0.5238, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.25579655946148094, |
|
"grad_norm": 2.46386287871832, |
|
"learning_rate": 9.280101227297526e-06, |
|
"loss": 0.5274, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.2572924457741212, |
|
"grad_norm": 2.2480305463427865, |
|
"learning_rate": 9.266540877961337e-06, |
|
"loss": 0.535, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.2587883320867614, |
|
"grad_norm": 2.1850110027540826, |
|
"learning_rate": 9.252864103203015e-06, |
|
"loss": 0.5216, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.26028421839940163, |
|
"grad_norm": 2.1759114077528845, |
|
"learning_rate": 9.239071276234568e-06, |
|
"loss": 0.5162, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.2617801047120419, |
|
"grad_norm": 2.1338769320741515, |
|
"learning_rate": 9.225162773434831e-06, |
|
"loss": 0.5143, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.26327599102468213, |
|
"grad_norm": 2.1659203361390063, |
|
"learning_rate": 9.21113897433921e-06, |
|
"loss": 0.5103, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2647718773373224, |
|
"grad_norm": 2.122282430960376, |
|
"learning_rate": 9.197000261629314e-06, |
|
"loss": 0.5081, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.26626776364996263, |
|
"grad_norm": 2.056748593014802, |
|
"learning_rate": 9.182747021122516e-06, |
|
"loss": 0.5117, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.2677636499626028, |
|
"grad_norm": 2.203097118962648, |
|
"learning_rate": 9.168379641761425e-06, |
|
"loss": 0.5166, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.26925953627524307, |
|
"grad_norm": 2.248299702751712, |
|
"learning_rate": 9.153898515603272e-06, |
|
"loss": 0.5121, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2707554225878833, |
|
"grad_norm": 2.215308947297488, |
|
"learning_rate": 9.139304037809216e-06, |
|
"loss": 0.5151, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.27225130890052357, |
|
"grad_norm": 2.115586467592, |
|
"learning_rate": 9.124596606633551e-06, |
|
"loss": 0.5083, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.2737471952131638, |
|
"grad_norm": 2.2977950459018017, |
|
"learning_rate": 9.10977662341285e-06, |
|
"loss": 0.5153, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.275243081525804, |
|
"grad_norm": 2.205780583800523, |
|
"learning_rate": 9.094844492555004e-06, |
|
"loss": 0.5123, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.27673896783844426, |
|
"grad_norm": 2.227802917043228, |
|
"learning_rate": 9.07980062152819e-06, |
|
"loss": 0.5117, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.2782348541510845, |
|
"grad_norm": 2.2359783620231632, |
|
"learning_rate": 9.064645420849754e-06, |
|
"loss": 0.5022, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.27973074046372476, |
|
"grad_norm": 2.1642613110172366, |
|
"learning_rate": 9.049379304075009e-06, |
|
"loss": 0.4907, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.281226626776365, |
|
"grad_norm": 2.2277389804733447, |
|
"learning_rate": 9.03400268778594e-06, |
|
"loss": 0.5011, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.28272251308900526, |
|
"grad_norm": 2.1493583853918907, |
|
"learning_rate": 9.018515991579851e-06, |
|
"loss": 0.5019, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.28421839940164545, |
|
"grad_norm": 2.4395894627674073, |
|
"learning_rate": 9.002919638057908e-06, |
|
"loss": 0.5033, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 2.2370400153506806, |
|
"learning_rate": 8.987214052813605e-06, |
|
"loss": 0.5045, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.28721017202692595, |
|
"grad_norm": 2.078576437577485, |
|
"learning_rate": 8.971399664421154e-06, |
|
"loss": 0.5009, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.2887060583395662, |
|
"grad_norm": 2.2142839400817937, |
|
"learning_rate": 8.955476904423785e-06, |
|
"loss": 0.5023, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.29020194465220644, |
|
"grad_norm": 2.14232609513754, |
|
"learning_rate": 8.939446207321982e-06, |
|
"loss": 0.477, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2916978309648467, |
|
"grad_norm": 2.21107323554905, |
|
"learning_rate": 8.923308010561608e-06, |
|
"loss": 0.4994, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.2931937172774869, |
|
"grad_norm": 2.1386395431438054, |
|
"learning_rate": 8.907062754521985e-06, |
|
"loss": 0.5023, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.29468960359012714, |
|
"grad_norm": 2.1332355719651037, |
|
"learning_rate": 8.89071088250387e-06, |
|
"loss": 0.4843, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.2961854899027674, |
|
"grad_norm": 2.0749503641930276, |
|
"learning_rate": 8.87425284071735e-06, |
|
"loss": 0.4942, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.29768137621540763, |
|
"grad_norm": 2.159991846647922, |
|
"learning_rate": 8.857689078269688e-06, |
|
"loss": 0.5108, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.2991772625280479, |
|
"grad_norm": 2.1267522505598446, |
|
"learning_rate": 8.841020047153039e-06, |
|
"loss": 0.4935, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.30067314884068813, |
|
"grad_norm": 2.1642503588715245, |
|
"learning_rate": 8.824246202232142e-06, |
|
"loss": 0.4907, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.3021690351533283, |
|
"grad_norm": 2.084991570149356, |
|
"learning_rate": 8.80736800123189e-06, |
|
"loss": 0.4781, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.3036649214659686, |
|
"grad_norm": 2.1035440822771223, |
|
"learning_rate": 8.790385904724848e-06, |
|
"loss": 0.4845, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.3051608077786088, |
|
"grad_norm": 2.1736909744601687, |
|
"learning_rate": 8.773300376118685e-06, |
|
"loss": 0.4801, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.30665669409124907, |
|
"grad_norm": 2.2520314938860815, |
|
"learning_rate": 8.75611188164352e-06, |
|
"loss": 0.4893, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.3081525804038893, |
|
"grad_norm": 2.1104641749948403, |
|
"learning_rate": 8.738820890339217e-06, |
|
"loss": 0.4938, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.30964846671652957, |
|
"grad_norm": 2.0838403753220986, |
|
"learning_rate": 8.721427874042563e-06, |
|
"loss": 0.4835, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.31114435302916976, |
|
"grad_norm": 2.0711510810184266, |
|
"learning_rate": 8.703933307374413e-06, |
|
"loss": 0.4725, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.31264023934181, |
|
"grad_norm": 2.1063779245743888, |
|
"learning_rate": 8.686337667726723e-06, |
|
"loss": 0.4892, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.31413612565445026, |
|
"grad_norm": 2.1105067703269422, |
|
"learning_rate": 8.668641435249534e-06, |
|
"loss": 0.4825, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3156320119670905, |
|
"grad_norm": 2.102573408737706, |
|
"learning_rate": 8.650845092837867e-06, |
|
"loss": 0.4885, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.31712789827973076, |
|
"grad_norm": 2.2988609972066274, |
|
"learning_rate": 8.632949126118538e-06, |
|
"loss": 0.4752, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.318623784592371, |
|
"grad_norm": 2.122502919871484, |
|
"learning_rate": 8.61495402343692e-06, |
|
"loss": 0.4769, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.3201196709050112, |
|
"grad_norm": 2.165018274340972, |
|
"learning_rate": 8.596860275843602e-06, |
|
"loss": 0.4671, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.32161555721765145, |
|
"grad_norm": 1.9717223958070753, |
|
"learning_rate": 8.578668377081001e-06, |
|
"loss": 0.4675, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.3231114435302917, |
|
"grad_norm": 2.1031743583556803, |
|
"learning_rate": 8.560378823569886e-06, |
|
"loss": 0.4713, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.32460732984293195, |
|
"grad_norm": 2.0178473800411307, |
|
"learning_rate": 8.541992114395825e-06, |
|
"loss": 0.4715, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.3261032161555722, |
|
"grad_norm": 2.0225831073597007, |
|
"learning_rate": 8.523508751295574e-06, |
|
"loss": 0.4772, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3275991024682124, |
|
"grad_norm": 2.087877364586164, |
|
"learning_rate": 8.504929238643381e-06, |
|
"loss": 0.4688, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.32909498878085264, |
|
"grad_norm": 2.160270876260719, |
|
"learning_rate": 8.486254083437227e-06, |
|
"loss": 0.4665, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3305908750934929, |
|
"grad_norm": 2.060627567407879, |
|
"learning_rate": 8.467483795284987e-06, |
|
"loss": 0.4617, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.33208676140613314, |
|
"grad_norm": 2.1150833498354893, |
|
"learning_rate": 8.448618886390523e-06, |
|
"loss": 0.4676, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3335826477187734, |
|
"grad_norm": 2.026961078510351, |
|
"learning_rate": 8.429659871539709e-06, |
|
"loss": 0.4772, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.33507853403141363, |
|
"grad_norm": 2.6128907908421852, |
|
"learning_rate": 8.410607268086388e-06, |
|
"loss": 0.4678, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3365744203440538, |
|
"grad_norm": 2.1162204368840185, |
|
"learning_rate": 8.391461595938245e-06, |
|
"loss": 0.4728, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.3380703066566941, |
|
"grad_norm": 2.0236924938571095, |
|
"learning_rate": 8.372223377542631e-06, |
|
"loss": 0.4556, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3395661929693343, |
|
"grad_norm": 2.0470011404134345, |
|
"learning_rate": 8.352893137872292e-06, |
|
"loss": 0.4476, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.3410620792819746, |
|
"grad_norm": 2.100726525573022, |
|
"learning_rate": 8.333471404411054e-06, |
|
"loss": 0.458, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3425579655946148, |
|
"grad_norm": 2.162727675316811, |
|
"learning_rate": 8.313958707139434e-06, |
|
"loss": 0.4751, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.34405385190725507, |
|
"grad_norm": 2.0116970709952495, |
|
"learning_rate": 8.29435557852016e-06, |
|
"loss": 0.4647, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.34554973821989526, |
|
"grad_norm": 1.9814728402387116, |
|
"learning_rate": 8.274662553483662e-06, |
|
"loss": 0.4441, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.3470456245325355, |
|
"grad_norm": 2.0453867973962607, |
|
"learning_rate": 8.254880169413455e-06, |
|
"loss": 0.4613, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.34854151084517576, |
|
"grad_norm": 2.0854663750868268, |
|
"learning_rate": 8.235008966131492e-06, |
|
"loss": 0.456, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.350037397157816, |
|
"grad_norm": 2.1204249951123706, |
|
"learning_rate": 8.215049485883419e-06, |
|
"loss": 0.4526, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.35153328347045626, |
|
"grad_norm": 2.125080372850005, |
|
"learning_rate": 8.195002273323792e-06, |
|
"loss": 0.4442, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.3530291697830965, |
|
"grad_norm": 2.2626876387499224, |
|
"learning_rate": 8.174867875501203e-06, |
|
"loss": 0.4491, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3545250560957367, |
|
"grad_norm": 2.17644103793076, |
|
"learning_rate": 8.154646841843358e-06, |
|
"loss": 0.449, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.35602094240837695, |
|
"grad_norm": 1.9934405786856697, |
|
"learning_rate": 8.134339724142083e-06, |
|
"loss": 0.4491, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3575168287210172, |
|
"grad_norm": 1.9811124546772585, |
|
"learning_rate": 8.113947076538264e-06, |
|
"loss": 0.4412, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.35901271503365745, |
|
"grad_norm": 2.1197485018681785, |
|
"learning_rate": 8.093469455506731e-06, |
|
"loss": 0.4448, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3605086013462977, |
|
"grad_norm": 2.0582968984341967, |
|
"learning_rate": 8.07290741984107e-06, |
|
"loss": 0.4397, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.36200448765893795, |
|
"grad_norm": 1.9803742197531462, |
|
"learning_rate": 8.052261530638375e-06, |
|
"loss": 0.4486, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.36350037397157814, |
|
"grad_norm": 1.9763814917893987, |
|
"learning_rate": 8.03153235128393e-06, |
|
"loss": 0.4379, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.3649962602842184, |
|
"grad_norm": 2.177684384739003, |
|
"learning_rate": 8.01072044743585e-06, |
|
"loss": 0.4448, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.36649214659685864, |
|
"grad_norm": 2.2177683069308047, |
|
"learning_rate": 7.989826387009634e-06, |
|
"loss": 0.4398, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.3679880329094989, |
|
"grad_norm": 2.0614298881537416, |
|
"learning_rate": 7.96885074016267e-06, |
|
"loss": 0.438, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.36948391922213913, |
|
"grad_norm": 2.063175118233129, |
|
"learning_rate": 7.947794079278678e-06, |
|
"loss": 0.4353, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.3709798055347794, |
|
"grad_norm": 2.0902885795644943, |
|
"learning_rate": 7.926656978952089e-06, |
|
"loss": 0.4369, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3724756918474196, |
|
"grad_norm": 2.081819065453435, |
|
"learning_rate": 7.905440015972372e-06, |
|
"loss": 0.4392, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.3739715781600598, |
|
"grad_norm": 1.9635390617281576, |
|
"learning_rate": 7.884143769308276e-06, |
|
"loss": 0.4318, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3754674644727001, |
|
"grad_norm": 2.010397135845292, |
|
"learning_rate": 7.862768820092061e-06, |
|
"loss": 0.4294, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.3769633507853403, |
|
"grad_norm": 2.120029095014225, |
|
"learning_rate": 7.84131575160361e-06, |
|
"loss": 0.4367, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.37845923709798057, |
|
"grad_norm": 2.047223712557703, |
|
"learning_rate": 7.819785149254534e-06, |
|
"loss": 0.4247, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.37995512341062077, |
|
"grad_norm": 2.1565665198769546, |
|
"learning_rate": 7.798177600572184e-06, |
|
"loss": 0.4545, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.381451009723261, |
|
"grad_norm": 1.9698630282226646, |
|
"learning_rate": 7.776493695183623e-06, |
|
"loss": 0.4327, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.38294689603590126, |
|
"grad_norm": 2.027501209185265, |
|
"learning_rate": 7.754734024799544e-06, |
|
"loss": 0.4378, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3844427823485415, |
|
"grad_norm": 1.9336783003915325, |
|
"learning_rate": 7.732899183198108e-06, |
|
"loss": 0.4199, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.38593866866118176, |
|
"grad_norm": 2.074909881667748, |
|
"learning_rate": 7.710989766208751e-06, |
|
"loss": 0.431, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.387434554973822, |
|
"grad_norm": 2.08466673344805, |
|
"learning_rate": 7.689006371695928e-06, |
|
"loss": 0.436, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.3889304412864622, |
|
"grad_norm": 2.0101045976441334, |
|
"learning_rate": 7.666949599542788e-06, |
|
"loss": 0.4363, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.39042632759910245, |
|
"grad_norm": 2.1388630620219304, |
|
"learning_rate": 7.644820051634813e-06, |
|
"loss": 0.4353, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.3919222139117427, |
|
"grad_norm": 1.9897181694789714, |
|
"learning_rate": 7.62261833184339e-06, |
|
"loss": 0.4321, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.39341810022438295, |
|
"grad_norm": 2.069750404086554, |
|
"learning_rate": 7.60034504600933e-06, |
|
"loss": 0.4166, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.3949139865370232, |
|
"grad_norm": 2.0828214162126564, |
|
"learning_rate": 7.5780008019263465e-06, |
|
"loss": 0.4309, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.39640987284966345, |
|
"grad_norm": 2.1311064881304183, |
|
"learning_rate": 7.555586209324455e-06, |
|
"loss": 0.42, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.39790575916230364, |
|
"grad_norm": 2.0067032988225715, |
|
"learning_rate": 7.533101879853348e-06, |
|
"loss": 0.4247, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3994016454749439, |
|
"grad_norm": 2.1601395941384514, |
|
"learning_rate": 7.510548427065693e-06, |
|
"loss": 0.4103, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.40089753178758414, |
|
"grad_norm": 2.0545268261654166, |
|
"learning_rate": 7.487926466400403e-06, |
|
"loss": 0.418, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4023934181002244, |
|
"grad_norm": 2.029856636678106, |
|
"learning_rate": 7.465236615165826e-06, |
|
"loss": 0.4265, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.40388930441286464, |
|
"grad_norm": 1.9396811090214083, |
|
"learning_rate": 7.4424794925229175e-06, |
|
"loss": 0.4241, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4053851907255049, |
|
"grad_norm": 2.073788987162284, |
|
"learning_rate": 7.4196557194683265e-06, |
|
"loss": 0.4039, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.4068810770381451, |
|
"grad_norm": 2.070263015501858, |
|
"learning_rate": 7.3967659188174676e-06, |
|
"loss": 0.4331, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.4083769633507853, |
|
"grad_norm": 1.957024406881209, |
|
"learning_rate": 7.373810715187516e-06, |
|
"loss": 0.4198, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.4098728496634256, |
|
"grad_norm": 2.0021094595131705, |
|
"learning_rate": 7.350790734980359e-06, |
|
"loss": 0.4138, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.4113687359760658, |
|
"grad_norm": 2.038893591791927, |
|
"learning_rate": 7.327706606365512e-06, |
|
"loss": 0.4099, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.4128646222887061, |
|
"grad_norm": 2.091182328954734, |
|
"learning_rate": 7.304558959262973e-06, |
|
"loss": 0.4091, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4143605086013463, |
|
"grad_norm": 2.005484469630839, |
|
"learning_rate": 7.281348425326034e-06, |
|
"loss": 0.4071, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.4158563949139865, |
|
"grad_norm": 2.000171729890043, |
|
"learning_rate": 7.258075637924039e-06, |
|
"loss": 0.4077, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.41735228122662676, |
|
"grad_norm": 1.88335343776708, |
|
"learning_rate": 7.234741232125111e-06, |
|
"loss": 0.4106, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.418848167539267, |
|
"grad_norm": 2.041697368575073, |
|
"learning_rate": 7.211345844678816e-06, |
|
"loss": 0.4124, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.42034405385190726, |
|
"grad_norm": 2.1120074891606313, |
|
"learning_rate": 7.1878901139987826e-06, |
|
"loss": 0.414, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.4218399401645475, |
|
"grad_norm": 2.017409414749495, |
|
"learning_rate": 7.164374680145293e-06, |
|
"loss": 0.4038, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.42333582647718776, |
|
"grad_norm": 2.0432465019716144, |
|
"learning_rate": 7.140800184807805e-06, |
|
"loss": 0.4073, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.42483171278982795, |
|
"grad_norm": 2.060077990063716, |
|
"learning_rate": 7.117167271287453e-06, |
|
"loss": 0.4068, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.4263275991024682, |
|
"grad_norm": 2.027592571205212, |
|
"learning_rate": 7.09347658447948e-06, |
|
"loss": 0.4042, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.42782348541510845, |
|
"grad_norm": 2.040823863949173, |
|
"learning_rate": 7.069728770855652e-06, |
|
"loss": 0.4034, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4293193717277487, |
|
"grad_norm": 2.1465152715010722, |
|
"learning_rate": 7.0459244784466115e-06, |
|
"loss": 0.4049, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.43081525804038895, |
|
"grad_norm": 2.017024929241199, |
|
"learning_rate": 7.022064356824196e-06, |
|
"loss": 0.4051, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4323111443530292, |
|
"grad_norm": 1.9756966229288817, |
|
"learning_rate": 6.998149057083711e-06, |
|
"loss": 0.3991, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.4338070306656694, |
|
"grad_norm": 1.9869718270881975, |
|
"learning_rate": 6.9741792318261585e-06, |
|
"loss": 0.4029, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.43530291697830964, |
|
"grad_norm": 2.1015981628011136, |
|
"learning_rate": 6.950155535140439e-06, |
|
"loss": 0.3998, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.4367988032909499, |
|
"grad_norm": 2.1512869214406174, |
|
"learning_rate": 6.926078622585496e-06, |
|
"loss": 0.4001, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.43829468960359014, |
|
"grad_norm": 2.0152270376530677, |
|
"learning_rate": 6.901949151172427e-06, |
|
"loss": 0.4047, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.4397905759162304, |
|
"grad_norm": 2.11665136839116, |
|
"learning_rate": 6.877767779346556e-06, |
|
"loss": 0.4064, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4412864622288706, |
|
"grad_norm": 2.231208727114714, |
|
"learning_rate": 6.8535351669694694e-06, |
|
"loss": 0.3884, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.44278234854151083, |
|
"grad_norm": 1.9444993004804072, |
|
"learning_rate": 6.829251975301003e-06, |
|
"loss": 0.3949, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4442782348541511, |
|
"grad_norm": 1.98272069907838, |
|
"learning_rate": 6.8049188669812024e-06, |
|
"loss": 0.395, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.4457741211667913, |
|
"grad_norm": 1.9120999593676538, |
|
"learning_rate": 6.7805365060122386e-06, |
|
"loss": 0.3968, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4472700074794316, |
|
"grad_norm": 2.0053365034386186, |
|
"learning_rate": 6.756105557740289e-06, |
|
"loss": 0.402, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.4487658937920718, |
|
"grad_norm": 1.9514629474872618, |
|
"learning_rate": 6.731626688837387e-06, |
|
"loss": 0.3836, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.450261780104712, |
|
"grad_norm": 2.1087506038221955, |
|
"learning_rate": 6.707100567283217e-06, |
|
"loss": 0.3843, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.45175766641735227, |
|
"grad_norm": 2.1300871436189306, |
|
"learning_rate": 6.682527862346898e-06, |
|
"loss": 0.3996, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.4532535527299925, |
|
"grad_norm": 1.9854434493239195, |
|
"learning_rate": 6.657909244568721e-06, |
|
"loss": 0.4011, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.45474943904263276, |
|
"grad_norm": 1.9814246083045182, |
|
"learning_rate": 6.6332453857418375e-06, |
|
"loss": 0.4012, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.456245325355273, |
|
"grad_norm": 2.023928605650618, |
|
"learning_rate": 6.608536958893948e-06, |
|
"loss": 0.3962, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.45774121166791326, |
|
"grad_norm": 2.012248063709598, |
|
"learning_rate": 6.583784638268919e-06, |
|
"loss": 0.4001, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.45923709798055345, |
|
"grad_norm": 1.9208473033828253, |
|
"learning_rate": 6.5589890993083934e-06, |
|
"loss": 0.3965, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.4607329842931937, |
|
"grad_norm": 2.1713469007968476, |
|
"learning_rate": 6.534151018633355e-06, |
|
"loss": 0.3962, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.46222887060583395, |
|
"grad_norm": 2.079357026566145, |
|
"learning_rate": 6.509271074025668e-06, |
|
"loss": 0.3913, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.4637247569184742, |
|
"grad_norm": 1.9619607602752462, |
|
"learning_rate": 6.484349944409579e-06, |
|
"loss": 0.3885, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.46522064323111445, |
|
"grad_norm": 2.025383663450042, |
|
"learning_rate": 6.459388309833193e-06, |
|
"loss": 0.396, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.4667165295437547, |
|
"grad_norm": 1.9926140480691588, |
|
"learning_rate": 6.434386851449914e-06, |
|
"loss": 0.3978, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.4682124158563949, |
|
"grad_norm": 2.0001783057698677, |
|
"learning_rate": 6.409346251499859e-06, |
|
"loss": 0.3889, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.46970830216903514, |
|
"grad_norm": 1.9977538629610117, |
|
"learning_rate": 6.384267193291238e-06, |
|
"loss": 0.3872, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4712041884816754, |
|
"grad_norm": 1.9725560974868908, |
|
"learning_rate": 6.3591503611817155e-06, |
|
"loss": 0.39, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.47270007479431564, |
|
"grad_norm": 1.9326640130746877, |
|
"learning_rate": 6.333996440559726e-06, |
|
"loss": 0.3815, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4741959611069559, |
|
"grad_norm": 1.9055402813860574, |
|
"learning_rate": 6.308806117825777e-06, |
|
"loss": 0.3801, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.47569184741959614, |
|
"grad_norm": 1.9722002752461958, |
|
"learning_rate": 6.283580080373721e-06, |
|
"loss": 0.3804, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.47718773373223633, |
|
"grad_norm": 2.0082373206027526, |
|
"learning_rate": 6.25831901657199e-06, |
|
"loss": 0.3775, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.4786836200448766, |
|
"grad_norm": 1.9266846313881612, |
|
"learning_rate": 6.233023615744813e-06, |
|
"loss": 0.3883, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4801795063575168, |
|
"grad_norm": 2.088263839026747, |
|
"learning_rate": 6.207694568153418e-06, |
|
"loss": 0.389, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.4816753926701571, |
|
"grad_norm": 2.1042065332247555, |
|
"learning_rate": 6.182332564977174e-06, |
|
"loss": 0.3792, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4831712789827973, |
|
"grad_norm": 1.9720059814432505, |
|
"learning_rate": 6.156938298294752e-06, |
|
"loss": 0.3706, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.4846671652954376, |
|
"grad_norm": 1.9180314469419848, |
|
"learning_rate": 6.131512461065227e-06, |
|
"loss": 0.377, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.48616305160807777, |
|
"grad_norm": 2.2323059057893775, |
|
"learning_rate": 6.106055747109169e-06, |
|
"loss": 0.3737, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.487658937920718, |
|
"grad_norm": 1.9094100696871863, |
|
"learning_rate": 6.080568851089717e-06, |
|
"loss": 0.381, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.48915482423335827, |
|
"grad_norm": 1.8740312923707445, |
|
"learning_rate": 6.055052468493614e-06, |
|
"loss": 0.3712, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.4906507105459985, |
|
"grad_norm": 2.2274282125289364, |
|
"learning_rate": 6.029507295612235e-06, |
|
"loss": 0.3818, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.49214659685863876, |
|
"grad_norm": 1.9574498076717952, |
|
"learning_rate": 6.0039340295225845e-06, |
|
"loss": 0.3808, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.49364248317127896, |
|
"grad_norm": 2.019106731639845, |
|
"learning_rate": 5.978333368068278e-06, |
|
"loss": 0.3739, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4951383694839192, |
|
"grad_norm": 2.003448813298111, |
|
"learning_rate": 5.952706009840491e-06, |
|
"loss": 0.3801, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.49663425579655945, |
|
"grad_norm": 2.033251099732251, |
|
"learning_rate": 5.9270526541589025e-06, |
|
"loss": 0.3719, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4981301421091997, |
|
"grad_norm": 1.9736607384350244, |
|
"learning_rate": 5.901374001052614e-06, |
|
"loss": 0.3647, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.49962602842183995, |
|
"grad_norm": 1.8450885851841383, |
|
"learning_rate": 5.875670751241036e-06, |
|
"loss": 0.3694, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.5011219147344802, |
|
"grad_norm": 1.9550125676086019, |
|
"learning_rate": 5.849943606114782e-06, |
|
"loss": 0.3765, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.5026178010471204, |
|
"grad_norm": 1.9088487115244133, |
|
"learning_rate": 5.824193267716517e-06, |
|
"loss": 0.3628, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5041136873597607, |
|
"grad_norm": 2.0236760384942887, |
|
"learning_rate": 5.798420438721804e-06, |
|
"loss": 0.3681, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.5056095736724009, |
|
"grad_norm": 1.8739105083496626, |
|
"learning_rate": 5.772625822419933e-06, |
|
"loss": 0.3626, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5071054599850411, |
|
"grad_norm": 2.0538294961575048, |
|
"learning_rate": 5.74681012269472e-06, |
|
"loss": 0.3664, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.5086013462976814, |
|
"grad_norm": 1.9510478793415906, |
|
"learning_rate": 5.720974044005314e-06, |
|
"loss": 0.3687, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5100972326103216, |
|
"grad_norm": 2.0027682706640206, |
|
"learning_rate": 5.695118291366959e-06, |
|
"loss": 0.3791, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.5115931189229619, |
|
"grad_norm": 2.017422075178467, |
|
"learning_rate": 5.669243570331766e-06, |
|
"loss": 0.3592, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5130890052356021, |
|
"grad_norm": 1.9298103072373924, |
|
"learning_rate": 5.643350586969453e-06, |
|
"loss": 0.3624, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.5145848915482424, |
|
"grad_norm": 1.828229384099037, |
|
"learning_rate": 5.617440047848081e-06, |
|
"loss": 0.3693, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5160807778608826, |
|
"grad_norm": 1.9835871613164413, |
|
"learning_rate": 5.591512660014773e-06, |
|
"loss": 0.367, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.5175766641735228, |
|
"grad_norm": 1.906111097537283, |
|
"learning_rate": 5.5655691309764225e-06, |
|
"loss": 0.3698, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5190725504861631, |
|
"grad_norm": 1.8980070836105973, |
|
"learning_rate": 5.539610168680381e-06, |
|
"loss": 0.3617, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.5205684367988033, |
|
"grad_norm": 1.929980721348062, |
|
"learning_rate": 5.513636481495143e-06, |
|
"loss": 0.3603, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5220643231114436, |
|
"grad_norm": 1.91015401663393, |
|
"learning_rate": 5.487648778191021e-06, |
|
"loss": 0.3533, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.5235602094240838, |
|
"grad_norm": 1.9455506909545779, |
|
"learning_rate": 5.4616477679207906e-06, |
|
"loss": 0.3746, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.525056095736724, |
|
"grad_norm": 1.8552115044332138, |
|
"learning_rate": 5.435634160200355e-06, |
|
"loss": 0.3583, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.5265519820493643, |
|
"grad_norm": 1.913776238110964, |
|
"learning_rate": 5.409608664889376e-06, |
|
"loss": 0.3571, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5280478683620045, |
|
"grad_norm": 1.9566204864416041, |
|
"learning_rate": 5.383571992171904e-06, |
|
"loss": 0.3681, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.5295437546746448, |
|
"grad_norm": 2.0484694098984813, |
|
"learning_rate": 5.357524852536996e-06, |
|
"loss": 0.3579, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.531039640987285, |
|
"grad_norm": 1.9124761975111415, |
|
"learning_rate": 5.331467956759331e-06, |
|
"loss": 0.3508, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.5325355272999253, |
|
"grad_norm": 1.9151628917936323, |
|
"learning_rate": 5.305402015879817e-06, |
|
"loss": 0.3582, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5340314136125655, |
|
"grad_norm": 1.8760817819604374, |
|
"learning_rate": 5.279327741186179e-06, |
|
"loss": 0.3607, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.5355272999252056, |
|
"grad_norm": 1.961131431192389, |
|
"learning_rate": 5.253245844193564e-06, |
|
"loss": 0.3517, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.537023186237846, |
|
"grad_norm": 1.971571895204417, |
|
"learning_rate": 5.227157036625108e-06, |
|
"loss": 0.3456, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.5385190725504861, |
|
"grad_norm": 1.8838335367241383, |
|
"learning_rate": 5.2010620303925275e-06, |
|
"loss": 0.3519, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5400149588631264, |
|
"grad_norm": 1.829377568323147, |
|
"learning_rate": 5.174961537576685e-06, |
|
"loss": 0.3564, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.5415108451757666, |
|
"grad_norm": 1.8522486080816014, |
|
"learning_rate": 5.148856270408163e-06, |
|
"loss": 0.3568, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5430067314884068, |
|
"grad_norm": 1.938579795945218, |
|
"learning_rate": 5.122746941247828e-06, |
|
"loss": 0.3607, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.5445026178010471, |
|
"grad_norm": 1.8962553032833915, |
|
"learning_rate": 5.096634262567388e-06, |
|
"loss": 0.3578, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5459985041136873, |
|
"grad_norm": 1.7953489371783111, |
|
"learning_rate": 5.070518946929954e-06, |
|
"loss": 0.3495, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.5474943904263276, |
|
"grad_norm": 1.9518780090135102, |
|
"learning_rate": 5.044401706970592e-06, |
|
"loss": 0.3558, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5489902767389678, |
|
"grad_norm": 2.029523910683152, |
|
"learning_rate": 5.018283255376882e-06, |
|
"loss": 0.3505, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.550486163051608, |
|
"grad_norm": 1.9831397143651377, |
|
"learning_rate": 4.992164304869464e-06, |
|
"loss": 0.3569, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5519820493642483, |
|
"grad_norm": 2.1279272506945075, |
|
"learning_rate": 4.966045568182596e-06, |
|
"loss": 0.3372, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.5534779356768885, |
|
"grad_norm": 1.9637293854690605, |
|
"learning_rate": 4.939927758044698e-06, |
|
"loss": 0.3518, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5549738219895288, |
|
"grad_norm": 2.0828701986556695, |
|
"learning_rate": 4.913811587158908e-06, |
|
"loss": 0.3443, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.556469708302169, |
|
"grad_norm": 1.981630887644782, |
|
"learning_rate": 4.887697768183633e-06, |
|
"loss": 0.3444, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5579655946148093, |
|
"grad_norm": 1.8743980292802156, |
|
"learning_rate": 4.861587013713096e-06, |
|
"loss": 0.346, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.5594614809274495, |
|
"grad_norm": 1.9064350126377236, |
|
"learning_rate": 4.835480036257904e-06, |
|
"loss": 0.3467, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5609573672400897, |
|
"grad_norm": 1.8972548935569284, |
|
"learning_rate": 4.809377548225589e-06, |
|
"loss": 0.3388, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.56245325355273, |
|
"grad_norm": 1.8677668781712522, |
|
"learning_rate": 4.783280261901179e-06, |
|
"loss": 0.3442, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5639491398653702, |
|
"grad_norm": 1.900487755648876, |
|
"learning_rate": 4.757188889427761e-06, |
|
"loss": 0.3389, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.5654450261780105, |
|
"grad_norm": 1.9420787190248043, |
|
"learning_rate": 4.731104142787035e-06, |
|
"loss": 0.3472, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5669409124906507, |
|
"grad_norm": 1.927314753260138, |
|
"learning_rate": 4.7050267337799074e-06, |
|
"loss": 0.3481, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.5684367988032909, |
|
"grad_norm": 1.9207634340998982, |
|
"learning_rate": 4.678957374007046e-06, |
|
"loss": 0.3424, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5699326851159312, |
|
"grad_norm": 1.9212470848002643, |
|
"learning_rate": 4.652896774849477e-06, |
|
"loss": 0.3358, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.924739770896096, |
|
"learning_rate": 4.626845647449161e-06, |
|
"loss": 0.3353, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5729244577412117, |
|
"grad_norm": 1.9350839334038696, |
|
"learning_rate": 4.600804702689598e-06, |
|
"loss": 0.3348, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.5744203440538519, |
|
"grad_norm": 1.8695042520523082, |
|
"learning_rate": 4.57477465117642e-06, |
|
"loss": 0.338, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5759162303664922, |
|
"grad_norm": 1.9312558535320394, |
|
"learning_rate": 4.54875620321801e-06, |
|
"loss": 0.343, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.5774121166791324, |
|
"grad_norm": 1.8821605226871228, |
|
"learning_rate": 4.522750068806107e-06, |
|
"loss": 0.3407, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5789080029917726, |
|
"grad_norm": 2.0267756257950906, |
|
"learning_rate": 4.496756957596438e-06, |
|
"loss": 0.332, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.5804038893044129, |
|
"grad_norm": 1.87047157167879, |
|
"learning_rate": 4.4707775788893586e-06, |
|
"loss": 0.3377, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5818997756170531, |
|
"grad_norm": 1.8334500325846965, |
|
"learning_rate": 4.444812641610482e-06, |
|
"loss": 0.3331, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.5833956619296934, |
|
"grad_norm": 1.865940227461524, |
|
"learning_rate": 4.418862854291356e-06, |
|
"loss": 0.3336, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5848915482423336, |
|
"grad_norm": 1.8290658280068524, |
|
"learning_rate": 4.392928925050106e-06, |
|
"loss": 0.3237, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.5863874345549738, |
|
"grad_norm": 1.8622254708709993, |
|
"learning_rate": 4.3670115615721265e-06, |
|
"loss": 0.3376, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5878833208676141, |
|
"grad_norm": 1.9201843032013242, |
|
"learning_rate": 4.341111471090762e-06, |
|
"loss": 0.3459, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.5893792071802543, |
|
"grad_norm": 1.8796680590731187, |
|
"learning_rate": 4.315229360368014e-06, |
|
"loss": 0.3278, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5908750934928946, |
|
"grad_norm": 1.8016030738978284, |
|
"learning_rate": 4.289365935675255e-06, |
|
"loss": 0.3268, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.5923709798055348, |
|
"grad_norm": 1.9190193806693643, |
|
"learning_rate": 4.263521902773944e-06, |
|
"loss": 0.3333, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5938668661181751, |
|
"grad_norm": 1.8784476290504393, |
|
"learning_rate": 4.237697966896385e-06, |
|
"loss": 0.3271, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.5953627524308153, |
|
"grad_norm": 1.8712250384764961, |
|
"learning_rate": 4.211894832726471e-06, |
|
"loss": 0.3342, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5968586387434555, |
|
"grad_norm": 1.9036575376553382, |
|
"learning_rate": 4.1861132043804555e-06, |
|
"loss": 0.335, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.5983545250560958, |
|
"grad_norm": 1.9581740636617746, |
|
"learning_rate": 4.160353785387746e-06, |
|
"loss": 0.324, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.599850411368736, |
|
"grad_norm": 1.834158258904465, |
|
"learning_rate": 4.134617278671694e-06, |
|
"loss": 0.3278, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.6013462976813763, |
|
"grad_norm": 1.864212034584157, |
|
"learning_rate": 4.108904386530429e-06, |
|
"loss": 0.3293, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6028421839940165, |
|
"grad_norm": 1.7424366923402765, |
|
"learning_rate": 4.083215810617678e-06, |
|
"loss": 0.327, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.6043380703066566, |
|
"grad_norm": 1.8772680059539715, |
|
"learning_rate": 4.057552251923633e-06, |
|
"loss": 0.3327, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.605833956619297, |
|
"grad_norm": 1.8850386701103279, |
|
"learning_rate": 4.031914410755809e-06, |
|
"loss": 0.327, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.6073298429319371, |
|
"grad_norm": 1.8735991544459796, |
|
"learning_rate": 4.0063029867199455e-06, |
|
"loss": 0.3278, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6088257292445775, |
|
"grad_norm": 2.0742483586745952, |
|
"learning_rate": 3.980718678700909e-06, |
|
"loss": 0.3295, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.6103216155572176, |
|
"grad_norm": 1.805997806919521, |
|
"learning_rate": 3.955162184843625e-06, |
|
"loss": 0.318, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6118175018698578, |
|
"grad_norm": 1.9482369327485018, |
|
"learning_rate": 3.929634202534026e-06, |
|
"loss": 0.3303, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.6133133881824981, |
|
"grad_norm": 1.8643741778263954, |
|
"learning_rate": 3.904135428380019e-06, |
|
"loss": 0.3221, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6148092744951383, |
|
"grad_norm": 1.9119914679721755, |
|
"learning_rate": 3.8786665581924805e-06, |
|
"loss": 0.3259, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.6163051608077786, |
|
"grad_norm": 2.0294178588740808, |
|
"learning_rate": 3.853228286966265e-06, |
|
"loss": 0.3114, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6178010471204188, |
|
"grad_norm": 1.8598282314437558, |
|
"learning_rate": 3.827821308861244e-06, |
|
"loss": 0.3242, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.6192969334330591, |
|
"grad_norm": 1.9818198802388973, |
|
"learning_rate": 3.8024463171833636e-06, |
|
"loss": 0.3252, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6207928197456993, |
|
"grad_norm": 1.9439228162479631, |
|
"learning_rate": 3.777104004365721e-06, |
|
"loss": 0.3258, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.6222887060583395, |
|
"grad_norm": 1.9406393323579751, |
|
"learning_rate": 3.7517950619496713e-06, |
|
"loss": 0.327, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6237845923709798, |
|
"grad_norm": 1.8702196116833902, |
|
"learning_rate": 3.7265201805659618e-06, |
|
"loss": 0.3274, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.62528047868362, |
|
"grad_norm": 1.8206045884367064, |
|
"learning_rate": 3.701280049915877e-06, |
|
"loss": 0.3087, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.6267763649962603, |
|
"grad_norm": 1.8946084974764223, |
|
"learning_rate": 3.676075358752426e-06, |
|
"loss": 0.3227, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.6282722513089005, |
|
"grad_norm": 1.8425099562092453, |
|
"learning_rate": 3.6509067948615464e-06, |
|
"loss": 0.3091, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6297681376215407, |
|
"grad_norm": 1.833988306261615, |
|
"learning_rate": 3.6257750450433284e-06, |
|
"loss": 0.3158, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.631264023934181, |
|
"grad_norm": 1.848102418513888, |
|
"learning_rate": 3.6006807950932867e-06, |
|
"loss": 0.3231, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6327599102468212, |
|
"grad_norm": 1.8597295350064236, |
|
"learning_rate": 3.575624729783632e-06, |
|
"loss": 0.317, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.6342557965594615, |
|
"grad_norm": 1.8571116178437028, |
|
"learning_rate": 3.550607532844596e-06, |
|
"loss": 0.3185, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6357516828721017, |
|
"grad_norm": 1.850039717310936, |
|
"learning_rate": 3.5256298869457715e-06, |
|
"loss": 0.3153, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.637247569184742, |
|
"grad_norm": 1.8517187441330423, |
|
"learning_rate": 3.5006924736774793e-06, |
|
"loss": 0.3231, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6387434554973822, |
|
"grad_norm": 1.886804887794377, |
|
"learning_rate": 3.47579597353217e-06, |
|
"loss": 0.3132, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.6402393418100224, |
|
"grad_norm": 1.8207891498106763, |
|
"learning_rate": 3.4509410658858606e-06, |
|
"loss": 0.3239, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6417352281226627, |
|
"grad_norm": 1.9080722925799685, |
|
"learning_rate": 3.426128428979589e-06, |
|
"loss": 0.3127, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.6432311144353029, |
|
"grad_norm": 1.7978167092374475, |
|
"learning_rate": 3.4013587399009073e-06, |
|
"loss": 0.3112, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6447270007479432, |
|
"grad_norm": 1.8462499082396047, |
|
"learning_rate": 3.376632674565411e-06, |
|
"loss": 0.3168, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.6462228870605834, |
|
"grad_norm": 1.856553229309688, |
|
"learning_rate": 3.351950907698285e-06, |
|
"loss": 0.3065, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6477187733732236, |
|
"grad_norm": 1.7800004213781706, |
|
"learning_rate": 3.3273141128159005e-06, |
|
"loss": 0.3132, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.6492146596858639, |
|
"grad_norm": 1.9132965188669029, |
|
"learning_rate": 3.3027229622074335e-06, |
|
"loss": 0.3179, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6507105459985041, |
|
"grad_norm": 1.7650226022206836, |
|
"learning_rate": 3.278178126916515e-06, |
|
"loss": 0.3137, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.6522064323111444, |
|
"grad_norm": 1.951509417973989, |
|
"learning_rate": 3.2536802767229243e-06, |
|
"loss": 0.3084, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6537023186237846, |
|
"grad_norm": 1.772116366162939, |
|
"learning_rate": 3.2292300801243133e-06, |
|
"loss": 0.3102, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.6551982049364248, |
|
"grad_norm": 1.8140401176421401, |
|
"learning_rate": 3.20482820431796e-06, |
|
"loss": 0.3056, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6566940912490651, |
|
"grad_norm": 1.8243620700136636, |
|
"learning_rate": 3.180475315182563e-06, |
|
"loss": 0.3033, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.6581899775617053, |
|
"grad_norm": 1.8380166168759837, |
|
"learning_rate": 3.1561720772600736e-06, |
|
"loss": 0.304, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6596858638743456, |
|
"grad_norm": 1.8336050039462124, |
|
"learning_rate": 3.1319191537375577e-06, |
|
"loss": 0.3143, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.6611817501869858, |
|
"grad_norm": 1.8667890213032734, |
|
"learning_rate": 3.107717206429105e-06, |
|
"loss": 0.3031, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6626776364996261, |
|
"grad_norm": 1.7638159112909835, |
|
"learning_rate": 3.0835668957577636e-06, |
|
"loss": 0.3013, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.6641735228122663, |
|
"grad_norm": 1.900781665691589, |
|
"learning_rate": 3.059468880737519e-06, |
|
"loss": 0.3073, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6656694091249065, |
|
"grad_norm": 1.943524014415726, |
|
"learning_rate": 3.035423818955316e-06, |
|
"loss": 0.3087, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.6671652954375468, |
|
"grad_norm": 1.736021065342517, |
|
"learning_rate": 3.0114323665531066e-06, |
|
"loss": 0.2979, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.668661181750187, |
|
"grad_norm": 1.746010053168365, |
|
"learning_rate": 2.987495178209951e-06, |
|
"loss": 0.307, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.6701570680628273, |
|
"grad_norm": 1.8018064213578624, |
|
"learning_rate": 2.9636129071241515e-06, |
|
"loss": 0.3126, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6716529543754675, |
|
"grad_norm": 1.8077932770071266, |
|
"learning_rate": 2.9397862049954307e-06, |
|
"loss": 0.3004, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.6731488406881077, |
|
"grad_norm": 1.7048569088891747, |
|
"learning_rate": 2.916015722007137e-06, |
|
"loss": 0.3066, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.674644727000748, |
|
"grad_norm": 1.7988871113907166, |
|
"learning_rate": 2.892302106808519e-06, |
|
"loss": 0.3052, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.6761406133133882, |
|
"grad_norm": 1.8715481375394143, |
|
"learning_rate": 2.8686460064970078e-06, |
|
"loss": 0.3085, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.6776364996260285, |
|
"grad_norm": 1.8258948545382783, |
|
"learning_rate": 2.8450480666005743e-06, |
|
"loss": 0.3023, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.6791323859386686, |
|
"grad_norm": 1.7183769572814935, |
|
"learning_rate": 2.821508931060104e-06, |
|
"loss": 0.3169, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.680628272251309, |
|
"grad_norm": 1.8087144140013556, |
|
"learning_rate": 2.7980292422118282e-06, |
|
"loss": 0.3, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.6821241585639491, |
|
"grad_norm": 1.9721800720444596, |
|
"learning_rate": 2.7746096407698004e-06, |
|
"loss": 0.3029, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6836200448765893, |
|
"grad_norm": 1.8344419672931702, |
|
"learning_rate": 2.7512507658083996e-06, |
|
"loss": 0.2996, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.6851159311892296, |
|
"grad_norm": 1.757267551606752, |
|
"learning_rate": 2.7279532547449083e-06, |
|
"loss": 0.3033, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6866118175018698, |
|
"grad_norm": 1.8575093423008022, |
|
"learning_rate": 2.704717743322104e-06, |
|
"loss": 0.2873, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.6881077038145101, |
|
"grad_norm": 1.761502547654336, |
|
"learning_rate": 2.681544865590926e-06, |
|
"loss": 0.2999, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6896035901271503, |
|
"grad_norm": 1.958074773552565, |
|
"learning_rate": 2.6584352538931523e-06, |
|
"loss": 0.3023, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.6910994764397905, |
|
"grad_norm": 1.7604780827427178, |
|
"learning_rate": 2.635389538844166e-06, |
|
"loss": 0.2923, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6925953627524308, |
|
"grad_norm": 1.860509876291064, |
|
"learning_rate": 2.612408349315734e-06, |
|
"loss": 0.2968, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.694091249065071, |
|
"grad_norm": 1.8116830542415268, |
|
"learning_rate": 2.5894923124188498e-06, |
|
"loss": 0.2911, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6955871353777113, |
|
"grad_norm": 1.816773761816662, |
|
"learning_rate": 2.5666420534866256e-06, |
|
"loss": 0.3017, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.6970830216903515, |
|
"grad_norm": 1.810456487051493, |
|
"learning_rate": 2.543858196057214e-06, |
|
"loss": 0.3045, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6985789080029918, |
|
"grad_norm": 1.8462477832363797, |
|
"learning_rate": 2.5211413618568114e-06, |
|
"loss": 0.2979, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.700074794315632, |
|
"grad_norm": 1.749680469906487, |
|
"learning_rate": 2.4984921707826805e-06, |
|
"loss": 0.298, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.7015706806282722, |
|
"grad_norm": 1.7715319612256217, |
|
"learning_rate": 2.4759112408862366e-06, |
|
"loss": 0.2905, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.7030665669409125, |
|
"grad_norm": 1.9011349884243633, |
|
"learning_rate": 2.4533991883561868e-06, |
|
"loss": 0.2938, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7045624532535527, |
|
"grad_norm": 1.7509668722553002, |
|
"learning_rate": 2.4309566275017027e-06, |
|
"loss": 0.2931, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.706058339566193, |
|
"grad_norm": 1.7463279622870067, |
|
"learning_rate": 2.4085841707356787e-06, |
|
"loss": 0.2948, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7075542258788332, |
|
"grad_norm": 1.7457958614044327, |
|
"learning_rate": 2.386282428558001e-06, |
|
"loss": 0.2935, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.7090501121914734, |
|
"grad_norm": 1.8306487338719184, |
|
"learning_rate": 2.364052009538892e-06, |
|
"loss": 0.3029, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7105459985041137, |
|
"grad_norm": 1.8902782477754185, |
|
"learning_rate": 2.341893520302313e-06, |
|
"loss": 0.2937, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.7120418848167539, |
|
"grad_norm": 1.7948687484011157, |
|
"learning_rate": 2.3198075655094023e-06, |
|
"loss": 0.2925, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7135377711293942, |
|
"grad_norm": 1.8682547497864384, |
|
"learning_rate": 2.297794747841976e-06, |
|
"loss": 0.2992, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.7150336574420344, |
|
"grad_norm": 1.7985072864408282, |
|
"learning_rate": 2.275855667986086e-06, |
|
"loss": 0.2992, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7165295437546746, |
|
"grad_norm": 1.6780824098442955, |
|
"learning_rate": 2.2539909246156257e-06, |
|
"loss": 0.2902, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.7180254300673149, |
|
"grad_norm": 1.9327685022447323, |
|
"learning_rate": 2.232201114375988e-06, |
|
"loss": 0.2879, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7195213163799551, |
|
"grad_norm": 1.8312593750432005, |
|
"learning_rate": 2.2104868318677963e-06, |
|
"loss": 0.2967, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.7210172026925954, |
|
"grad_norm": 1.8041698028281294, |
|
"learning_rate": 2.1888486696306706e-06, |
|
"loss": 0.2849, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7225130890052356, |
|
"grad_norm": 1.8021876820178402, |
|
"learning_rate": 2.1672872181270575e-06, |
|
"loss": 0.2918, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.7240089753178759, |
|
"grad_norm": 1.807836863115144, |
|
"learning_rate": 2.1458030657261235e-06, |
|
"loss": 0.282, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7255048616305161, |
|
"grad_norm": 1.7515999717106407, |
|
"learning_rate": 2.1243967986876933e-06, |
|
"loss": 0.2922, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.7270007479431563, |
|
"grad_norm": 1.8149872804694056, |
|
"learning_rate": 2.1030690011462567e-06, |
|
"loss": 0.2912, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7284966342557966, |
|
"grad_norm": 1.7878582875336215, |
|
"learning_rate": 2.081820255095028e-06, |
|
"loss": 0.2886, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.7299925205684368, |
|
"grad_norm": 1.7664930533873893, |
|
"learning_rate": 2.0606511403700575e-06, |
|
"loss": 0.2964, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7314884068810771, |
|
"grad_norm": 1.7856577814800616, |
|
"learning_rate": 2.0395622346344213e-06, |
|
"loss": 0.2849, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.7329842931937173, |
|
"grad_norm": 1.7620387064486105, |
|
"learning_rate": 2.018554113362449e-06, |
|
"loss": 0.2811, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.7344801795063575, |
|
"grad_norm": 1.746148787119175, |
|
"learning_rate": 1.9976273498240234e-06, |
|
"loss": 0.2866, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.7359760658189978, |
|
"grad_norm": 1.759195000248038, |
|
"learning_rate": 1.976782515068938e-06, |
|
"loss": 0.294, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.737471952131638, |
|
"grad_norm": 1.6081462651916374, |
|
"learning_rate": 1.9560201779113056e-06, |
|
"loss": 0.2821, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.7389678384442783, |
|
"grad_norm": 1.8127282683936143, |
|
"learning_rate": 1.9353409049140515e-06, |
|
"loss": 0.2827, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7404637247569185, |
|
"grad_norm": 1.7928349569557254, |
|
"learning_rate": 1.9147452603734402e-06, |
|
"loss": 0.2889, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.7419596110695588, |
|
"grad_norm": 1.7519180416889486, |
|
"learning_rate": 1.894233806303689e-06, |
|
"loss": 0.2816, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.743455497382199, |
|
"grad_norm": 1.792648064853805, |
|
"learning_rate": 1.8738071024216141e-06, |
|
"loss": 0.2843, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.7449513836948392, |
|
"grad_norm": 1.7815734013272622, |
|
"learning_rate": 1.8534657061313744e-06, |
|
"loss": 0.2742, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7464472700074795, |
|
"grad_norm": 1.825180595387709, |
|
"learning_rate": 1.8332101725092522e-06, |
|
"loss": 0.2816, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.7479431563201197, |
|
"grad_norm": 1.8420097876440362, |
|
"learning_rate": 1.8130410542885084e-06, |
|
"loss": 0.2808, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.74943904263276, |
|
"grad_norm": 1.8442353488656769, |
|
"learning_rate": 1.7929589018443016e-06, |
|
"loss": 0.2923, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.7509349289454001, |
|
"grad_norm": 1.876793012170064, |
|
"learning_rate": 1.7729642631786613e-06, |
|
"loss": 0.2872, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7524308152580403, |
|
"grad_norm": 1.7511287142130798, |
|
"learning_rate": 1.7530576839055453e-06, |
|
"loss": 0.2822, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.7539267015706806, |
|
"grad_norm": 1.8394555324866848, |
|
"learning_rate": 1.7332397072359435e-06, |
|
"loss": 0.2765, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7554225878833208, |
|
"grad_norm": 1.773080627419537, |
|
"learning_rate": 1.7135108739630573e-06, |
|
"loss": 0.2772, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.7569184741959611, |
|
"grad_norm": 1.7397840701003071, |
|
"learning_rate": 1.693871722447542e-06, |
|
"loss": 0.2748, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7584143605086013, |
|
"grad_norm": 1.8139047134561623, |
|
"learning_rate": 1.6743227886028152e-06, |
|
"loss": 0.2809, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.7599102468212415, |
|
"grad_norm": 1.723146398169513, |
|
"learning_rate": 1.6548646058804347e-06, |
|
"loss": 0.277, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7614061331338818, |
|
"grad_norm": 1.755509982892445, |
|
"learning_rate": 1.6354977052555393e-06, |
|
"loss": 0.2845, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.762902019446522, |
|
"grad_norm": 1.7634745348399379, |
|
"learning_rate": 1.6162226152123633e-06, |
|
"loss": 0.2845, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.7643979057591623, |
|
"grad_norm": 1.8539062432851583, |
|
"learning_rate": 1.5970398617298078e-06, |
|
"loss": 0.2828, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.7658937920718025, |
|
"grad_norm": 1.8053358835812254, |
|
"learning_rate": 1.5779499682670963e-06, |
|
"loss": 0.2774, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.7673896783844428, |
|
"grad_norm": 1.8014531312640616, |
|
"learning_rate": 1.5589534557494868e-06, |
|
"loss": 0.2841, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.768885564697083, |
|
"grad_norm": 1.735571527942806, |
|
"learning_rate": 1.5400508425540562e-06, |
|
"loss": 0.2746, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.7703814510097232, |
|
"grad_norm": 1.8540824858023373, |
|
"learning_rate": 1.5212426444955569e-06, |
|
"loss": 0.2807, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.7718773373223635, |
|
"grad_norm": 1.7139393419525597, |
|
"learning_rate": 1.5025293748123354e-06, |
|
"loss": 0.2815, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.7733732236350037, |
|
"grad_norm": 1.6431033212935895, |
|
"learning_rate": 1.4839115441523355e-06, |
|
"loss": 0.2696, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.774869109947644, |
|
"grad_norm": 1.7227778483828726, |
|
"learning_rate": 1.4653896605591584e-06, |
|
"loss": 0.2732, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.7763649962602842, |
|
"grad_norm": 1.7527519060060008, |
|
"learning_rate": 1.4469642294582048e-06, |
|
"loss": 0.2748, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.7778608825729244, |
|
"grad_norm": 1.6997524796558416, |
|
"learning_rate": 1.4286357536428696e-06, |
|
"loss": 0.2729, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7793567688855647, |
|
"grad_norm": 1.7807204337692575, |
|
"learning_rate": 1.4104047332608379e-06, |
|
"loss": 0.2755, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.7808526551982049, |
|
"grad_norm": 1.7182846099936764, |
|
"learning_rate": 1.392271665800427e-06, |
|
"loss": 0.2777, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7823485415108452, |
|
"grad_norm": 1.7302301084436003, |
|
"learning_rate": 1.3742370460770144e-06, |
|
"loss": 0.2762, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.7838444278234854, |
|
"grad_norm": 1.711106037244554, |
|
"learning_rate": 1.3563013662195356e-06, |
|
"loss": 0.2737, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7853403141361257, |
|
"grad_norm": 1.8191358842574659, |
|
"learning_rate": 1.3384651156570483e-06, |
|
"loss": 0.2732, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.7868362004487659, |
|
"grad_norm": 1.751260410944088, |
|
"learning_rate": 1.3207287811053893e-06, |
|
"loss": 0.2771, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7883320867614061, |
|
"grad_norm": 1.7320253510102213, |
|
"learning_rate": 1.3030928465538822e-06, |
|
"loss": 0.27, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.7898279730740464, |
|
"grad_norm": 1.7406452518990843, |
|
"learning_rate": 1.2855577932521352e-06, |
|
"loss": 0.2703, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.7913238593866866, |
|
"grad_norm": 1.8538751789457641, |
|
"learning_rate": 1.2681240996969085e-06, |
|
"loss": 0.2776, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.7928197456993269, |
|
"grad_norm": 1.740887599672242, |
|
"learning_rate": 1.250792241619051e-06, |
|
"loss": 0.2736, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7943156320119671, |
|
"grad_norm": 1.8281991178787242, |
|
"learning_rate": 1.233562691970533e-06, |
|
"loss": 0.2749, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.7958115183246073, |
|
"grad_norm": 1.6556477939621426, |
|
"learning_rate": 1.2164359209115235e-06, |
|
"loss": 0.2776, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7973074046372476, |
|
"grad_norm": 1.695787778492541, |
|
"learning_rate": 1.1994123957975722e-06, |
|
"loss": 0.2702, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.7988032909498878, |
|
"grad_norm": 1.7707776645975837, |
|
"learning_rate": 1.1824925811668485e-06, |
|
"loss": 0.2627, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.8002991772625281, |
|
"grad_norm": 1.8300425136047838, |
|
"learning_rate": 1.1656769387274714e-06, |
|
"loss": 0.2688, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.8017950635751683, |
|
"grad_norm": 1.6906589157556278, |
|
"learning_rate": 1.1489659273449073e-06, |
|
"loss": 0.2672, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.8032909498878086, |
|
"grad_norm": 1.7718115103968484, |
|
"learning_rate": 1.132360003029449e-06, |
|
"loss": 0.2673, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.8047868362004488, |
|
"grad_norm": 1.7597119643475179, |
|
"learning_rate": 1.115859618923773e-06, |
|
"loss": 0.2744, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.806282722513089, |
|
"grad_norm": 1.7801333538259148, |
|
"learning_rate": 1.0994652252905695e-06, |
|
"loss": 0.2662, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.8077786088257293, |
|
"grad_norm": 1.6866429011639965, |
|
"learning_rate": 1.083177269500264e-06, |
|
"loss": 0.2675, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.8092744951383695, |
|
"grad_norm": 1.9195992948000482, |
|
"learning_rate": 1.0669961960188008e-06, |
|
"loss": 0.2739, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.8107703814510098, |
|
"grad_norm": 1.8220041781840073, |
|
"learning_rate": 1.0509224463955249e-06, |
|
"loss": 0.2604, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.81226626776365, |
|
"grad_norm": 1.7303540258737908, |
|
"learning_rate": 1.0349564592511162e-06, |
|
"loss": 0.2743, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.8137621540762902, |
|
"grad_norm": 1.6406056857804932, |
|
"learning_rate": 1.0190986702656403e-06, |
|
"loss": 0.2719, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.8152580403889305, |
|
"grad_norm": 1.8590839739169418, |
|
"learning_rate": 1.0033495121666442e-06, |
|
"loss": 0.273, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.8167539267015707, |
|
"grad_norm": 1.7341252368355093, |
|
"learning_rate": 9.877094147173566e-07, |
|
"loss": 0.2712, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.818249813014211, |
|
"grad_norm": 1.7272695337289556, |
|
"learning_rate": 9.721788047049586e-07, |
|
"loss": 0.2628, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.8197456993268512, |
|
"grad_norm": 1.7050895419647492, |
|
"learning_rate": 9.567581059289322e-07, |
|
"loss": 0.2678, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8212415856394913, |
|
"grad_norm": 1.7258978187627068, |
|
"learning_rate": 9.414477391895044e-07, |
|
"loss": 0.2715, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.8227374719521316, |
|
"grad_norm": 1.8460755537922702, |
|
"learning_rate": 9.262481222761588e-07, |
|
"loss": 0.2716, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8242333582647718, |
|
"grad_norm": 1.7677837124955216, |
|
"learning_rate": 9.11159669956237e-07, |
|
"loss": 0.2725, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.8257292445774121, |
|
"grad_norm": 1.7183389424616196, |
|
"learning_rate": 8.961827939636198e-07, |
|
"loss": 0.2683, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8272251308900523, |
|
"grad_norm": 1.8851170229714924, |
|
"learning_rate": 8.813179029874874e-07, |
|
"loss": 0.2588, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.8287210172026926, |
|
"grad_norm": 1.8163919089444864, |
|
"learning_rate": 8.665654026611797e-07, |
|
"loss": 0.2631, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8302169035153328, |
|
"grad_norm": 1.7098860990754234, |
|
"learning_rate": 8.51925695551113e-07, |
|
"loss": 0.2679, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.831712789827973, |
|
"grad_norm": 1.7663056355635183, |
|
"learning_rate": 8.373991811458027e-07, |
|
"loss": 0.2652, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.8332086761406133, |
|
"grad_norm": 1.7186868648976898, |
|
"learning_rate": 8.229862558449592e-07, |
|
"loss": 0.2661, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.8347045624532535, |
|
"grad_norm": 1.8059879215165224, |
|
"learning_rate": 8.086873129486722e-07, |
|
"loss": 0.2686, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8362004487658938, |
|
"grad_norm": 1.7374284001547664, |
|
"learning_rate": 7.945027426466801e-07, |
|
"loss": 0.2708, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.837696335078534, |
|
"grad_norm": 1.6598096486422094, |
|
"learning_rate": 7.804329320077181e-07, |
|
"loss": 0.2653, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8391922213911742, |
|
"grad_norm": 1.676734657625906, |
|
"learning_rate": 7.664782649689611e-07, |
|
"loss": 0.2563, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.8406881077038145, |
|
"grad_norm": 1.7941246676620155, |
|
"learning_rate": 7.526391223255386e-07, |
|
"loss": 0.2643, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.8421839940164547, |
|
"grad_norm": 1.7441327844730907, |
|
"learning_rate": 7.389158817201541e-07, |
|
"loss": 0.2663, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.843679880329095, |
|
"grad_norm": 1.6764728143369185, |
|
"learning_rate": 7.253089176327738e-07, |
|
"loss": 0.2631, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8451757666417352, |
|
"grad_norm": 1.7090343355435693, |
|
"learning_rate": 7.118186013704065e-07, |
|
"loss": 0.2579, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.8466716529543755, |
|
"grad_norm": 1.723034589615204, |
|
"learning_rate": 6.984453010569758e-07, |
|
"loss": 0.2718, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8481675392670157, |
|
"grad_norm": 1.7083769223090157, |
|
"learning_rate": 6.851893816232729e-07, |
|
"loss": 0.259, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.8496634255796559, |
|
"grad_norm": 1.6983173618906942, |
|
"learning_rate": 6.720512047969957e-07, |
|
"loss": 0.2655, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8511593118922962, |
|
"grad_norm": 1.6008652695866359, |
|
"learning_rate": 6.590311290928825e-07, |
|
"loss": 0.2661, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.8526551982049364, |
|
"grad_norm": 1.723592329316595, |
|
"learning_rate": 6.461295098029269e-07, |
|
"loss": 0.2548, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8541510845175767, |
|
"grad_norm": 1.8054575785485054, |
|
"learning_rate": 6.333466989866787e-07, |
|
"loss": 0.264, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.8556469708302169, |
|
"grad_norm": 1.7902077125134892, |
|
"learning_rate": 6.206830454616447e-07, |
|
"loss": 0.266, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.7147769185915753, |
|
"learning_rate": 6.08138894793765e-07, |
|
"loss": 0.2654, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.8586387434554974, |
|
"grad_norm": 1.7518112730752275, |
|
"learning_rate": 5.957145892879829e-07, |
|
"loss": 0.2594, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.8601346297681376, |
|
"grad_norm": 1.811592287193994, |
|
"learning_rate": 5.834104679789077e-07, |
|
"loss": 0.2647, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.8616305160807779, |
|
"grad_norm": 1.773326433422328, |
|
"learning_rate": 5.712268666215559e-07, |
|
"loss": 0.264, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.8631264023934181, |
|
"grad_norm": 1.68178039725722, |
|
"learning_rate": 5.591641176822005e-07, |
|
"loss": 0.2614, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.8646222887060584, |
|
"grad_norm": 1.6842479909967625, |
|
"learning_rate": 5.472225503292883e-07, |
|
"loss": 0.2621, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.8661181750186986, |
|
"grad_norm": 1.7449782410599817, |
|
"learning_rate": 5.354024904244632e-07, |
|
"loss": 0.2522, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.8676140613313388, |
|
"grad_norm": 1.7259602780620398, |
|
"learning_rate": 5.237042605136689e-07, |
|
"loss": 0.2614, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.8691099476439791, |
|
"grad_norm": 1.688101500268341, |
|
"learning_rate": 5.121281798183547e-07, |
|
"loss": 0.2611, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.8706058339566193, |
|
"grad_norm": 1.7726586716734274, |
|
"learning_rate": 5.00674564226758e-07, |
|
"loss": 0.2544, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.8721017202692596, |
|
"grad_norm": 1.6935216955087868, |
|
"learning_rate": 4.893437262852885e-07, |
|
"loss": 0.2523, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.8735976065818998, |
|
"grad_norm": 1.878804856678552, |
|
"learning_rate": 4.781359751899984e-07, |
|
"loss": 0.2538, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.87509349289454, |
|
"grad_norm": 1.64770700770445, |
|
"learning_rate": 4.6705161677814024e-07, |
|
"loss": 0.2569, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.8765893792071803, |
|
"grad_norm": 1.860024134107886, |
|
"learning_rate": 4.560909535198299e-07, |
|
"loss": 0.2576, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.8780852655198205, |
|
"grad_norm": 1.675929796569693, |
|
"learning_rate": 4.4525428450978627e-07, |
|
"loss": 0.2539, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.8795811518324608, |
|
"grad_norm": 1.6649509488101208, |
|
"learning_rate": 4.3454190545917317e-07, |
|
"loss": 0.2654, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.881077038145101, |
|
"grad_norm": 1.7894352860083609, |
|
"learning_rate": 4.239541086875265e-07, |
|
"loss": 0.2647, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.8825729244577412, |
|
"grad_norm": 1.688537215035147, |
|
"learning_rate": 4.134911831147798e-07, |
|
"loss": 0.2563, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.8840688107703815, |
|
"grad_norm": 1.7422422459372517, |
|
"learning_rate": 4.031534142533816e-07, |
|
"loss": 0.2517, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.8855646970830217, |
|
"grad_norm": 1.832964243427611, |
|
"learning_rate": 3.9294108420049935e-07, |
|
"loss": 0.2664, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.887060583395662, |
|
"grad_norm": 1.7932779810454953, |
|
"learning_rate": 3.828544716303284e-07, |
|
"loss": 0.2543, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.8885564697083022, |
|
"grad_norm": 1.8073243004592312, |
|
"learning_rate": 3.728938517864794e-07, |
|
"loss": 0.2601, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.8900523560209425, |
|
"grad_norm": 1.714561248097055, |
|
"learning_rate": 3.6305949647447545e-07, |
|
"loss": 0.2564, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.8915482423335827, |
|
"grad_norm": 1.647575871046988, |
|
"learning_rate": 3.5335167405433024e-07, |
|
"loss": 0.2607, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8930441286462228, |
|
"grad_norm": 1.6977984176077578, |
|
"learning_rate": 3.437706494332266e-07, |
|
"loss": 0.2522, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.8945400149588631, |
|
"grad_norm": 1.7141499596339997, |
|
"learning_rate": 3.3431668405828675e-07, |
|
"loss": 0.2558, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8960359012715033, |
|
"grad_norm": 1.6494105719449952, |
|
"learning_rate": 3.249900359094388e-07, |
|
"loss": 0.256, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.8975317875841436, |
|
"grad_norm": 1.6630293618544516, |
|
"learning_rate": 3.1579095949237584e-07, |
|
"loss": 0.2508, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8990276738967838, |
|
"grad_norm": 1.7346655505039537, |
|
"learning_rate": 3.067197058316157e-07, |
|
"loss": 0.2614, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.900523560209424, |
|
"grad_norm": 1.7107296935219805, |
|
"learning_rate": 2.9777652246364306e-07, |
|
"loss": 0.2538, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.9020194465220643, |
|
"grad_norm": 1.6491436991741326, |
|
"learning_rate": 2.889616534301598e-07, |
|
"loss": 0.2521, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.9035153328347045, |
|
"grad_norm": 1.7323747022001885, |
|
"learning_rate": 2.8027533927142525e-07, |
|
"loss": 0.2593, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.9050112191473448, |
|
"grad_norm": 1.7534706658955106, |
|
"learning_rate": 2.717178170196916e-07, |
|
"loss": 0.249, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.906507105459985, |
|
"grad_norm": 1.7068949519667596, |
|
"learning_rate": 2.6328932019273556e-07, |
|
"loss": 0.2625, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.9080029917726253, |
|
"grad_norm": 1.7466561136363379, |
|
"learning_rate": 2.549900787874876e-07, |
|
"loss": 0.2572, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.9094988780852655, |
|
"grad_norm": 1.6487218463492848, |
|
"learning_rate": 2.468203192737512e-07, |
|
"loss": 0.2618, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.9109947643979057, |
|
"grad_norm": 1.5699982289102938, |
|
"learning_rate": 2.3878026458803047e-07, |
|
"loss": 0.2559, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.912490650710546, |
|
"grad_norm": 1.678827851691801, |
|
"learning_rate": 2.3087013412743998e-07, |
|
"loss": 0.2504, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.9139865370231862, |
|
"grad_norm": 1.732344143690627, |
|
"learning_rate": 2.2309014374372106e-07, |
|
"loss": 0.2556, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.9154824233358265, |
|
"grad_norm": 1.6563146141875156, |
|
"learning_rate": 2.1544050573735153e-07, |
|
"loss": 0.2555, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.9169783096484667, |
|
"grad_norm": 1.8096543479163172, |
|
"learning_rate": 2.079214288517506e-07, |
|
"loss": 0.2553, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.9184741959611069, |
|
"grad_norm": 1.690741820888644, |
|
"learning_rate": 2.0053311826758458e-07, |
|
"loss": 0.256, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.9199700822737472, |
|
"grad_norm": 1.7615351195511213, |
|
"learning_rate": 1.9327577559716815e-07, |
|
"loss": 0.2562, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.9214659685863874, |
|
"grad_norm": 1.8449608271118088, |
|
"learning_rate": 1.8614959887896078e-07, |
|
"loss": 0.2549, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.9229618548990277, |
|
"grad_norm": 1.7775694545753302, |
|
"learning_rate": 1.79154782572164e-07, |
|
"loss": 0.247, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.9244577412116679, |
|
"grad_norm": 1.6956013013917148, |
|
"learning_rate": 1.7229151755141394e-07, |
|
"loss": 0.2571, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.9259536275243081, |
|
"grad_norm": 1.6373082200013647, |
|
"learning_rate": 1.655599911015754e-07, |
|
"loss": 0.2547, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.9274495138369484, |
|
"grad_norm": 1.7078162984487721, |
|
"learning_rate": 1.5896038691262772e-07, |
|
"loss": 0.2592, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9289454001495886, |
|
"grad_norm": 1.6367425145666301, |
|
"learning_rate": 1.52492885074656e-07, |
|
"loss": 0.2561, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.9304412864622289, |
|
"grad_norm": 1.5872236691558035, |
|
"learning_rate": 1.4615766207293157e-07, |
|
"loss": 0.2518, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9319371727748691, |
|
"grad_norm": 1.714675291765629, |
|
"learning_rate": 1.3995489078310055e-07, |
|
"loss": 0.2633, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.9334330590875094, |
|
"grad_norm": 1.619406307330865, |
|
"learning_rate": 1.338847404664667e-07, |
|
"loss": 0.2548, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.9349289454001496, |
|
"grad_norm": 1.5539413237386495, |
|
"learning_rate": 1.2794737676536993e-07, |
|
"loss": 0.2527, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.9364248317127898, |
|
"grad_norm": 1.7495161399355714, |
|
"learning_rate": 1.2214296169866578e-07, |
|
"loss": 0.2515, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.9379207180254301, |
|
"grad_norm": 1.641652885536429, |
|
"learning_rate": 1.164716536573074e-07, |
|
"loss": 0.2501, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.9394166043380703, |
|
"grad_norm": 1.753141085715687, |
|
"learning_rate": 1.1093360740002057e-07, |
|
"loss": 0.2515, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.9409124906507106, |
|
"grad_norm": 1.7530034719134988, |
|
"learning_rate": 1.0552897404908391e-07, |
|
"loss": 0.2559, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.9424083769633508, |
|
"grad_norm": 1.5804220071987112, |
|
"learning_rate": 1.0025790108620092e-07, |
|
"loss": 0.2483, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.943904263275991, |
|
"grad_norm": 1.822783043661551, |
|
"learning_rate": 9.512053234847774e-08, |
|
"loss": 0.258, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.9454001495886313, |
|
"grad_norm": 1.7776638404626903, |
|
"learning_rate": 9.01170080244984e-08, |
|
"loss": 0.2463, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9468960359012715, |
|
"grad_norm": 1.7244463932526486, |
|
"learning_rate": 8.52474646504986e-08, |
|
"loss": 0.2506, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.9483919222139118, |
|
"grad_norm": 1.7184065717867174, |
|
"learning_rate": 8.05120351066413e-08, |
|
"loss": 0.2605, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.949887808526552, |
|
"grad_norm": 1.7978606844090408, |
|
"learning_rate": 7.591084861338749e-08, |
|
"loss": 0.2503, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.9513836948391923, |
|
"grad_norm": 1.6764242072572402, |
|
"learning_rate": 7.144403072797346e-08, |
|
"loss": 0.2523, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9528795811518325, |
|
"grad_norm": 1.6752659598734612, |
|
"learning_rate": 6.711170334098294e-08, |
|
"loss": 0.2566, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.9543754674644727, |
|
"grad_norm": 1.7696006414428376, |
|
"learning_rate": 6.291398467302146e-08, |
|
"loss": 0.2579, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.955871353777113, |
|
"grad_norm": 1.6541063626129755, |
|
"learning_rate": 5.885098927148947e-08, |
|
"loss": 0.2505, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.9573672400897532, |
|
"grad_norm": 1.791951476550907, |
|
"learning_rate": 5.492282800745707e-08, |
|
"loss": 0.252, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9588631264023935, |
|
"grad_norm": 1.6998940151846391, |
|
"learning_rate": 5.112960807263978e-08, |
|
"loss": 0.2602, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.9603590127150337, |
|
"grad_norm": 1.739892053817991, |
|
"learning_rate": 4.7471432976471944e-08, |
|
"loss": 0.264, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.9618548990276738, |
|
"grad_norm": 1.5429992279839573, |
|
"learning_rate": 4.3948402543282366e-08, |
|
"loss": 0.2543, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.9633507853403142, |
|
"grad_norm": 1.772813294024904, |
|
"learning_rate": 4.056061290956981e-08, |
|
"loss": 0.2524, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.9648466716529543, |
|
"grad_norm": 1.5751929247313246, |
|
"learning_rate": 3.730815652138231e-08, |
|
"loss": 0.2525, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.9663425579655947, |
|
"grad_norm": 1.7360588122089868, |
|
"learning_rate": 3.4191122131790324e-08, |
|
"loss": 0.255, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.9678384442782348, |
|
"grad_norm": 1.7743122424766984, |
|
"learning_rate": 3.120959479846919e-08, |
|
"loss": 0.2584, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.9693343305908751, |
|
"grad_norm": 1.574467195657007, |
|
"learning_rate": 2.8363655881374906e-08, |
|
"loss": 0.2558, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.9708302169035153, |
|
"grad_norm": 1.80180199036063, |
|
"learning_rate": 2.5653383040524228e-08, |
|
"loss": 0.2568, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.9723261032161555, |
|
"grad_norm": 1.6886241273143858, |
|
"learning_rate": 2.3078850233878015e-08, |
|
"loss": 0.2466, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.9738219895287958, |
|
"grad_norm": 1.7815633691396229, |
|
"learning_rate": 2.064012771532009e-08, |
|
"loss": 0.2536, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.975317875841436, |
|
"grad_norm": 1.6956109134529065, |
|
"learning_rate": 1.83372820327421e-08, |
|
"loss": 0.2592, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.9768137621540763, |
|
"grad_norm": 1.7584432907260417, |
|
"learning_rate": 1.6170376026226065e-08, |
|
"loss": 0.2647, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.9783096484667165, |
|
"grad_norm": 1.687889717075937, |
|
"learning_rate": 1.4139468826331327e-08, |
|
"loss": 0.2529, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.9798055347793567, |
|
"grad_norm": 1.6973842345080912, |
|
"learning_rate": 1.2244615852479158e-08, |
|
"loss": 0.2586, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.981301421091997, |
|
"grad_norm": 1.7860998582475756, |
|
"learning_rate": 1.0485868811441757e-08, |
|
"loss": 0.2596, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.9827973074046372, |
|
"grad_norm": 1.7444036807918029, |
|
"learning_rate": 8.86327569593115e-09, |
|
"loss": 0.253, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.9842931937172775, |
|
"grad_norm": 1.7876798673093501, |
|
"learning_rate": 7.376880783289131e-09, |
|
"loss": 0.2551, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.9857890800299177, |
|
"grad_norm": 1.66622892909602, |
|
"learning_rate": 6.026724634279335e-09, |
|
"loss": 0.2557, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.9872849663425579, |
|
"grad_norm": 1.7386422804846284, |
|
"learning_rate": 4.8128440919792405e-09, |
|
"loss": 0.253, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.9887808526551982, |
|
"grad_norm": 1.5376218727713236, |
|
"learning_rate": 3.73527228077708e-09, |
|
"loss": 0.2501, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.9902767389678384, |
|
"grad_norm": 1.7638583930274379, |
|
"learning_rate": 2.7940386054664537e-09, |
|
"loss": 0.262, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.9917726252804787, |
|
"grad_norm": 1.9162749151140541, |
|
"learning_rate": 1.9891687504436373e-09, |
|
"loss": 0.2575, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.9932685115931189, |
|
"grad_norm": 1.880864088528354, |
|
"learning_rate": 1.320684679008144e-09, |
|
"loss": 0.2602, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.9947643979057592, |
|
"grad_norm": 1.7803280986620529, |
|
"learning_rate": 7.886046327609809e-10, |
|
"loss": 0.2543, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.9962602842183994, |
|
"grad_norm": 1.5859942056150071, |
|
"learning_rate": 3.929431311094911e-10, |
|
"loss": 0.2563, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.9977561705310396, |
|
"grad_norm": 1.6644206261602157, |
|
"learning_rate": 1.337109708704487e-10, |
|
"loss": 0.2515, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.9992520568436799, |
|
"grad_norm": 1.6728854762026557, |
|
"learning_rate": 1.091522597362893e-11, |
|
"loss": 0.2602, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.999850411368736, |
|
"eval_loss": 0.25460532307624817, |
|
"eval_runtime": 342.3221, |
|
"eval_samples_per_second": 3.152, |
|
"eval_steps_per_second": 0.789, |
|
"step": 3342 |
|
}, |
|
{ |
|
"epoch": 0.999850411368736, |
|
"step": 3342, |
|
"total_flos": 699694464368640.0, |
|
"train_loss": 0.43199071664427796, |
|
"train_runtime": 75290.8899, |
|
"train_samples_per_second": 1.421, |
|
"train_steps_per_second": 0.044 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 3342, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 699694464368640.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|