|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8310652178429703, |
|
"eval_steps": 500, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003324260871371881, |
|
"grad_norm": 2.5143792629241943, |
|
"learning_rate": 1.6622340425531916e-08, |
|
"loss": 9.0836, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.006648521742743762, |
|
"grad_norm": 2.6173431873321533, |
|
"learning_rate": 3.324468085106383e-08, |
|
"loss": 9.0829, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.009972782614115643, |
|
"grad_norm": 2.5349628925323486, |
|
"learning_rate": 4.9867021276595746e-08, |
|
"loss": 9.0061, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.013297043485487523, |
|
"grad_norm": 2.3916308879852295, |
|
"learning_rate": 6.648936170212767e-08, |
|
"loss": 8.9747, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.016621304356859403, |
|
"grad_norm": 2.5103342533111572, |
|
"learning_rate": 8.311170212765958e-08, |
|
"loss": 9.0057, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.019945565228231286, |
|
"grad_norm": 2.421079397201538, |
|
"learning_rate": 9.973404255319149e-08, |
|
"loss": 8.9885, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.023269826099603166, |
|
"grad_norm": 2.6052393913269043, |
|
"learning_rate": 1.163563829787234e-07, |
|
"loss": 8.9706, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.026594086970975046, |
|
"grad_norm": 2.376847505569458, |
|
"learning_rate": 1.3297872340425533e-07, |
|
"loss": 9.0211, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02991834784234693, |
|
"grad_norm": 2.6200971603393555, |
|
"learning_rate": 1.4960106382978723e-07, |
|
"loss": 8.9903, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.033242608713718806, |
|
"grad_norm": 2.515320301055908, |
|
"learning_rate": 1.6622340425531916e-07, |
|
"loss": 8.9643, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03656686958509069, |
|
"grad_norm": 2.4840102195739746, |
|
"learning_rate": 1.8284574468085108e-07, |
|
"loss": 8.9761, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03989113045646257, |
|
"grad_norm": 2.5950074195861816, |
|
"learning_rate": 1.9946808510638298e-07, |
|
"loss": 9.0101, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04321539132783445, |
|
"grad_norm": 2.530604839324951, |
|
"learning_rate": 2.160904255319149e-07, |
|
"loss": 8.961, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04653965219920633, |
|
"grad_norm": 2.5579464435577393, |
|
"learning_rate": 2.327127659574468e-07, |
|
"loss": 8.8733, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04986391307057821, |
|
"grad_norm": 2.638901472091675, |
|
"learning_rate": 2.4933510638297876e-07, |
|
"loss": 8.9534, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05318817394195009, |
|
"grad_norm": 2.6817493438720703, |
|
"learning_rate": 2.6595744680851066e-07, |
|
"loss": 9.0014, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05651243481332197, |
|
"grad_norm": 2.6700024604797363, |
|
"learning_rate": 2.8257978723404256e-07, |
|
"loss": 8.8832, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05983669568469386, |
|
"grad_norm": 2.794243335723877, |
|
"learning_rate": 2.9920212765957446e-07, |
|
"loss": 8.9012, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06316095655606574, |
|
"grad_norm": 3.000873327255249, |
|
"learning_rate": 3.1582446808510636e-07, |
|
"loss": 8.7874, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06648521742743761, |
|
"grad_norm": 2.872612714767456, |
|
"learning_rate": 3.324468085106383e-07, |
|
"loss": 8.8558, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0698094782988095, |
|
"grad_norm": 2.9133315086364746, |
|
"learning_rate": 3.490691489361702e-07, |
|
"loss": 8.8333, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07313373917018139, |
|
"grad_norm": 3.1017534732818604, |
|
"learning_rate": 3.6569148936170217e-07, |
|
"loss": 8.8199, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.07645800004155326, |
|
"grad_norm": 2.9153056144714355, |
|
"learning_rate": 3.8231382978723407e-07, |
|
"loss": 8.8266, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.07978226091292515, |
|
"grad_norm": 3.0996434688568115, |
|
"learning_rate": 3.9893617021276597e-07, |
|
"loss": 8.7202, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08310652178429702, |
|
"grad_norm": 3.257809638977051, |
|
"learning_rate": 4.1555851063829787e-07, |
|
"loss": 8.6149, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0864307826556689, |
|
"grad_norm": 3.378631353378296, |
|
"learning_rate": 4.321808510638298e-07, |
|
"loss": 8.5092, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.08975504352704078, |
|
"grad_norm": 3.3546876907348633, |
|
"learning_rate": 4.488031914893618e-07, |
|
"loss": 8.477, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09307930439841267, |
|
"grad_norm": 3.292569637298584, |
|
"learning_rate": 4.654255319148936e-07, |
|
"loss": 8.4594, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09640356526978455, |
|
"grad_norm": 3.190239906311035, |
|
"learning_rate": 4.820478723404255e-07, |
|
"loss": 8.3134, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.09972782614115643, |
|
"grad_norm": 3.2521212100982666, |
|
"learning_rate": 4.986702127659575e-07, |
|
"loss": 8.2896, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.10305208701252831, |
|
"grad_norm": 3.399919033050537, |
|
"learning_rate": 5.152925531914893e-07, |
|
"loss": 8.1656, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10637634788390019, |
|
"grad_norm": 3.412688970565796, |
|
"learning_rate": 5.319148936170213e-07, |
|
"loss": 7.9764, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.10970060875527207, |
|
"grad_norm": 3.2669174671173096, |
|
"learning_rate": 5.485372340425532e-07, |
|
"loss": 7.9755, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11302486962664395, |
|
"grad_norm": 3.405444383621216, |
|
"learning_rate": 5.651595744680851e-07, |
|
"loss": 7.8587, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.11634913049801583, |
|
"grad_norm": 3.2224161624908447, |
|
"learning_rate": 5.81781914893617e-07, |
|
"loss": 7.7357, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.11967339136938772, |
|
"grad_norm": 3.230048894882202, |
|
"learning_rate": 5.984042553191489e-07, |
|
"loss": 7.5667, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12299765224075959, |
|
"grad_norm": 3.2728312015533447, |
|
"learning_rate": 6.150265957446809e-07, |
|
"loss": 7.4844, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12632191311213148, |
|
"grad_norm": 3.200800895690918, |
|
"learning_rate": 6.316489361702127e-07, |
|
"loss": 7.3059, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.12964617398350337, |
|
"grad_norm": 3.075329065322876, |
|
"learning_rate": 6.482712765957447e-07, |
|
"loss": 7.1618, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13297043485487522, |
|
"grad_norm": 3.1853721141815186, |
|
"learning_rate": 6.648936170212766e-07, |
|
"loss": 7.1392, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1362946957262471, |
|
"grad_norm": 3.0336828231811523, |
|
"learning_rate": 6.815159574468085e-07, |
|
"loss": 6.974, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.139618956597619, |
|
"grad_norm": 3.027355670928955, |
|
"learning_rate": 6.981382978723404e-07, |
|
"loss": 6.7714, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.14294321746899089, |
|
"grad_norm": 2.99857497215271, |
|
"learning_rate": 7.147606382978723e-07, |
|
"loss": 6.6538, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.14626747834036277, |
|
"grad_norm": 2.840437650680542, |
|
"learning_rate": 7.313829787234043e-07, |
|
"loss": 6.4758, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.14959173921173463, |
|
"grad_norm": 3.076049566268921, |
|
"learning_rate": 7.480053191489362e-07, |
|
"loss": 6.3225, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.15291600008310652, |
|
"grad_norm": 2.8588602542877197, |
|
"learning_rate": 7.646276595744681e-07, |
|
"loss": 6.1646, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1562402609544784, |
|
"grad_norm": 2.9168858528137207, |
|
"learning_rate": 7.8125e-07, |
|
"loss": 6.0828, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1595645218258503, |
|
"grad_norm": 2.795363187789917, |
|
"learning_rate": 7.978723404255319e-07, |
|
"loss": 5.9192, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.16288878269722215, |
|
"grad_norm": 2.3897600173950195, |
|
"learning_rate": 8.144946808510639e-07, |
|
"loss": 5.6675, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.16621304356859404, |
|
"grad_norm": 2.279939651489258, |
|
"learning_rate": 8.311170212765957e-07, |
|
"loss": 5.5443, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16953730443996592, |
|
"grad_norm": 2.394994020462036, |
|
"learning_rate": 8.477393617021276e-07, |
|
"loss": 5.4127, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1728615653113378, |
|
"grad_norm": 2.3148529529571533, |
|
"learning_rate": 8.643617021276596e-07, |
|
"loss": 5.2102, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1761858261827097, |
|
"grad_norm": 2.053243637084961, |
|
"learning_rate": 8.809840425531915e-07, |
|
"loss": 5.1204, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.17951008705408156, |
|
"grad_norm": 2.0687060356140137, |
|
"learning_rate": 8.976063829787235e-07, |
|
"loss": 4.9786, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.18283434792545344, |
|
"grad_norm": 1.8042306900024414, |
|
"learning_rate": 9.142287234042553e-07, |
|
"loss": 4.7735, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.18615860879682533, |
|
"grad_norm": 1.8975441455841064, |
|
"learning_rate": 9.308510638297872e-07, |
|
"loss": 4.6749, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.18948286966819722, |
|
"grad_norm": 1.6640989780426025, |
|
"learning_rate": 9.474734042553192e-07, |
|
"loss": 4.529, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1928071305395691, |
|
"grad_norm": 1.9563913345336914, |
|
"learning_rate": 9.64095744680851e-07, |
|
"loss": 4.4255, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.19613139141094096, |
|
"grad_norm": 1.433192253112793, |
|
"learning_rate": 9.80718085106383e-07, |
|
"loss": 4.3369, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.19945565228231285, |
|
"grad_norm": 1.6940258741378784, |
|
"learning_rate": 9.97340425531915e-07, |
|
"loss": 4.2264, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.20277991315368474, |
|
"grad_norm": 1.3721990585327148, |
|
"learning_rate": 1.0139627659574467e-06, |
|
"loss": 4.0771, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.20610417402505662, |
|
"grad_norm": 1.3481799364089966, |
|
"learning_rate": 1.0305851063829786e-06, |
|
"loss": 3.9652, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.20942843489642848, |
|
"grad_norm": 1.3010597229003906, |
|
"learning_rate": 1.0472074468085108e-06, |
|
"loss": 3.9205, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.21275269576780037, |
|
"grad_norm": 1.551216721534729, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"loss": 3.8234, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.21607695663917226, |
|
"grad_norm": 1.3280216455459595, |
|
"learning_rate": 1.0804521276595746e-06, |
|
"loss": 3.6898, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.21940121751054414, |
|
"grad_norm": 1.0909334421157837, |
|
"learning_rate": 1.0970744680851065e-06, |
|
"loss": 3.6489, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.22272547838191603, |
|
"grad_norm": 1.345831036567688, |
|
"learning_rate": 1.1136968085106384e-06, |
|
"loss": 3.5296, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2260497392532879, |
|
"grad_norm": 1.0882962942123413, |
|
"learning_rate": 1.1303191489361703e-06, |
|
"loss": 3.489, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.22937400012465978, |
|
"grad_norm": 0.9840554594993591, |
|
"learning_rate": 1.1469414893617022e-06, |
|
"loss": 3.4164, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.23269826099603166, |
|
"grad_norm": 1.0956693887710571, |
|
"learning_rate": 1.163563829787234e-06, |
|
"loss": 3.3211, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.23602252186740355, |
|
"grad_norm": 0.8875247240066528, |
|
"learning_rate": 1.1801861702127662e-06, |
|
"loss": 3.2647, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.23934678273877544, |
|
"grad_norm": 1.268930196762085, |
|
"learning_rate": 1.1968085106382979e-06, |
|
"loss": 3.2033, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.2426710436101473, |
|
"grad_norm": 0.9430990815162659, |
|
"learning_rate": 1.2134308510638298e-06, |
|
"loss": 3.1317, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.24599530448151918, |
|
"grad_norm": 0.9196615219116211, |
|
"learning_rate": 1.2300531914893619e-06, |
|
"loss": 3.0706, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.24931956535289107, |
|
"grad_norm": 0.7046869993209839, |
|
"learning_rate": 1.2466755319148936e-06, |
|
"loss": 3.0142, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.25264382622426296, |
|
"grad_norm": 0.9173153638839722, |
|
"learning_rate": 1.2632978723404255e-06, |
|
"loss": 2.949, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.25596808709563484, |
|
"grad_norm": 0.8014841675758362, |
|
"learning_rate": 1.2799202127659576e-06, |
|
"loss": 2.9325, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.25929234796700673, |
|
"grad_norm": 0.9520502686500549, |
|
"learning_rate": 1.2965425531914895e-06, |
|
"loss": 2.859, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2626166088383786, |
|
"grad_norm": 0.7679387331008911, |
|
"learning_rate": 1.3131648936170214e-06, |
|
"loss": 2.8509, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.26594086970975045, |
|
"grad_norm": 0.7660825252532959, |
|
"learning_rate": 1.3297872340425533e-06, |
|
"loss": 2.7896, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.26926513058112234, |
|
"grad_norm": 0.7754834294319153, |
|
"learning_rate": 1.3464095744680852e-06, |
|
"loss": 2.736, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2725893914524942, |
|
"grad_norm": 0.5802922248840332, |
|
"learning_rate": 1.363031914893617e-06, |
|
"loss": 2.6962, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2759136523238661, |
|
"grad_norm": 0.6394158601760864, |
|
"learning_rate": 1.379654255319149e-06, |
|
"loss": 2.6656, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.279237913195238, |
|
"grad_norm": 0.6503139138221741, |
|
"learning_rate": 1.3962765957446809e-06, |
|
"loss": 2.616, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.2825621740666099, |
|
"grad_norm": 0.6165557503700256, |
|
"learning_rate": 1.412898936170213e-06, |
|
"loss": 2.5971, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.28588643493798177, |
|
"grad_norm": 0.6192012429237366, |
|
"learning_rate": 1.4295212765957447e-06, |
|
"loss": 2.5536, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.28921069580935366, |
|
"grad_norm": 0.6266525983810425, |
|
"learning_rate": 1.4461436170212766e-06, |
|
"loss": 2.5036, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.29253495668072554, |
|
"grad_norm": 0.5376760363578796, |
|
"learning_rate": 1.4627659574468087e-06, |
|
"loss": 2.5136, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.2958592175520974, |
|
"grad_norm": 0.6490041613578796, |
|
"learning_rate": 1.4793882978723404e-06, |
|
"loss": 2.4638, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.29918347842346926, |
|
"grad_norm": 0.6368073225021362, |
|
"learning_rate": 1.4960106382978725e-06, |
|
"loss": 2.4258, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.30250773929484115, |
|
"grad_norm": 0.5121726989746094, |
|
"learning_rate": 1.5126329787234044e-06, |
|
"loss": 2.4016, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.30583200016621304, |
|
"grad_norm": 0.5835744738578796, |
|
"learning_rate": 1.5292553191489363e-06, |
|
"loss": 2.4192, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.3091562610375849, |
|
"grad_norm": 0.5275241732597351, |
|
"learning_rate": 1.5458776595744682e-06, |
|
"loss": 2.3687, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3124805219089568, |
|
"grad_norm": 0.4900510609149933, |
|
"learning_rate": 1.5625e-06, |
|
"loss": 2.3208, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3158047827803287, |
|
"grad_norm": 0.4609052240848541, |
|
"learning_rate": 1.5791223404255322e-06, |
|
"loss": 2.3363, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3191290436517006, |
|
"grad_norm": 0.461566299200058, |
|
"learning_rate": 1.5957446808510639e-06, |
|
"loss": 2.2793, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.32245330452307247, |
|
"grad_norm": 0.49795401096343994, |
|
"learning_rate": 1.6123670212765958e-06, |
|
"loss": 2.2845, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3257775653944443, |
|
"grad_norm": 0.4422404170036316, |
|
"learning_rate": 1.6289893617021279e-06, |
|
"loss": 2.2744, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3291018262658162, |
|
"grad_norm": 0.4161861538887024, |
|
"learning_rate": 1.6456117021276596e-06, |
|
"loss": 2.2463, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3324260871371881, |
|
"grad_norm": 0.46071523427963257, |
|
"learning_rate": 1.6622340425531915e-06, |
|
"loss": 2.2271, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33575034800855996, |
|
"grad_norm": 0.3772067129611969, |
|
"learning_rate": 1.6788563829787236e-06, |
|
"loss": 2.2119, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.33907460887993185, |
|
"grad_norm": 0.44782117009162903, |
|
"learning_rate": 1.6954787234042553e-06, |
|
"loss": 2.2022, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.34239886975130374, |
|
"grad_norm": 0.4486360251903534, |
|
"learning_rate": 1.7121010638297872e-06, |
|
"loss": 2.1723, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3457231306226756, |
|
"grad_norm": 0.47423475980758667, |
|
"learning_rate": 1.7287234042553193e-06, |
|
"loss": 2.1295, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.3490473914940475, |
|
"grad_norm": 0.4199342131614685, |
|
"learning_rate": 1.745345744680851e-06, |
|
"loss": 2.1387, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3523716523654194, |
|
"grad_norm": 0.43744415044784546, |
|
"learning_rate": 1.761968085106383e-06, |
|
"loss": 2.1195, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3556959132367913, |
|
"grad_norm": 0.3780044913291931, |
|
"learning_rate": 1.778590425531915e-06, |
|
"loss": 2.1194, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.3590201741081631, |
|
"grad_norm": 0.40349099040031433, |
|
"learning_rate": 1.795212765957447e-06, |
|
"loss": 2.1005, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.362344434979535, |
|
"grad_norm": 0.378764271736145, |
|
"learning_rate": 1.8118351063829788e-06, |
|
"loss": 2.0757, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.3656686958509069, |
|
"grad_norm": 0.34115588665008545, |
|
"learning_rate": 1.8284574468085107e-06, |
|
"loss": 2.0591, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3689929567222788, |
|
"grad_norm": 0.39553964138031006, |
|
"learning_rate": 1.8450797872340428e-06, |
|
"loss": 2.0298, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.37231721759365066, |
|
"grad_norm": 0.36110466718673706, |
|
"learning_rate": 1.8617021276595745e-06, |
|
"loss": 2.0113, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.37564147846502255, |
|
"grad_norm": 0.33477863669395447, |
|
"learning_rate": 1.8783244680851066e-06, |
|
"loss": 2.0197, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.37896573933639444, |
|
"grad_norm": 0.43919846415519714, |
|
"learning_rate": 1.8949468085106385e-06, |
|
"loss": 1.9794, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3822900002077663, |
|
"grad_norm": 0.3243393898010254, |
|
"learning_rate": 1.9115691489361704e-06, |
|
"loss": 1.9667, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3856142610791382, |
|
"grad_norm": 0.3350262939929962, |
|
"learning_rate": 1.928191489361702e-06, |
|
"loss": 1.978, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.38893852195051004, |
|
"grad_norm": 0.3365063965320587, |
|
"learning_rate": 1.944813829787234e-06, |
|
"loss": 1.9701, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.39226278282188193, |
|
"grad_norm": 0.3240489661693573, |
|
"learning_rate": 1.961436170212766e-06, |
|
"loss": 1.9465, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3955870436932538, |
|
"grad_norm": 0.3239437937736511, |
|
"learning_rate": 1.978058510638298e-06, |
|
"loss": 1.9253, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3989113045646257, |
|
"grad_norm": 0.3397749364376068, |
|
"learning_rate": 1.99468085106383e-06, |
|
"loss": 1.9057, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4022355654359976, |
|
"grad_norm": 0.2915981113910675, |
|
"learning_rate": 2.011303191489362e-06, |
|
"loss": 1.9047, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4055598263073695, |
|
"grad_norm": 0.39456045627593994, |
|
"learning_rate": 2.0279255319148935e-06, |
|
"loss": 1.9144, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.40888408717874136, |
|
"grad_norm": 0.2593387961387634, |
|
"learning_rate": 2.0445478723404256e-06, |
|
"loss": 1.8969, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.41220834805011325, |
|
"grad_norm": 0.30935177206993103, |
|
"learning_rate": 2.0611702127659573e-06, |
|
"loss": 1.8931, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.41553260892148514, |
|
"grad_norm": 0.27917250990867615, |
|
"learning_rate": 2.0777925531914894e-06, |
|
"loss": 1.8899, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.41885686979285697, |
|
"grad_norm": 0.25976502895355225, |
|
"learning_rate": 2.0944148936170215e-06, |
|
"loss": 1.8503, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.42218113066422885, |
|
"grad_norm": 0.31833794713020325, |
|
"learning_rate": 2.111037234042553e-06, |
|
"loss": 1.8527, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.42550539153560074, |
|
"grad_norm": 0.2671976685523987, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"loss": 1.8505, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.42882965240697263, |
|
"grad_norm": 0.3245258629322052, |
|
"learning_rate": 2.144281914893617e-06, |
|
"loss": 1.864, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4321539132783445, |
|
"grad_norm": 0.2622531056404114, |
|
"learning_rate": 2.160904255319149e-06, |
|
"loss": 1.8301, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4354781741497164, |
|
"grad_norm": 0.3247709274291992, |
|
"learning_rate": 2.177526595744681e-06, |
|
"loss": 1.812, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4388024350210883, |
|
"grad_norm": 0.26424384117126465, |
|
"learning_rate": 2.194148936170213e-06, |
|
"loss": 1.7958, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.4421266958924602, |
|
"grad_norm": 0.2569092810153961, |
|
"learning_rate": 2.210771276595745e-06, |
|
"loss": 1.8147, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.44545095676383206, |
|
"grad_norm": 0.2393629103899002, |
|
"learning_rate": 2.2273936170212767e-06, |
|
"loss": 1.7976, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.44877521763520395, |
|
"grad_norm": 0.232402965426445, |
|
"learning_rate": 2.244015957446809e-06, |
|
"loss": 1.7597, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.4520994785065758, |
|
"grad_norm": 0.26385971903800964, |
|
"learning_rate": 2.2606382978723405e-06, |
|
"loss": 1.7781, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.45542373937794767, |
|
"grad_norm": 0.2671038806438446, |
|
"learning_rate": 2.277260638297872e-06, |
|
"loss": 1.7583, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.45874800024931955, |
|
"grad_norm": 0.27096447348594666, |
|
"learning_rate": 2.2938829787234043e-06, |
|
"loss": 1.7402, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.46207226112069144, |
|
"grad_norm": 0.2245018631219864, |
|
"learning_rate": 2.3105053191489364e-06, |
|
"loss": 1.7644, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.46539652199206333, |
|
"grad_norm": 0.20663714408874512, |
|
"learning_rate": 2.327127659574468e-06, |
|
"loss": 1.7519, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4687207828634352, |
|
"grad_norm": 0.26273128390312195, |
|
"learning_rate": 2.3437500000000002e-06, |
|
"loss": 1.7312, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.4720450437348071, |
|
"grad_norm": 0.24725256860256195, |
|
"learning_rate": 2.3603723404255323e-06, |
|
"loss": 1.7217, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.475369304606179, |
|
"grad_norm": 0.25341796875, |
|
"learning_rate": 2.376994680851064e-06, |
|
"loss": 1.7246, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4786935654775509, |
|
"grad_norm": 0.21035414934158325, |
|
"learning_rate": 2.3936170212765957e-06, |
|
"loss": 1.7017, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.4820178263489227, |
|
"grad_norm": 0.21454143524169922, |
|
"learning_rate": 2.410239361702128e-06, |
|
"loss": 1.7049, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.4853420872202946, |
|
"grad_norm": 0.22413010895252228, |
|
"learning_rate": 2.4268617021276595e-06, |
|
"loss": 1.6809, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4886663480916665, |
|
"grad_norm": 0.2039473056793213, |
|
"learning_rate": 2.4434840425531916e-06, |
|
"loss": 1.6873, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.49199060896303837, |
|
"grad_norm": 0.18895457684993744, |
|
"learning_rate": 2.4601063829787237e-06, |
|
"loss": 1.69, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.49531486983441025, |
|
"grad_norm": 0.21047964692115784, |
|
"learning_rate": 2.4767287234042554e-06, |
|
"loss": 1.681, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.49863913070578214, |
|
"grad_norm": 0.2226460874080658, |
|
"learning_rate": 2.493351063829787e-06, |
|
"loss": 1.6613, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.501963391577154, |
|
"grad_norm": 0.21892835199832916, |
|
"learning_rate": 2.5099734042553192e-06, |
|
"loss": 1.6376, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5052876524485259, |
|
"grad_norm": 0.20363831520080566, |
|
"learning_rate": 2.526595744680851e-06, |
|
"loss": 1.6541, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5086119133198977, |
|
"grad_norm": 0.1988699585199356, |
|
"learning_rate": 2.543218085106383e-06, |
|
"loss": 1.6422, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5119361741912697, |
|
"grad_norm": 0.2050096094608307, |
|
"learning_rate": 2.559840425531915e-06, |
|
"loss": 1.6377, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.5152604350626415, |
|
"grad_norm": 0.23265878856182098, |
|
"learning_rate": 2.5764627659574472e-06, |
|
"loss": 1.6251, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5185846959340135, |
|
"grad_norm": 0.2024969905614853, |
|
"learning_rate": 2.593085106382979e-06, |
|
"loss": 1.6048, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5219089568053853, |
|
"grad_norm": 0.21343863010406494, |
|
"learning_rate": 2.6097074468085106e-06, |
|
"loss": 1.6195, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5252332176767572, |
|
"grad_norm": 0.1862565129995346, |
|
"learning_rate": 2.6263297872340427e-06, |
|
"loss": 1.5991, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5285574785481291, |
|
"grad_norm": 0.22765249013900757, |
|
"learning_rate": 2.6429521276595744e-06, |
|
"loss": 1.5957, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5318817394195009, |
|
"grad_norm": 0.19874997437000275, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"loss": 1.5847, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5352060002908728, |
|
"grad_norm": 0.25979486107826233, |
|
"learning_rate": 2.6761968085106386e-06, |
|
"loss": 1.6046, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5385302611622447, |
|
"grad_norm": 0.1831529289484024, |
|
"learning_rate": 2.6928191489361703e-06, |
|
"loss": 1.5835, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5418545220336166, |
|
"grad_norm": 0.2680751085281372, |
|
"learning_rate": 2.7094414893617024e-06, |
|
"loss": 1.6009, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5451787829049884, |
|
"grad_norm": 0.18160907924175262, |
|
"learning_rate": 2.726063829787234e-06, |
|
"loss": 1.5666, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.5485030437763604, |
|
"grad_norm": 0.22875571250915527, |
|
"learning_rate": 2.742686170212766e-06, |
|
"loss": 1.5614, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.5518273046477322, |
|
"grad_norm": 0.21110033988952637, |
|
"learning_rate": 2.759308510638298e-06, |
|
"loss": 1.5707, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.5551515655191042, |
|
"grad_norm": 0.1887374073266983, |
|
"learning_rate": 2.77593085106383e-06, |
|
"loss": 1.5781, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.558475826390476, |
|
"grad_norm": 0.1916954219341278, |
|
"learning_rate": 2.7925531914893617e-06, |
|
"loss": 1.563, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.5618000872618478, |
|
"grad_norm": 0.21001753211021423, |
|
"learning_rate": 2.809175531914894e-06, |
|
"loss": 1.5495, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.5651243481332198, |
|
"grad_norm": 0.1702377200126648, |
|
"learning_rate": 2.825797872340426e-06, |
|
"loss": 1.5427, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.5684486090045916, |
|
"grad_norm": 0.19061295688152313, |
|
"learning_rate": 2.8424202127659576e-06, |
|
"loss": 1.5387, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5717728698759635, |
|
"grad_norm": 0.17503058910369873, |
|
"learning_rate": 2.8590425531914893e-06, |
|
"loss": 1.5154, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5750971307473354, |
|
"grad_norm": 0.1703094244003296, |
|
"learning_rate": 2.8756648936170214e-06, |
|
"loss": 1.5209, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.5784213916187073, |
|
"grad_norm": 0.22713126242160797, |
|
"learning_rate": 2.892287234042553e-06, |
|
"loss": 1.529, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5817456524900791, |
|
"grad_norm": 0.16218431293964386, |
|
"learning_rate": 2.9089095744680852e-06, |
|
"loss": 1.505, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5850699133614511, |
|
"grad_norm": 0.16082778573036194, |
|
"learning_rate": 2.9255319148936174e-06, |
|
"loss": 1.5312, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.5883941742328229, |
|
"grad_norm": 0.19500340521335602, |
|
"learning_rate": 2.942154255319149e-06, |
|
"loss": 1.4971, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5917184351041948, |
|
"grad_norm": 0.16831324994564056, |
|
"learning_rate": 2.9587765957446807e-06, |
|
"loss": 1.5172, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5950426959755667, |
|
"grad_norm": 0.17963413894176483, |
|
"learning_rate": 2.975398936170213e-06, |
|
"loss": 1.5076, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5983669568469385, |
|
"grad_norm": 0.17123515903949738, |
|
"learning_rate": 2.992021276595745e-06, |
|
"loss": 1.4941, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6016912177183105, |
|
"grad_norm": 0.15727902948856354, |
|
"learning_rate": 3.0086436170212766e-06, |
|
"loss": 1.4609, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6050154785896823, |
|
"grad_norm": 0.1833077073097229, |
|
"learning_rate": 3.0252659574468088e-06, |
|
"loss": 1.5042, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6083397394610542, |
|
"grad_norm": 0.16962528228759766, |
|
"learning_rate": 3.041888297872341e-06, |
|
"loss": 1.4651, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6116640003324261, |
|
"grad_norm": 0.17829731106758118, |
|
"learning_rate": 3.0585106382978726e-06, |
|
"loss": 1.4907, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.614988261203798, |
|
"grad_norm": 0.16981306672096252, |
|
"learning_rate": 3.0751329787234042e-06, |
|
"loss": 1.4683, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.6183125220751698, |
|
"grad_norm": 0.20783671736717224, |
|
"learning_rate": 3.0917553191489363e-06, |
|
"loss": 1.463, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6216367829465417, |
|
"grad_norm": 0.20343361794948578, |
|
"learning_rate": 3.108377659574468e-06, |
|
"loss": 1.4632, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6249610438179136, |
|
"grad_norm": 0.18592675030231476, |
|
"learning_rate": 3.125e-06, |
|
"loss": 1.4887, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6282853046892855, |
|
"grad_norm": 0.17272701859474182, |
|
"learning_rate": 3.141622340425532e-06, |
|
"loss": 1.4491, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6316095655606574, |
|
"grad_norm": 0.2021792083978653, |
|
"learning_rate": 3.1582446808510644e-06, |
|
"loss": 1.4537, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6349338264320292, |
|
"grad_norm": 0.16319766640663147, |
|
"learning_rate": 3.174867021276596e-06, |
|
"loss": 1.456, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.6382580873034012, |
|
"grad_norm": 0.2344328761100769, |
|
"learning_rate": 3.1914893617021277e-06, |
|
"loss": 1.4801, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.641582348174773, |
|
"grad_norm": 0.17495407164096832, |
|
"learning_rate": 3.20811170212766e-06, |
|
"loss": 1.4435, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.6449066090461449, |
|
"grad_norm": 0.19222399592399597, |
|
"learning_rate": 3.2247340425531915e-06, |
|
"loss": 1.4391, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.6482308699175168, |
|
"grad_norm": 0.24526530504226685, |
|
"learning_rate": 3.2413563829787232e-06, |
|
"loss": 1.4555, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.6515551307888886, |
|
"grad_norm": 0.18150673806667328, |
|
"learning_rate": 3.2579787234042558e-06, |
|
"loss": 1.4396, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.6548793916602605, |
|
"grad_norm": 0.18334811925888062, |
|
"learning_rate": 3.2746010638297875e-06, |
|
"loss": 1.4139, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.6582036525316324, |
|
"grad_norm": 0.25186312198638916, |
|
"learning_rate": 3.291223404255319e-06, |
|
"loss": 1.439, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.6615279134030043, |
|
"grad_norm": 0.16558600962162018, |
|
"learning_rate": 3.3078457446808513e-06, |
|
"loss": 1.4383, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.6648521742743762, |
|
"grad_norm": 0.2373538315296173, |
|
"learning_rate": 3.324468085106383e-06, |
|
"loss": 1.4334, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.6681764351457481, |
|
"grad_norm": 0.2821474075317383, |
|
"learning_rate": 3.3410904255319146e-06, |
|
"loss": 1.4418, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.6715006960171199, |
|
"grad_norm": 0.2443741410970688, |
|
"learning_rate": 3.357712765957447e-06, |
|
"loss": 1.4071, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.6748249568884919, |
|
"grad_norm": 0.17468735575675964, |
|
"learning_rate": 3.374335106382979e-06, |
|
"loss": 1.4109, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.6781492177598637, |
|
"grad_norm": 0.1655045598745346, |
|
"learning_rate": 3.3909574468085105e-06, |
|
"loss": 1.4049, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.6814734786312356, |
|
"grad_norm": 0.17598801851272583, |
|
"learning_rate": 3.4075797872340427e-06, |
|
"loss": 1.4188, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6847977395026075, |
|
"grad_norm": 0.28528669476509094, |
|
"learning_rate": 3.4242021276595743e-06, |
|
"loss": 1.408, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6881220003739793, |
|
"grad_norm": 0.17654620110988617, |
|
"learning_rate": 3.440824468085106e-06, |
|
"loss": 1.4117, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6914462612453512, |
|
"grad_norm": 0.2636467516422272, |
|
"learning_rate": 3.4574468085106386e-06, |
|
"loss": 1.3947, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6947705221167231, |
|
"grad_norm": 0.26495933532714844, |
|
"learning_rate": 3.4740691489361703e-06, |
|
"loss": 1.398, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.698094782988095, |
|
"grad_norm": 0.3873574435710907, |
|
"learning_rate": 3.490691489361702e-06, |
|
"loss": 1.4204, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7014190438594669, |
|
"grad_norm": 0.327854186296463, |
|
"learning_rate": 3.5073138297872345e-06, |
|
"loss": 1.3744, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7047433047308388, |
|
"grad_norm": 0.308570921421051, |
|
"learning_rate": 3.523936170212766e-06, |
|
"loss": 1.4293, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.7080675656022106, |
|
"grad_norm": 0.21123336255550385, |
|
"learning_rate": 3.5405585106382983e-06, |
|
"loss": 1.3878, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.7113918264735826, |
|
"grad_norm": 0.18777534365653992, |
|
"learning_rate": 3.55718085106383e-06, |
|
"loss": 1.3882, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.7147160873449544, |
|
"grad_norm": 0.2535350024700165, |
|
"learning_rate": 3.5738031914893617e-06, |
|
"loss": 1.3974, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7180403482163262, |
|
"grad_norm": 0.15405435860157013, |
|
"learning_rate": 3.590425531914894e-06, |
|
"loss": 1.3853, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.7213646090876982, |
|
"grad_norm": 0.1863648146390915, |
|
"learning_rate": 3.607047872340426e-06, |
|
"loss": 1.3835, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.72468886995907, |
|
"grad_norm": 0.18587157130241394, |
|
"learning_rate": 3.6236702127659576e-06, |
|
"loss": 1.3711, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.728013130830442, |
|
"grad_norm": 0.18254730105400085, |
|
"learning_rate": 3.6402925531914897e-06, |
|
"loss": 1.3768, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.7313373917018138, |
|
"grad_norm": 0.21665969491004944, |
|
"learning_rate": 3.6569148936170214e-06, |
|
"loss": 1.3638, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.7346616525731857, |
|
"grad_norm": 0.15701924264431, |
|
"learning_rate": 3.673537234042553e-06, |
|
"loss": 1.3885, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.7379859134445576, |
|
"grad_norm": 0.19307725131511688, |
|
"learning_rate": 3.6901595744680856e-06, |
|
"loss": 1.3933, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.7413101743159295, |
|
"grad_norm": 0.16837100684642792, |
|
"learning_rate": 3.7067819148936173e-06, |
|
"loss": 1.3685, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.7446344351873013, |
|
"grad_norm": 0.2914402484893799, |
|
"learning_rate": 3.723404255319149e-06, |
|
"loss": 1.3802, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.7479586960586732, |
|
"grad_norm": 0.2770545184612274, |
|
"learning_rate": 3.7400265957446815e-06, |
|
"loss": 1.3575, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.7512829569300451, |
|
"grad_norm": 0.19819234311580658, |
|
"learning_rate": 3.756648936170213e-06, |
|
"loss": 1.3695, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.7546072178014169, |
|
"grad_norm": 0.15371359884738922, |
|
"learning_rate": 3.7732712765957445e-06, |
|
"loss": 1.3514, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.7579314786727889, |
|
"grad_norm": 0.26700448989868164, |
|
"learning_rate": 3.789893617021277e-06, |
|
"loss": 1.3689, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.7612557395441607, |
|
"grad_norm": 0.2938506007194519, |
|
"learning_rate": 3.8065159574468087e-06, |
|
"loss": 1.3518, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.7645800004155326, |
|
"grad_norm": 0.2514606714248657, |
|
"learning_rate": 3.823138297872341e-06, |
|
"loss": 1.3655, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.7679042612869045, |
|
"grad_norm": 0.2503184378147125, |
|
"learning_rate": 3.8397606382978725e-06, |
|
"loss": 1.3511, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.7712285221582764, |
|
"grad_norm": 0.1815042346715927, |
|
"learning_rate": 3.856382978723404e-06, |
|
"loss": 1.383, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.7745527830296483, |
|
"grad_norm": 0.25425419211387634, |
|
"learning_rate": 3.873005319148936e-06, |
|
"loss": 1.3354, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.7778770439010201, |
|
"grad_norm": 0.18466657400131226, |
|
"learning_rate": 3.889627659574468e-06, |
|
"loss": 1.3514, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.781201304772392, |
|
"grad_norm": 0.1782332807779312, |
|
"learning_rate": 3.90625e-06, |
|
"loss": 1.32, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.7845255656437639, |
|
"grad_norm": 0.27637991309165955, |
|
"learning_rate": 3.922872340425532e-06, |
|
"loss": 1.3383, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.7878498265151358, |
|
"grad_norm": 0.17314772307872772, |
|
"learning_rate": 3.939494680851064e-06, |
|
"loss": 1.3314, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.7911740873865076, |
|
"grad_norm": 0.3641667068004608, |
|
"learning_rate": 3.956117021276596e-06, |
|
"loss": 1.3543, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.7944983482578796, |
|
"grad_norm": 0.3088253438472748, |
|
"learning_rate": 3.972739361702128e-06, |
|
"loss": 1.3444, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7978226091292514, |
|
"grad_norm": 0.25276973843574524, |
|
"learning_rate": 3.98936170212766e-06, |
|
"loss": 1.3102, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8011468700006233, |
|
"grad_norm": 0.26414382457733154, |
|
"learning_rate": 4.005984042553192e-06, |
|
"loss": 1.3119, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8044711308719952, |
|
"grad_norm": 0.1684638261795044, |
|
"learning_rate": 4.022606382978724e-06, |
|
"loss": 1.3204, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.807795391743367, |
|
"grad_norm": 0.18500946462154388, |
|
"learning_rate": 4.039228723404256e-06, |
|
"loss": 1.3251, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.811119652614739, |
|
"grad_norm": 0.2754835784435272, |
|
"learning_rate": 4.055851063829787e-06, |
|
"loss": 1.3258, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8144439134861108, |
|
"grad_norm": 0.18949855864048004, |
|
"learning_rate": 4.072473404255319e-06, |
|
"loss": 1.3145, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8177681743574827, |
|
"grad_norm": 0.6927218437194824, |
|
"learning_rate": 4.089095744680851e-06, |
|
"loss": 1.3205, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.8210924352288546, |
|
"grad_norm": 0.36098670959472656, |
|
"learning_rate": 4.105718085106383e-06, |
|
"loss": 1.3295, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.8244166961002265, |
|
"grad_norm": 0.25839686393737793, |
|
"learning_rate": 4.1223404255319146e-06, |
|
"loss": 1.321, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.8277409569715983, |
|
"grad_norm": 0.18720127642154694, |
|
"learning_rate": 4.138962765957447e-06, |
|
"loss": 1.2975, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.8310652178429703, |
|
"grad_norm": 0.17975495755672455, |
|
"learning_rate": 4.155585106382979e-06, |
|
"loss": 1.318, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.4430265344e+18, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|