|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 10818, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0036975411351451285, |
|
"grad_norm": 589585.4375, |
|
"learning_rate": 9.981512294324275e-06, |
|
"loss": 1.1475, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007395082270290257, |
|
"grad_norm": 156054.1875, |
|
"learning_rate": 9.96302458864855e-06, |
|
"loss": 0.8189, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.011092623405435386, |
|
"grad_norm": 473015.4375, |
|
"learning_rate": 9.944536882972824e-06, |
|
"loss": 0.8013, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.014790164540580514, |
|
"grad_norm": 591414.4375, |
|
"learning_rate": 9.926049177297098e-06, |
|
"loss": 0.7376, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.018487705675725642, |
|
"grad_norm": 277118.34375, |
|
"learning_rate": 9.907561471621374e-06, |
|
"loss": 0.6362, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.022185246810870772, |
|
"grad_norm": 380905.34375, |
|
"learning_rate": 9.889073765945647e-06, |
|
"loss": 0.6071, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0258827879460159, |
|
"grad_norm": 484594.5625, |
|
"learning_rate": 9.870586060269921e-06, |
|
"loss": 0.6172, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.029580329081161028, |
|
"grad_norm": 651200.875, |
|
"learning_rate": 9.852098354594197e-06, |
|
"loss": 0.5612, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.033277870216306155, |
|
"grad_norm": 385674.75, |
|
"learning_rate": 9.83361064891847e-06, |
|
"loss": 0.5435, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.036975411351451284, |
|
"grad_norm": 248445.234375, |
|
"learning_rate": 9.815122943242745e-06, |
|
"loss": 0.5084, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.040672952486596414, |
|
"grad_norm": 375588.0, |
|
"learning_rate": 9.79663523756702e-06, |
|
"loss": 0.5284, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.044370493621741544, |
|
"grad_norm": 255991.15625, |
|
"learning_rate": 9.778147531891294e-06, |
|
"loss": 0.5286, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.04806803475688667, |
|
"grad_norm": 428790.03125, |
|
"learning_rate": 9.759659826215568e-06, |
|
"loss": 0.5246, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0517655758920318, |
|
"grad_norm": 374524.40625, |
|
"learning_rate": 9.741172120539842e-06, |
|
"loss": 0.5035, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05546311702717693, |
|
"grad_norm": 545644.125, |
|
"learning_rate": 9.722684414864116e-06, |
|
"loss": 0.5088, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.059160658162322056, |
|
"grad_norm": 433521.78125, |
|
"learning_rate": 9.704196709188391e-06, |
|
"loss": 0.4777, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06285819929746718, |
|
"grad_norm": 1164997.25, |
|
"learning_rate": 9.685709003512665e-06, |
|
"loss": 0.4866, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06655574043261231, |
|
"grad_norm": 393447.1875, |
|
"learning_rate": 9.667221297836939e-06, |
|
"loss": 0.4095, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07025328156775744, |
|
"grad_norm": 1360361.75, |
|
"learning_rate": 9.648733592161213e-06, |
|
"loss": 0.4031, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07395082270290257, |
|
"grad_norm": 533357.375, |
|
"learning_rate": 9.630245886485488e-06, |
|
"loss": 0.3758, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0776483638380477, |
|
"grad_norm": 980064.375, |
|
"learning_rate": 9.611758180809762e-06, |
|
"loss": 0.344, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08134590497319283, |
|
"grad_norm": 656362.5625, |
|
"learning_rate": 9.593270475134036e-06, |
|
"loss": 0.3117, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08504344610833796, |
|
"grad_norm": 661492.875, |
|
"learning_rate": 9.57478276945831e-06, |
|
"loss": 0.3077, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08874098724348309, |
|
"grad_norm": 553728.25, |
|
"learning_rate": 9.556295063782585e-06, |
|
"loss": 0.324, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09243852837862822, |
|
"grad_norm": 1357614.375, |
|
"learning_rate": 9.53780735810686e-06, |
|
"loss": 0.2724, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09613606951377333, |
|
"grad_norm": 519362.1875, |
|
"learning_rate": 9.519319652431133e-06, |
|
"loss": 0.2831, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09983361064891846, |
|
"grad_norm": 637897.4375, |
|
"learning_rate": 9.500831946755409e-06, |
|
"loss": 0.2703, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1035311517840636, |
|
"grad_norm": 823037.125, |
|
"learning_rate": 9.482344241079683e-06, |
|
"loss": 0.2425, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.10722869291920872, |
|
"grad_norm": 905703.5, |
|
"learning_rate": 9.463856535403956e-06, |
|
"loss": 0.2729, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11092623405435385, |
|
"grad_norm": 1196128.75, |
|
"learning_rate": 9.445368829728232e-06, |
|
"loss": 0.2347, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11462377518949898, |
|
"grad_norm": 968333.625, |
|
"learning_rate": 9.426881124052506e-06, |
|
"loss": 0.2454, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.11832131632464411, |
|
"grad_norm": 1234395.875, |
|
"learning_rate": 9.40839341837678e-06, |
|
"loss": 0.2284, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12201885745978924, |
|
"grad_norm": 767672.9375, |
|
"learning_rate": 9.389905712701055e-06, |
|
"loss": 0.2095, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12571639859493436, |
|
"grad_norm": 494605.09375, |
|
"learning_rate": 9.37141800702533e-06, |
|
"loss": 0.1952, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1294139397300795, |
|
"grad_norm": 1483855.875, |
|
"learning_rate": 9.352930301349603e-06, |
|
"loss": 0.2304, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13311148086522462, |
|
"grad_norm": 566157.4375, |
|
"learning_rate": 9.334442595673877e-06, |
|
"loss": 0.2251, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.13680902200036976, |
|
"grad_norm": 1291356.0, |
|
"learning_rate": 9.315954889998153e-06, |
|
"loss": 0.1878, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14050656313551488, |
|
"grad_norm": 664807.3125, |
|
"learning_rate": 9.297467184322426e-06, |
|
"loss": 0.1692, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14420410427066002, |
|
"grad_norm": 450389.625, |
|
"learning_rate": 9.2789794786467e-06, |
|
"loss": 0.172, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.14790164540580514, |
|
"grad_norm": 776172.4375, |
|
"learning_rate": 9.260491772970976e-06, |
|
"loss": 0.195, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15159918654095028, |
|
"grad_norm": 582122.625, |
|
"learning_rate": 9.24200406729525e-06, |
|
"loss": 0.183, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1552967276760954, |
|
"grad_norm": 631486.0, |
|
"learning_rate": 9.223516361619524e-06, |
|
"loss": 0.1687, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1589942688112405, |
|
"grad_norm": 502021.9375, |
|
"learning_rate": 9.205028655943799e-06, |
|
"loss": 0.174, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16269180994638566, |
|
"grad_norm": 882197.1875, |
|
"learning_rate": 9.186540950268073e-06, |
|
"loss": 0.1805, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.16638935108153077, |
|
"grad_norm": 642990.0, |
|
"learning_rate": 9.168053244592347e-06, |
|
"loss": 0.1752, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17008689221667592, |
|
"grad_norm": 711188.5625, |
|
"learning_rate": 9.149565538916622e-06, |
|
"loss": 0.1544, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17378443335182103, |
|
"grad_norm": 333228.09375, |
|
"learning_rate": 9.131077833240896e-06, |
|
"loss": 0.1566, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.17748197448696618, |
|
"grad_norm": 890502.75, |
|
"learning_rate": 9.11259012756517e-06, |
|
"loss": 0.1611, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.1811795156221113, |
|
"grad_norm": 1566534.125, |
|
"learning_rate": 9.094102421889444e-06, |
|
"loss": 0.1774, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.18487705675725644, |
|
"grad_norm": 726765.6875, |
|
"learning_rate": 9.07561471621372e-06, |
|
"loss": 0.164, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18857459789240155, |
|
"grad_norm": 552103.1875, |
|
"learning_rate": 9.057127010537993e-06, |
|
"loss": 0.1604, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19227213902754667, |
|
"grad_norm": 454113.75, |
|
"learning_rate": 9.038639304862267e-06, |
|
"loss": 0.1568, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.1959696801626918, |
|
"grad_norm": 1157071.125, |
|
"learning_rate": 9.020151599186541e-06, |
|
"loss": 0.1486, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.19966722129783693, |
|
"grad_norm": 1144903.125, |
|
"learning_rate": 9.001663893510815e-06, |
|
"loss": 0.1451, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.20336476243298207, |
|
"grad_norm": 953759.0, |
|
"learning_rate": 8.98317618783509e-06, |
|
"loss": 0.1497, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2070623035681272, |
|
"grad_norm": 280053.78125, |
|
"learning_rate": 8.964688482159364e-06, |
|
"loss": 0.1532, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21075984470327233, |
|
"grad_norm": 725245.9375, |
|
"learning_rate": 8.946200776483638e-06, |
|
"loss": 0.1367, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21445738583841745, |
|
"grad_norm": 453623.09375, |
|
"learning_rate": 8.927713070807912e-06, |
|
"loss": 0.1498, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2181549269735626, |
|
"grad_norm": 675041.0625, |
|
"learning_rate": 8.909225365132188e-06, |
|
"loss": 0.1409, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2218524681087077, |
|
"grad_norm": 652479.5, |
|
"learning_rate": 8.890737659456462e-06, |
|
"loss": 0.1494, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.22555000924385285, |
|
"grad_norm": 393038.4375, |
|
"learning_rate": 8.872249953780735e-06, |
|
"loss": 0.146, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.22924755037899797, |
|
"grad_norm": 616304.6875, |
|
"learning_rate": 8.853762248105011e-06, |
|
"loss": 0.1346, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23294509151414308, |
|
"grad_norm": 736962.0625, |
|
"learning_rate": 8.835274542429285e-06, |
|
"loss": 0.1457, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.23664263264928823, |
|
"grad_norm": 527761.5625, |
|
"learning_rate": 8.816786836753559e-06, |
|
"loss": 0.1432, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24034017378443334, |
|
"grad_norm": 593494.5, |
|
"learning_rate": 8.798299131077834e-06, |
|
"loss": 0.1282, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.24403771491957849, |
|
"grad_norm": 515513.5, |
|
"learning_rate": 8.779811425402108e-06, |
|
"loss": 0.1369, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2477352560547236, |
|
"grad_norm": 733587.375, |
|
"learning_rate": 8.761323719726382e-06, |
|
"loss": 0.1309, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.2514327971898687, |
|
"grad_norm": 660185.4375, |
|
"learning_rate": 8.742836014050658e-06, |
|
"loss": 0.1367, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.25513033832501386, |
|
"grad_norm": 692261.375, |
|
"learning_rate": 8.724348308374931e-06, |
|
"loss": 0.1438, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.258827879460159, |
|
"grad_norm": 733542.75, |
|
"learning_rate": 8.705860602699205e-06, |
|
"loss": 0.1414, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26252542059530415, |
|
"grad_norm": 569422.8125, |
|
"learning_rate": 8.687372897023481e-06, |
|
"loss": 0.1356, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.26622296173044924, |
|
"grad_norm": 737659.5625, |
|
"learning_rate": 8.668885191347755e-06, |
|
"loss": 0.1238, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.2699205028655944, |
|
"grad_norm": 588390.3125, |
|
"learning_rate": 8.650397485672029e-06, |
|
"loss": 0.1315, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2736180440007395, |
|
"grad_norm": 469582.0, |
|
"learning_rate": 8.631909779996304e-06, |
|
"loss": 0.1241, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.2773155851358846, |
|
"grad_norm": 877184.875, |
|
"learning_rate": 8.613422074320578e-06, |
|
"loss": 0.1217, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28101312627102976, |
|
"grad_norm": 610305.125, |
|
"learning_rate": 8.594934368644852e-06, |
|
"loss": 0.1197, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.2847106674061749, |
|
"grad_norm": 435616.65625, |
|
"learning_rate": 8.576446662969127e-06, |
|
"loss": 0.128, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.28840820854132004, |
|
"grad_norm": 1002288.1875, |
|
"learning_rate": 8.557958957293401e-06, |
|
"loss": 0.1177, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.29210574967646513, |
|
"grad_norm": 407610.78125, |
|
"learning_rate": 8.539471251617675e-06, |
|
"loss": 0.1296, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.2958032908116103, |
|
"grad_norm": 628192.1875, |
|
"learning_rate": 8.520983545941949e-06, |
|
"loss": 0.121, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.2995008319467554, |
|
"grad_norm": 470853.6875, |
|
"learning_rate": 8.502495840266225e-06, |
|
"loss": 0.113, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.30319837308190056, |
|
"grad_norm": 420262.5, |
|
"learning_rate": 8.484008134590499e-06, |
|
"loss": 0.1156, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.30689591421704565, |
|
"grad_norm": 729730.0, |
|
"learning_rate": 8.465520428914772e-06, |
|
"loss": 0.1227, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.3105934553521908, |
|
"grad_norm": 493498.28125, |
|
"learning_rate": 8.447032723239046e-06, |
|
"loss": 0.1208, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.31429099648733594, |
|
"grad_norm": 690842.625, |
|
"learning_rate": 8.428545017563322e-06, |
|
"loss": 0.1112, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.317988537622481, |
|
"grad_norm": 473233.1875, |
|
"learning_rate": 8.410057311887596e-06, |
|
"loss": 0.1129, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32168607875762617, |
|
"grad_norm": 780393.375, |
|
"learning_rate": 8.39156960621187e-06, |
|
"loss": 0.1126, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.3253836198927713, |
|
"grad_norm": 422343.6875, |
|
"learning_rate": 8.373081900536143e-06, |
|
"loss": 0.106, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.32908116102791646, |
|
"grad_norm": 665555.875, |
|
"learning_rate": 8.354594194860419e-06, |
|
"loss": 0.1148, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.33277870216306155, |
|
"grad_norm": 593427.9375, |
|
"learning_rate": 8.336106489184693e-06, |
|
"loss": 0.113, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3364762432982067, |
|
"grad_norm": 370478.46875, |
|
"learning_rate": 8.317618783508967e-06, |
|
"loss": 0.1072, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34017378443335183, |
|
"grad_norm": 280958.96875, |
|
"learning_rate": 8.29913107783324e-06, |
|
"loss": 0.1064, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.343871325568497, |
|
"grad_norm": 467909.65625, |
|
"learning_rate": 8.280643372157516e-06, |
|
"loss": 0.1051, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.34756886670364207, |
|
"grad_norm": 1186302.125, |
|
"learning_rate": 8.26215566648179e-06, |
|
"loss": 0.1191, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.3512664078387872, |
|
"grad_norm": 346905.46875, |
|
"learning_rate": 8.243667960806064e-06, |
|
"loss": 0.1157, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35496394897393235, |
|
"grad_norm": 639500.4375, |
|
"learning_rate": 8.22518025513034e-06, |
|
"loss": 0.1092, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.35866149010907744, |
|
"grad_norm": 398886.0625, |
|
"learning_rate": 8.206692549454613e-06, |
|
"loss": 0.0992, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.3623590312442226, |
|
"grad_norm": 551855.8125, |
|
"learning_rate": 8.188204843778887e-06, |
|
"loss": 0.1108, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.36605657237936773, |
|
"grad_norm": 520954.9375, |
|
"learning_rate": 8.169717138103163e-06, |
|
"loss": 0.1035, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.36975411351451287, |
|
"grad_norm": 667530.5625, |
|
"learning_rate": 8.151229432427437e-06, |
|
"loss": 0.0984, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37345165464965796, |
|
"grad_norm": 461173.25, |
|
"learning_rate": 8.13274172675171e-06, |
|
"loss": 0.1077, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.3771491957848031, |
|
"grad_norm": 797304.125, |
|
"learning_rate": 8.114254021075984e-06, |
|
"loss": 0.0921, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.38084673691994825, |
|
"grad_norm": 530894.0625, |
|
"learning_rate": 8.09576631540026e-06, |
|
"loss": 0.095, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.38454427805509334, |
|
"grad_norm": 551120.8125, |
|
"learning_rate": 8.077278609724534e-06, |
|
"loss": 0.1042, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.3882418191902385, |
|
"grad_norm": 423327.59375, |
|
"learning_rate": 8.058790904048808e-06, |
|
"loss": 0.1086, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.3919393603253836, |
|
"grad_norm": 373503.53125, |
|
"learning_rate": 8.040303198373083e-06, |
|
"loss": 0.0966, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.39563690146052877, |
|
"grad_norm": 431499.09375, |
|
"learning_rate": 8.021815492697357e-06, |
|
"loss": 0.0996, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.39933444259567386, |
|
"grad_norm": 799203.5625, |
|
"learning_rate": 8.003327787021631e-06, |
|
"loss": 0.0988, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.403031983730819, |
|
"grad_norm": 519873.125, |
|
"learning_rate": 7.984840081345906e-06, |
|
"loss": 0.0999, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.40672952486596414, |
|
"grad_norm": 450627.4375, |
|
"learning_rate": 7.96635237567018e-06, |
|
"loss": 0.0817, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.4104270660011093, |
|
"grad_norm": 854112.3125, |
|
"learning_rate": 7.947864669994454e-06, |
|
"loss": 0.0983, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.4141246071362544, |
|
"grad_norm": 616815.8125, |
|
"learning_rate": 7.92937696431873e-06, |
|
"loss": 0.099, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.4178221482713995, |
|
"grad_norm": 379120.875, |
|
"learning_rate": 7.910889258643004e-06, |
|
"loss": 0.0931, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.42151968940654466, |
|
"grad_norm": 347684.78125, |
|
"learning_rate": 7.892401552967277e-06, |
|
"loss": 0.0903, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.42521723054168975, |
|
"grad_norm": 394592.6875, |
|
"learning_rate": 7.873913847291551e-06, |
|
"loss": 0.09, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4289147716768349, |
|
"grad_norm": 406101.0625, |
|
"learning_rate": 7.855426141615827e-06, |
|
"loss": 0.1019, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.43261231281198004, |
|
"grad_norm": 392444.125, |
|
"learning_rate": 7.8369384359401e-06, |
|
"loss": 0.0828, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.4363098539471252, |
|
"grad_norm": 751139.6875, |
|
"learning_rate": 7.818450730264375e-06, |
|
"loss": 0.0922, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.44000739508227027, |
|
"grad_norm": 324115.75, |
|
"learning_rate": 7.799963024588648e-06, |
|
"loss": 0.0923, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.4437049362174154, |
|
"grad_norm": 444140.375, |
|
"learning_rate": 7.781475318912924e-06, |
|
"loss": 0.0834, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.44740247735256056, |
|
"grad_norm": 476187.125, |
|
"learning_rate": 7.762987613237198e-06, |
|
"loss": 0.0889, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.4511000184877057, |
|
"grad_norm": 511409.96875, |
|
"learning_rate": 7.744499907561472e-06, |
|
"loss": 0.0828, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4547975596228508, |
|
"grad_norm": 822892.375, |
|
"learning_rate": 7.726012201885746e-06, |
|
"loss": 0.0876, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.45849510075799593, |
|
"grad_norm": 282021.03125, |
|
"learning_rate": 7.707524496210021e-06, |
|
"loss": 0.091, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.4621926418931411, |
|
"grad_norm": 585528.8125, |
|
"learning_rate": 7.689036790534295e-06, |
|
"loss": 0.0871, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46589018302828616, |
|
"grad_norm": 387105.9375, |
|
"learning_rate": 7.670549084858569e-06, |
|
"loss": 0.0808, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.4695877241634313, |
|
"grad_norm": 567624.375, |
|
"learning_rate": 7.652061379182843e-06, |
|
"loss": 0.0925, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.47328526529857645, |
|
"grad_norm": 314457.0, |
|
"learning_rate": 7.633573673507118e-06, |
|
"loss": 0.0865, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.4769828064337216, |
|
"grad_norm": 398509.59375, |
|
"learning_rate": 7.615085967831392e-06, |
|
"loss": 0.0904, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.4806803475688667, |
|
"grad_norm": 552215.5625, |
|
"learning_rate": 7.596598262155667e-06, |
|
"loss": 0.0922, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.4843778887040118, |
|
"grad_norm": 259611.421875, |
|
"learning_rate": 7.578110556479942e-06, |
|
"loss": 0.0817, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.48807542983915697, |
|
"grad_norm": 471336.875, |
|
"learning_rate": 7.5596228508042155e-06, |
|
"loss": 0.0819, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.4917729709743021, |
|
"grad_norm": 405907.1875, |
|
"learning_rate": 7.541135145128489e-06, |
|
"loss": 0.0886, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.4954705121094472, |
|
"grad_norm": 471205.25, |
|
"learning_rate": 7.522647439452765e-06, |
|
"loss": 0.0863, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.49916805324459235, |
|
"grad_norm": 357797.8125, |
|
"learning_rate": 7.504159733777039e-06, |
|
"loss": 0.0817, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5028655943797374, |
|
"grad_norm": 490397.75, |
|
"learning_rate": 7.485672028101313e-06, |
|
"loss": 0.0794, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5065631355148826, |
|
"grad_norm": 317551.46875, |
|
"learning_rate": 7.467184322425588e-06, |
|
"loss": 0.0761, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.5102606766500277, |
|
"grad_norm": 391744.40625, |
|
"learning_rate": 7.448696616749862e-06, |
|
"loss": 0.0817, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5139582177851728, |
|
"grad_norm": 559249.4375, |
|
"learning_rate": 7.430208911074136e-06, |
|
"loss": 0.0803, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.517655758920318, |
|
"grad_norm": 245447.3125, |
|
"learning_rate": 7.411721205398411e-06, |
|
"loss": 0.0799, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5213533000554631, |
|
"grad_norm": 203768.40625, |
|
"learning_rate": 7.393233499722685e-06, |
|
"loss": 0.0825, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5250508411906083, |
|
"grad_norm": 256229.953125, |
|
"learning_rate": 7.374745794046959e-06, |
|
"loss": 0.0842, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5287483823257534, |
|
"grad_norm": 748886.6875, |
|
"learning_rate": 7.356258088371234e-06, |
|
"loss": 0.0829, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.5324459234608985, |
|
"grad_norm": 392705.1875, |
|
"learning_rate": 7.337770382695508e-06, |
|
"loss": 0.0853, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5361434645960437, |
|
"grad_norm": 372998.21875, |
|
"learning_rate": 7.3192826770197826e-06, |
|
"loss": 0.0763, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5398410057311888, |
|
"grad_norm": 419793.71875, |
|
"learning_rate": 7.300794971344057e-06, |
|
"loss": 0.0809, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.5435385468663338, |
|
"grad_norm": 401173.0, |
|
"learning_rate": 7.282307265668331e-06, |
|
"loss": 0.0652, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.547236088001479, |
|
"grad_norm": 270387.03125, |
|
"learning_rate": 7.263819559992605e-06, |
|
"loss": 0.0742, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.5509336291366241, |
|
"grad_norm": 353803.25, |
|
"learning_rate": 7.24533185431688e-06, |
|
"loss": 0.0793, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.5546311702717692, |
|
"grad_norm": 339236.09375, |
|
"learning_rate": 7.2268441486411544e-06, |
|
"loss": 0.0802, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.5583287114069144, |
|
"grad_norm": 398772.21875, |
|
"learning_rate": 7.208356442965428e-06, |
|
"loss": 0.0774, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.5620262525420595, |
|
"grad_norm": 403407.84375, |
|
"learning_rate": 7.189868737289702e-06, |
|
"loss": 0.0758, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.5657237936772047, |
|
"grad_norm": 384857.3125, |
|
"learning_rate": 7.171381031613978e-06, |
|
"loss": 0.0692, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.5694213348123498, |
|
"grad_norm": 295177.6875, |
|
"learning_rate": 7.152893325938252e-06, |
|
"loss": 0.0748, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.5731188759474949, |
|
"grad_norm": 320366.5625, |
|
"learning_rate": 7.1344056202625255e-06, |
|
"loss": 0.0724, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.5768164170826401, |
|
"grad_norm": 455169.15625, |
|
"learning_rate": 7.115917914586801e-06, |
|
"loss": 0.0848, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.5805139582177852, |
|
"grad_norm": 313038.40625, |
|
"learning_rate": 7.097430208911075e-06, |
|
"loss": 0.0803, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.5842114993529303, |
|
"grad_norm": 545048.125, |
|
"learning_rate": 7.078942503235349e-06, |
|
"loss": 0.0737, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.5879090404880755, |
|
"grad_norm": 519241.8125, |
|
"learning_rate": 7.0604547975596235e-06, |
|
"loss": 0.0718, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.5916065816232206, |
|
"grad_norm": 505339.90625, |
|
"learning_rate": 7.041967091883898e-06, |
|
"loss": 0.074, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.5953041227583656, |
|
"grad_norm": 304366.46875, |
|
"learning_rate": 7.023479386208172e-06, |
|
"loss": 0.0685, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.5990016638935108, |
|
"grad_norm": 573642.75, |
|
"learning_rate": 7.004991680532447e-06, |
|
"loss": 0.0674, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6026992050286559, |
|
"grad_norm": 440082.625, |
|
"learning_rate": 6.986503974856721e-06, |
|
"loss": 0.0713, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6063967461638011, |
|
"grad_norm": 435252.34375, |
|
"learning_rate": 6.968016269180995e-06, |
|
"loss": 0.0645, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.6100942872989462, |
|
"grad_norm": 186664.71875, |
|
"learning_rate": 6.94952856350527e-06, |
|
"loss": 0.0687, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6137918284340913, |
|
"grad_norm": 371479.125, |
|
"learning_rate": 6.931040857829544e-06, |
|
"loss": 0.0708, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.6174893695692365, |
|
"grad_norm": 162696.5, |
|
"learning_rate": 6.912553152153818e-06, |
|
"loss": 0.0638, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.6211869107043816, |
|
"grad_norm": 506640.0625, |
|
"learning_rate": 6.894065446478093e-06, |
|
"loss": 0.0707, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.6248844518395267, |
|
"grad_norm": 307854.53125, |
|
"learning_rate": 6.875577740802367e-06, |
|
"loss": 0.0648, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.6285819929746719, |
|
"grad_norm": 252089.015625, |
|
"learning_rate": 6.857090035126641e-06, |
|
"loss": 0.0785, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.632279534109817, |
|
"grad_norm": 218276.875, |
|
"learning_rate": 6.838602329450915e-06, |
|
"loss": 0.0679, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.635977075244962, |
|
"grad_norm": 262298.65625, |
|
"learning_rate": 6.8201146237751905e-06, |
|
"loss": 0.0725, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.6396746163801073, |
|
"grad_norm": 460630.875, |
|
"learning_rate": 6.801626918099464e-06, |
|
"loss": 0.0654, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.6433721575152523, |
|
"grad_norm": 313295.375, |
|
"learning_rate": 6.783139212423738e-06, |
|
"loss": 0.0718, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.6470696986503975, |
|
"grad_norm": 425139.90625, |
|
"learning_rate": 6.764651506748014e-06, |
|
"loss": 0.0648, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.6507672397855426, |
|
"grad_norm": 643511.1875, |
|
"learning_rate": 6.746163801072288e-06, |
|
"loss": 0.0667, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.6544647809206877, |
|
"grad_norm": 203010.78125, |
|
"learning_rate": 6.7276760953965615e-06, |
|
"loss": 0.0587, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.6581623220558329, |
|
"grad_norm": 738180.5, |
|
"learning_rate": 6.709188389720836e-06, |
|
"loss": 0.058, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.661859863190978, |
|
"grad_norm": 374890.40625, |
|
"learning_rate": 6.69070068404511e-06, |
|
"loss": 0.0703, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.6655574043261231, |
|
"grad_norm": 321488.34375, |
|
"learning_rate": 6.672212978369385e-06, |
|
"loss": 0.0626, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.6692549454612683, |
|
"grad_norm": 421561.0625, |
|
"learning_rate": 6.6537252726936595e-06, |
|
"loss": 0.0728, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.6729524865964134, |
|
"grad_norm": 372063.625, |
|
"learning_rate": 6.635237567017933e-06, |
|
"loss": 0.0637, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.6766500277315585, |
|
"grad_norm": 261809.125, |
|
"learning_rate": 6.616749861342207e-06, |
|
"loss": 0.0619, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.6803475688667037, |
|
"grad_norm": 290989.65625, |
|
"learning_rate": 6.598262155666483e-06, |
|
"loss": 0.0657, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.6840451100018488, |
|
"grad_norm": 242986.140625, |
|
"learning_rate": 6.579774449990757e-06, |
|
"loss": 0.0576, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.687742651136994, |
|
"grad_norm": 406223.15625, |
|
"learning_rate": 6.5612867443150306e-06, |
|
"loss": 0.0599, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.691440192272139, |
|
"grad_norm": 737275.5625, |
|
"learning_rate": 6.542799038639306e-06, |
|
"loss": 0.0687, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6951377334072841, |
|
"grad_norm": 205932.21875, |
|
"learning_rate": 6.52431133296358e-06, |
|
"loss": 0.0654, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.6988352745424293, |
|
"grad_norm": 355624.5, |
|
"learning_rate": 6.505823627287854e-06, |
|
"loss": 0.0616, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7025328156775744, |
|
"grad_norm": 269638.0, |
|
"learning_rate": 6.487335921612129e-06, |
|
"loss": 0.0549, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7062303568127195, |
|
"grad_norm": 520156.78125, |
|
"learning_rate": 6.468848215936403e-06, |
|
"loss": 0.0687, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.7099278979478647, |
|
"grad_norm": 435804.65625, |
|
"learning_rate": 6.450360510260677e-06, |
|
"loss": 0.0551, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.7136254390830098, |
|
"grad_norm": 287883.53125, |
|
"learning_rate": 6.431872804584951e-06, |
|
"loss": 0.0585, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.7173229802181549, |
|
"grad_norm": 513667.65625, |
|
"learning_rate": 6.413385098909226e-06, |
|
"loss": 0.0588, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.7210205213533001, |
|
"grad_norm": 560210.9375, |
|
"learning_rate": 6.3948973932335004e-06, |
|
"loss": 0.0601, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7247180624884452, |
|
"grad_norm": 570266.3125, |
|
"learning_rate": 6.376409687557774e-06, |
|
"loss": 0.0656, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7284156036235903, |
|
"grad_norm": 360799.875, |
|
"learning_rate": 6.357921981882049e-06, |
|
"loss": 0.0554, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.7321131447587355, |
|
"grad_norm": 477566.0, |
|
"learning_rate": 6.339434276206323e-06, |
|
"loss": 0.0526, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.7358106858938805, |
|
"grad_norm": 333243.9375, |
|
"learning_rate": 6.320946570530598e-06, |
|
"loss": 0.0612, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.7395082270290257, |
|
"grad_norm": 318909.875, |
|
"learning_rate": 6.302458864854872e-06, |
|
"loss": 0.0621, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7432057681641708, |
|
"grad_norm": 359029.1875, |
|
"learning_rate": 6.283971159179146e-06, |
|
"loss": 0.0536, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.7469033092993159, |
|
"grad_norm": 640655.25, |
|
"learning_rate": 6.26548345350342e-06, |
|
"loss": 0.0546, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.7506008504344611, |
|
"grad_norm": 488760.09375, |
|
"learning_rate": 6.246995747827696e-06, |
|
"loss": 0.058, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.7542983915696062, |
|
"grad_norm": 312268.46875, |
|
"learning_rate": 6.2285080421519695e-06, |
|
"loss": 0.0598, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.7579959327047513, |
|
"grad_norm": 342215.8125, |
|
"learning_rate": 6.210020336476243e-06, |
|
"loss": 0.0571, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.7616934738398965, |
|
"grad_norm": 287534.5625, |
|
"learning_rate": 6.191532630800519e-06, |
|
"loss": 0.0592, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.7653910149750416, |
|
"grad_norm": 189238.671875, |
|
"learning_rate": 6.173044925124793e-06, |
|
"loss": 0.0557, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.7690885561101867, |
|
"grad_norm": 267354.3125, |
|
"learning_rate": 6.154557219449067e-06, |
|
"loss": 0.052, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.7727860972453319, |
|
"grad_norm": 343019.71875, |
|
"learning_rate": 6.136069513773341e-06, |
|
"loss": 0.0552, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.776483638380477, |
|
"grad_norm": 232639.453125, |
|
"learning_rate": 6.117581808097616e-06, |
|
"loss": 0.0547, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7801811795156222, |
|
"grad_norm": 282819.8125, |
|
"learning_rate": 6.09909410242189e-06, |
|
"loss": 0.0562, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.7838787206507672, |
|
"grad_norm": 230195.125, |
|
"learning_rate": 6.080606396746165e-06, |
|
"loss": 0.06, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.7875762617859123, |
|
"grad_norm": 301125.625, |
|
"learning_rate": 6.0621186910704385e-06, |
|
"loss": 0.0533, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.7912738029210575, |
|
"grad_norm": 460118.625, |
|
"learning_rate": 6.043630985394713e-06, |
|
"loss": 0.0517, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.7949713440562026, |
|
"grad_norm": 432589.125, |
|
"learning_rate": 6.025143279718987e-06, |
|
"loss": 0.0562, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7986688851913477, |
|
"grad_norm": 237171.75, |
|
"learning_rate": 6.006655574043262e-06, |
|
"loss": 0.0554, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8023664263264929, |
|
"grad_norm": 216067.484375, |
|
"learning_rate": 5.988167868367536e-06, |
|
"loss": 0.0521, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.806063967461638, |
|
"grad_norm": 472261.09375, |
|
"learning_rate": 5.9696801626918095e-06, |
|
"loss": 0.0574, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.8097615085967831, |
|
"grad_norm": 278600.25, |
|
"learning_rate": 5.951192457016085e-06, |
|
"loss": 0.0563, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.8134590497319283, |
|
"grad_norm": 308273.0625, |
|
"learning_rate": 5.932704751340359e-06, |
|
"loss": 0.0503, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8171565908670734, |
|
"grad_norm": 367411.625, |
|
"learning_rate": 5.914217045664633e-06, |
|
"loss": 0.0591, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.8208541320022186, |
|
"grad_norm": 246328.625, |
|
"learning_rate": 5.895729339988908e-06, |
|
"loss": 0.0505, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.8245516731373637, |
|
"grad_norm": 208687.65625, |
|
"learning_rate": 5.877241634313182e-06, |
|
"loss": 0.0514, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.8282492142725087, |
|
"grad_norm": 188789.828125, |
|
"learning_rate": 5.858753928637456e-06, |
|
"loss": 0.0484, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.831946755407654, |
|
"grad_norm": 565020.375, |
|
"learning_rate": 5.840266222961732e-06, |
|
"loss": 0.0507, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.835644296542799, |
|
"grad_norm": 253717.375, |
|
"learning_rate": 5.8217785172860055e-06, |
|
"loss": 0.052, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.8393418376779441, |
|
"grad_norm": 240820.609375, |
|
"learning_rate": 5.803290811610279e-06, |
|
"loss": 0.0453, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.8430393788130893, |
|
"grad_norm": 285280.9375, |
|
"learning_rate": 5.784803105934554e-06, |
|
"loss": 0.0462, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.8467369199482344, |
|
"grad_norm": 496788.34375, |
|
"learning_rate": 5.766315400258828e-06, |
|
"loss": 0.0494, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.8504344610833795, |
|
"grad_norm": 270688.40625, |
|
"learning_rate": 5.747827694583103e-06, |
|
"loss": 0.0551, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.8541320022185247, |
|
"grad_norm": 352414.625, |
|
"learning_rate": 5.729339988907377e-06, |
|
"loss": 0.0493, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.8578295433536698, |
|
"grad_norm": 236690.390625, |
|
"learning_rate": 5.710852283231651e-06, |
|
"loss": 0.0504, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.861527084488815, |
|
"grad_norm": 354131.8125, |
|
"learning_rate": 5.692364577555925e-06, |
|
"loss": 0.0505, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.8652246256239601, |
|
"grad_norm": 200457.0625, |
|
"learning_rate": 5.673876871880201e-06, |
|
"loss": 0.04, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.8689221667591052, |
|
"grad_norm": 311071.1875, |
|
"learning_rate": 5.6553891662044746e-06, |
|
"loss": 0.0487, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8726197078942504, |
|
"grad_norm": 159538.90625, |
|
"learning_rate": 5.6369014605287484e-06, |
|
"loss": 0.0405, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.8763172490293955, |
|
"grad_norm": 135958.078125, |
|
"learning_rate": 5.618413754853022e-06, |
|
"loss": 0.0425, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.8800147901645405, |
|
"grad_norm": 377985.625, |
|
"learning_rate": 5.599926049177298e-06, |
|
"loss": 0.0478, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.8837123312996857, |
|
"grad_norm": 351438.78125, |
|
"learning_rate": 5.581438343501572e-06, |
|
"loss": 0.0432, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.8874098724348308, |
|
"grad_norm": 203324.046875, |
|
"learning_rate": 5.562950637825846e-06, |
|
"loss": 0.0452, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8911074135699759, |
|
"grad_norm": 265801.84375, |
|
"learning_rate": 5.544462932150121e-06, |
|
"loss": 0.0458, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.8948049547051211, |
|
"grad_norm": 237225.953125, |
|
"learning_rate": 5.525975226474395e-06, |
|
"loss": 0.0441, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.8985024958402662, |
|
"grad_norm": 248215.796875, |
|
"learning_rate": 5.507487520798669e-06, |
|
"loss": 0.0464, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9022000369754114, |
|
"grad_norm": 216602.59375, |
|
"learning_rate": 5.488999815122944e-06, |
|
"loss": 0.0496, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.9058975781105565, |
|
"grad_norm": 212512.0, |
|
"learning_rate": 5.470512109447218e-06, |
|
"loss": 0.0426, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9095951192457016, |
|
"grad_norm": 264672.21875, |
|
"learning_rate": 5.452024403771492e-06, |
|
"loss": 0.0431, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.9132926603808468, |
|
"grad_norm": 195740.625, |
|
"learning_rate": 5.433536698095767e-06, |
|
"loss": 0.0454, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.9169902015159919, |
|
"grad_norm": 239070.890625, |
|
"learning_rate": 5.415048992420041e-06, |
|
"loss": 0.0388, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.920687742651137, |
|
"grad_norm": 327097.84375, |
|
"learning_rate": 5.3965612867443155e-06, |
|
"loss": 0.0418, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.9243852837862822, |
|
"grad_norm": 319707.625, |
|
"learning_rate": 5.37807358106859e-06, |
|
"loss": 0.0519, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.9280828249214272, |
|
"grad_norm": 166737.625, |
|
"learning_rate": 5.359585875392864e-06, |
|
"loss": 0.0437, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.9317803660565723, |
|
"grad_norm": 489044.0625, |
|
"learning_rate": 5.341098169717138e-06, |
|
"loss": 0.0438, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.9354779071917175, |
|
"grad_norm": 288754.90625, |
|
"learning_rate": 5.3226104640414135e-06, |
|
"loss": 0.0447, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.9391754483268626, |
|
"grad_norm": 205332.171875, |
|
"learning_rate": 5.304122758365687e-06, |
|
"loss": 0.044, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.9428729894620078, |
|
"grad_norm": 266008.21875, |
|
"learning_rate": 5.285635052689961e-06, |
|
"loss": 0.0448, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.9465705305971529, |
|
"grad_norm": 433041.125, |
|
"learning_rate": 5.267147347014237e-06, |
|
"loss": 0.0428, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.950268071732298, |
|
"grad_norm": 176340.359375, |
|
"learning_rate": 5.248659641338511e-06, |
|
"loss": 0.041, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.9539656128674432, |
|
"grad_norm": 238454.6875, |
|
"learning_rate": 5.2301719356627845e-06, |
|
"loss": 0.0372, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.9576631540025883, |
|
"grad_norm": 269030.46875, |
|
"learning_rate": 5.211684229987059e-06, |
|
"loss": 0.0461, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.9613606951377334, |
|
"grad_norm": 270103.5, |
|
"learning_rate": 5.193196524311334e-06, |
|
"loss": 0.039, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9650582362728786, |
|
"grad_norm": 152555.5625, |
|
"learning_rate": 5.174708818635608e-06, |
|
"loss": 0.0425, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.9687557774080237, |
|
"grad_norm": 308841.71875, |
|
"learning_rate": 5.156221112959882e-06, |
|
"loss": 0.0419, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.9724533185431687, |
|
"grad_norm": 298958.28125, |
|
"learning_rate": 5.137733407284156e-06, |
|
"loss": 0.043, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.9761508596783139, |
|
"grad_norm": 293175.53125, |
|
"learning_rate": 5.119245701608431e-06, |
|
"loss": 0.0488, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.979848400813459, |
|
"grad_norm": 306048.71875, |
|
"learning_rate": 5.100757995932705e-06, |
|
"loss": 0.0414, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.9835459419486042, |
|
"grad_norm": 263478.65625, |
|
"learning_rate": 5.08227029025698e-06, |
|
"loss": 0.0377, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.9872434830837493, |
|
"grad_norm": 269423.96875, |
|
"learning_rate": 5.0637825845812535e-06, |
|
"loss": 0.042, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.9909410242188944, |
|
"grad_norm": 373293.375, |
|
"learning_rate": 5.045294878905527e-06, |
|
"loss": 0.0404, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.9946385653540396, |
|
"grad_norm": 230399.25, |
|
"learning_rate": 5.026807173229803e-06, |
|
"loss": 0.0363, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.9983361064891847, |
|
"grad_norm": 297080.53125, |
|
"learning_rate": 5.008319467554077e-06, |
|
"loss": 0.0368, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.00203364762433, |
|
"grad_norm": 193461.09375, |
|
"learning_rate": 4.9898317618783515e-06, |
|
"loss": 0.0318, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.0057311887594749, |
|
"grad_norm": 313585.34375, |
|
"learning_rate": 4.971344056202625e-06, |
|
"loss": 0.0321, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.00942872989462, |
|
"grad_norm": 305049.40625, |
|
"learning_rate": 4.9528563505269e-06, |
|
"loss": 0.0319, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.0131262710297653, |
|
"grad_norm": 184038.984375, |
|
"learning_rate": 4.934368644851175e-06, |
|
"loss": 0.0337, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.0168238121649102, |
|
"grad_norm": 250623.484375, |
|
"learning_rate": 4.915880939175449e-06, |
|
"loss": 0.0339, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.0205213533000554, |
|
"grad_norm": 389090.90625, |
|
"learning_rate": 4.897393233499723e-06, |
|
"loss": 0.0361, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.0242188944352006, |
|
"grad_norm": 496750.34375, |
|
"learning_rate": 4.878905527823997e-06, |
|
"loss": 0.0345, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.0279164355703456, |
|
"grad_norm": 173545.296875, |
|
"learning_rate": 4.860417822148272e-06, |
|
"loss": 0.0335, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.0316139767054908, |
|
"grad_norm": 135819.625, |
|
"learning_rate": 4.841930116472547e-06, |
|
"loss": 0.0341, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.035311517840636, |
|
"grad_norm": 331638.21875, |
|
"learning_rate": 4.8234424107968206e-06, |
|
"loss": 0.031, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.0390090589757812, |
|
"grad_norm": 129892.90625, |
|
"learning_rate": 4.8049547051210944e-06, |
|
"loss": 0.0322, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.0427066001109262, |
|
"grad_norm": 185758.609375, |
|
"learning_rate": 4.786466999445369e-06, |
|
"loss": 0.0329, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.0464041412460714, |
|
"grad_norm": 123809.5390625, |
|
"learning_rate": 4.767979293769643e-06, |
|
"loss": 0.0321, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.0501016823812166, |
|
"grad_norm": 254882.265625, |
|
"learning_rate": 4.749491588093918e-06, |
|
"loss": 0.0345, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.0537992235163616, |
|
"grad_norm": 243718.71875, |
|
"learning_rate": 4.7310038824181924e-06, |
|
"loss": 0.0338, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.0574967646515068, |
|
"grad_norm": 282382.6875, |
|
"learning_rate": 4.712516176742466e-06, |
|
"loss": 0.0314, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.061194305786652, |
|
"grad_norm": 224000.4375, |
|
"learning_rate": 4.694028471066741e-06, |
|
"loss": 0.0276, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.064891846921797, |
|
"grad_norm": 134676.859375, |
|
"learning_rate": 4.675540765391015e-06, |
|
"loss": 0.0313, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.0685893880569421, |
|
"grad_norm": 277227.3125, |
|
"learning_rate": 4.65705305971529e-06, |
|
"loss": 0.0312, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.0722869291920873, |
|
"grad_norm": 193661.1875, |
|
"learning_rate": 4.638565354039564e-06, |
|
"loss": 0.0328, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.0759844703272323, |
|
"grad_norm": 240792.234375, |
|
"learning_rate": 4.620077648363838e-06, |
|
"loss": 0.0299, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.0796820114623775, |
|
"grad_norm": 205885.5625, |
|
"learning_rate": 4.601589942688113e-06, |
|
"loss": 0.0313, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.0833795525975227, |
|
"grad_norm": 325387.34375, |
|
"learning_rate": 4.583102237012388e-06, |
|
"loss": 0.033, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.0870770937326677, |
|
"grad_norm": 141092.296875, |
|
"learning_rate": 4.5646145313366615e-06, |
|
"loss": 0.0339, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.090774634867813, |
|
"grad_norm": 571566.5625, |
|
"learning_rate": 4.546126825660936e-06, |
|
"loss": 0.0336, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.094472176002958, |
|
"grad_norm": 176920.59375, |
|
"learning_rate": 4.52763911998521e-06, |
|
"loss": 0.0312, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.098169717138103, |
|
"grad_norm": 310769.78125, |
|
"learning_rate": 4.509151414309485e-06, |
|
"loss": 0.0324, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.1018672582732483, |
|
"grad_norm": 385363.90625, |
|
"learning_rate": 4.490663708633759e-06, |
|
"loss": 0.0286, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.1055647994083935, |
|
"grad_norm": 245921.84375, |
|
"learning_rate": 4.472176002958033e-06, |
|
"loss": 0.0307, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.1092623405435384, |
|
"grad_norm": 295249.21875, |
|
"learning_rate": 4.453688297282307e-06, |
|
"loss": 0.0275, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.1129598816786836, |
|
"grad_norm": 200136.734375, |
|
"learning_rate": 4.435200591606582e-06, |
|
"loss": 0.0321, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.1166574228138288, |
|
"grad_norm": 317951.15625, |
|
"learning_rate": 4.416712885930856e-06, |
|
"loss": 0.0295, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.1203549639489738, |
|
"grad_norm": 407278.71875, |
|
"learning_rate": 4.3982251802551305e-06, |
|
"loss": 0.0262, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.124052505084119, |
|
"grad_norm": 197725.1875, |
|
"learning_rate": 4.379737474579405e-06, |
|
"loss": 0.0254, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.1277500462192642, |
|
"grad_norm": 209825.9375, |
|
"learning_rate": 4.361249768903679e-06, |
|
"loss": 0.0283, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.1314475873544092, |
|
"grad_norm": 165184.359375, |
|
"learning_rate": 4.342762063227954e-06, |
|
"loss": 0.0265, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.1351451284895544, |
|
"grad_norm": 176458.5, |
|
"learning_rate": 4.3242743575522285e-06, |
|
"loss": 0.0285, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.1388426696246996, |
|
"grad_norm": 177583.390625, |
|
"learning_rate": 4.305786651876502e-06, |
|
"loss": 0.0295, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.1425402107598448, |
|
"grad_norm": 229201.890625, |
|
"learning_rate": 4.287298946200777e-06, |
|
"loss": 0.0279, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.1462377518949898, |
|
"grad_norm": 407142.15625, |
|
"learning_rate": 4.268811240525051e-06, |
|
"loss": 0.031, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.149935293030135, |
|
"grad_norm": 150456.34375, |
|
"learning_rate": 4.250323534849326e-06, |
|
"loss": 0.0315, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.1536328341652802, |
|
"grad_norm": 148167.5, |
|
"learning_rate": 4.2318358291736e-06, |
|
"loss": 0.0274, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.1573303753004252, |
|
"grad_norm": 207488.640625, |
|
"learning_rate": 4.213348123497874e-06, |
|
"loss": 0.0285, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.1610279164355704, |
|
"grad_norm": 193723.734375, |
|
"learning_rate": 4.194860417822149e-06, |
|
"loss": 0.0273, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.1647254575707155, |
|
"grad_norm": 367727.40625, |
|
"learning_rate": 4.176372712146423e-06, |
|
"loss": 0.0286, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.1684229987058605, |
|
"grad_norm": 170541.34375, |
|
"learning_rate": 4.1578850064706975e-06, |
|
"loss": 0.0242, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.1721205398410057, |
|
"grad_norm": 183535.828125, |
|
"learning_rate": 4.139397300794971e-06, |
|
"loss": 0.0262, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.175818080976151, |
|
"grad_norm": 286020.03125, |
|
"learning_rate": 4.120909595119246e-06, |
|
"loss": 0.0291, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.179515622111296, |
|
"grad_norm": 173424.8125, |
|
"learning_rate": 4.10242188944352e-06, |
|
"loss": 0.0259, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.183213163246441, |
|
"grad_norm": 127936.703125, |
|
"learning_rate": 4.083934183767795e-06, |
|
"loss": 0.0261, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.1869107043815863, |
|
"grad_norm": 293457.0625, |
|
"learning_rate": 4.065446478092069e-06, |
|
"loss": 0.0289, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.1906082455167313, |
|
"grad_norm": 175406.0625, |
|
"learning_rate": 4.046958772416343e-06, |
|
"loss": 0.0281, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.1943057866518765, |
|
"grad_norm": 163937.984375, |
|
"learning_rate": 4.028471066740618e-06, |
|
"loss": 0.0247, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.1980033277870217, |
|
"grad_norm": 229977.171875, |
|
"learning_rate": 4.009983361064892e-06, |
|
"loss": 0.026, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.2017008689221669, |
|
"grad_norm": 343183.40625, |
|
"learning_rate": 3.991495655389167e-06, |
|
"loss": 0.0305, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2053984100573119, |
|
"grad_norm": 303342.8125, |
|
"learning_rate": 3.973007949713441e-06, |
|
"loss": 0.0269, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.209095951192457, |
|
"grad_norm": 175805.6875, |
|
"learning_rate": 3.954520244037715e-06, |
|
"loss": 0.0267, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.2127934923276023, |
|
"grad_norm": 266918.03125, |
|
"learning_rate": 3.93603253836199e-06, |
|
"loss": 0.03, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.2164910334627472, |
|
"grad_norm": 81738.5546875, |
|
"learning_rate": 3.917544832686265e-06, |
|
"loss": 0.0239, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.2201885745978924, |
|
"grad_norm": 159759.4375, |
|
"learning_rate": 3.8990571270105384e-06, |
|
"loss": 0.0262, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.2238861157330376, |
|
"grad_norm": 131182.578125, |
|
"learning_rate": 3.880569421334812e-06, |
|
"loss": 0.0252, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.2275836568681826, |
|
"grad_norm": 215293.34375, |
|
"learning_rate": 3.862081715659087e-06, |
|
"loss": 0.025, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.2312811980033278, |
|
"grad_norm": 246891.640625, |
|
"learning_rate": 3.843594009983361e-06, |
|
"loss": 0.0267, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.234978739138473, |
|
"grad_norm": 203203.875, |
|
"learning_rate": 3.825106304307636e-06, |
|
"loss": 0.0269, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.238676280273618, |
|
"grad_norm": 218348.234375, |
|
"learning_rate": 3.80661859863191e-06, |
|
"loss": 0.0264, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.2423738214087632, |
|
"grad_norm": 378088.65625, |
|
"learning_rate": 3.7881308929561846e-06, |
|
"loss": 0.0262, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.2460713625439084, |
|
"grad_norm": 142539.5625, |
|
"learning_rate": 3.769643187280459e-06, |
|
"loss": 0.0226, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.2497689036790534, |
|
"grad_norm": 285223.40625, |
|
"learning_rate": 3.7511554816047328e-06, |
|
"loss": 0.0256, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.2534664448141986, |
|
"grad_norm": 355714.0625, |
|
"learning_rate": 3.7326677759290075e-06, |
|
"loss": 0.0244, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.2571639859493438, |
|
"grad_norm": 227273.59375, |
|
"learning_rate": 3.714180070253282e-06, |
|
"loss": 0.027, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.2608615270844887, |
|
"grad_norm": 155393.6875, |
|
"learning_rate": 3.695692364577556e-06, |
|
"loss": 0.0203, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.264559068219634, |
|
"grad_norm": 325504.15625, |
|
"learning_rate": 3.6772046589018308e-06, |
|
"loss": 0.0245, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.2682566093547791, |
|
"grad_norm": 240933.125, |
|
"learning_rate": 3.658716953226105e-06, |
|
"loss": 0.0258, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.271954150489924, |
|
"grad_norm": 106693.859375, |
|
"learning_rate": 3.6402292475503794e-06, |
|
"loss": 0.0226, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.2756516916250693, |
|
"grad_norm": 146650.640625, |
|
"learning_rate": 3.6217415418746536e-06, |
|
"loss": 0.0194, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.2793492327602145, |
|
"grad_norm": 310544.5625, |
|
"learning_rate": 3.603253836198928e-06, |
|
"loss": 0.0273, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.2830467738953595, |
|
"grad_norm": 176867.46875, |
|
"learning_rate": 3.5847661305232022e-06, |
|
"loss": 0.0245, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.2867443150305047, |
|
"grad_norm": 128429.8125, |
|
"learning_rate": 3.566278424847477e-06, |
|
"loss": 0.022, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.2904418561656499, |
|
"grad_norm": 139811.3125, |
|
"learning_rate": 3.547790719171751e-06, |
|
"loss": 0.0229, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.2941393973007949, |
|
"grad_norm": 252133.390625, |
|
"learning_rate": 3.5293030134960255e-06, |
|
"loss": 0.0254, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.29783693843594, |
|
"grad_norm": 151203.578125, |
|
"learning_rate": 3.5108153078203e-06, |
|
"loss": 0.0232, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.3015344795710853, |
|
"grad_norm": 147495.046875, |
|
"learning_rate": 3.492327602144574e-06, |
|
"loss": 0.0222, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.3052320207062302, |
|
"grad_norm": 278537.0, |
|
"learning_rate": 3.4738398964688484e-06, |
|
"loss": 0.0204, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.3089295618413754, |
|
"grad_norm": 201168.078125, |
|
"learning_rate": 3.455352190793123e-06, |
|
"loss": 0.0258, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.3126271029765206, |
|
"grad_norm": 291186.3125, |
|
"learning_rate": 3.436864485117397e-06, |
|
"loss": 0.0255, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.3163246441116656, |
|
"grad_norm": 105182.578125, |
|
"learning_rate": 3.4183767794416717e-06, |
|
"loss": 0.0213, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.3200221852468108, |
|
"grad_norm": 192964.09375, |
|
"learning_rate": 3.3998890737659455e-06, |
|
"loss": 0.0234, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.323719726381956, |
|
"grad_norm": 187349.546875, |
|
"learning_rate": 3.3814013680902203e-06, |
|
"loss": 0.0231, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.3274172675171012, |
|
"grad_norm": 91186.453125, |
|
"learning_rate": 3.362913662414495e-06, |
|
"loss": 0.0216, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.3311148086522462, |
|
"grad_norm": 197646.78125, |
|
"learning_rate": 3.344425956738769e-06, |
|
"loss": 0.0216, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.3348123497873914, |
|
"grad_norm": 113568.28125, |
|
"learning_rate": 3.3259382510630435e-06, |
|
"loss": 0.0167, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.3385098909225366, |
|
"grad_norm": 171078.3125, |
|
"learning_rate": 3.307450545387318e-06, |
|
"loss": 0.0245, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.3422074320576816, |
|
"grad_norm": 280644.15625, |
|
"learning_rate": 3.2889628397115917e-06, |
|
"loss": 0.0196, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.3459049731928268, |
|
"grad_norm": 107269.703125, |
|
"learning_rate": 3.2704751340358664e-06, |
|
"loss": 0.0191, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.349602514327972, |
|
"grad_norm": 152088.6875, |
|
"learning_rate": 3.251987428360141e-06, |
|
"loss": 0.0214, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.3533000554631172, |
|
"grad_norm": 169087.515625, |
|
"learning_rate": 3.233499722684415e-06, |
|
"loss": 0.0226, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.3569975965982621, |
|
"grad_norm": 121175.640625, |
|
"learning_rate": 3.2150120170086897e-06, |
|
"loss": 0.0209, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.3606951377334073, |
|
"grad_norm": 149657.40625, |
|
"learning_rate": 3.1965243113329636e-06, |
|
"loss": 0.0188, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.3643926788685525, |
|
"grad_norm": 102583.3125, |
|
"learning_rate": 3.1780366056572383e-06, |
|
"loss": 0.0227, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.3680902200036975, |
|
"grad_norm": 220809.03125, |
|
"learning_rate": 3.1595488999815126e-06, |
|
"loss": 0.0184, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.3717877611388427, |
|
"grad_norm": 164637.875, |
|
"learning_rate": 3.141061194305787e-06, |
|
"loss": 0.0206, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.375485302273988, |
|
"grad_norm": 160252.15625, |
|
"learning_rate": 3.122573488630061e-06, |
|
"loss": 0.0179, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.3791828434091329, |
|
"grad_norm": 113583.3984375, |
|
"learning_rate": 3.104085782954336e-06, |
|
"loss": 0.0191, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.382880384544278, |
|
"grad_norm": 171301.390625, |
|
"learning_rate": 3.0855980772786097e-06, |
|
"loss": 0.0189, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.3865779256794233, |
|
"grad_norm": 141760.046875, |
|
"learning_rate": 3.0671103716028845e-06, |
|
"loss": 0.0189, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.3902754668145683, |
|
"grad_norm": 164532.734375, |
|
"learning_rate": 3.0486226659271587e-06, |
|
"loss": 0.0211, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.3939730079497135, |
|
"grad_norm": 201083.53125, |
|
"learning_rate": 3.030134960251433e-06, |
|
"loss": 0.0203, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.3976705490848587, |
|
"grad_norm": 114150.078125, |
|
"learning_rate": 3.0116472545757073e-06, |
|
"loss": 0.0202, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.4013680902200036, |
|
"grad_norm": 105227.3046875, |
|
"learning_rate": 2.9931595488999816e-06, |
|
"loss": 0.0179, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.4050656313551488, |
|
"grad_norm": 137791.65625, |
|
"learning_rate": 2.974671843224256e-06, |
|
"loss": 0.0178, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.408763172490294, |
|
"grad_norm": 93458.9921875, |
|
"learning_rate": 2.9561841375485306e-06, |
|
"loss": 0.0188, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.412460713625439, |
|
"grad_norm": 132947.59375, |
|
"learning_rate": 2.9376964318728045e-06, |
|
"loss": 0.0231, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.4161582547605842, |
|
"grad_norm": 137827.40625, |
|
"learning_rate": 2.919208726197079e-06, |
|
"loss": 0.0188, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.4198557958957294, |
|
"grad_norm": 221912.953125, |
|
"learning_rate": 2.900721020521354e-06, |
|
"loss": 0.0192, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.4235533370308744, |
|
"grad_norm": 167596.15625, |
|
"learning_rate": 2.8822333148456278e-06, |
|
"loss": 0.0188, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.4272508781660196, |
|
"grad_norm": 222441.71875, |
|
"learning_rate": 2.8637456091699025e-06, |
|
"loss": 0.0215, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.4309484193011648, |
|
"grad_norm": 133030.28125, |
|
"learning_rate": 2.8452579034941768e-06, |
|
"loss": 0.0175, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.4346459604363098, |
|
"grad_norm": 170401.625, |
|
"learning_rate": 2.826770197818451e-06, |
|
"loss": 0.0184, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.438343501571455, |
|
"grad_norm": 249026.03125, |
|
"learning_rate": 2.8082824921427254e-06, |
|
"loss": 0.0187, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.4420410427066002, |
|
"grad_norm": 153504.90625, |
|
"learning_rate": 2.7897947864669992e-06, |
|
"loss": 0.0184, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.4457385838417451, |
|
"grad_norm": 116464.4765625, |
|
"learning_rate": 2.771307080791274e-06, |
|
"loss": 0.0195, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.4494361249768903, |
|
"grad_norm": 134998.984375, |
|
"learning_rate": 2.7528193751155486e-06, |
|
"loss": 0.0191, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.4531336661120355, |
|
"grad_norm": 177865.984375, |
|
"learning_rate": 2.7343316694398225e-06, |
|
"loss": 0.0169, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.4568312072471805, |
|
"grad_norm": 217818.40625, |
|
"learning_rate": 2.7158439637640972e-06, |
|
"loss": 0.0165, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.4605287483823257, |
|
"grad_norm": 267455.9375, |
|
"learning_rate": 2.6973562580883715e-06, |
|
"loss": 0.0202, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.464226289517471, |
|
"grad_norm": 91404.4296875, |
|
"learning_rate": 2.678868552412646e-06, |
|
"loss": 0.017, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.467923830652616, |
|
"grad_norm": 253760.109375, |
|
"learning_rate": 2.66038084673692e-06, |
|
"loss": 0.0157, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.471621371787761, |
|
"grad_norm": 108044.6015625, |
|
"learning_rate": 2.641893141061195e-06, |
|
"loss": 0.0187, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.4753189129229063, |
|
"grad_norm": 114390.0, |
|
"learning_rate": 2.6234054353854687e-06, |
|
"loss": 0.0168, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.4790164540580513, |
|
"grad_norm": 213150.25, |
|
"learning_rate": 2.6049177297097434e-06, |
|
"loss": 0.0165, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.4827139951931965, |
|
"grad_norm": 124014.53125, |
|
"learning_rate": 2.5864300240340173e-06, |
|
"loss": 0.0138, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.4864115363283417, |
|
"grad_norm": 125034.8984375, |
|
"learning_rate": 2.567942318358292e-06, |
|
"loss": 0.0151, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.4901090774634869, |
|
"grad_norm": 93910.4453125, |
|
"learning_rate": 2.5494546126825663e-06, |
|
"loss": 0.0175, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.4938066185986318, |
|
"grad_norm": 99099.5390625, |
|
"learning_rate": 2.5309669070068405e-06, |
|
"loss": 0.017, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.497504159733777, |
|
"grad_norm": 101766.75, |
|
"learning_rate": 2.512479201331115e-06, |
|
"loss": 0.0158, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.501201700868922, |
|
"grad_norm": 146969.875, |
|
"learning_rate": 2.493991495655389e-06, |
|
"loss": 0.0171, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.5048992420040674, |
|
"grad_norm": 113886.953125, |
|
"learning_rate": 2.475503789979664e-06, |
|
"loss": 0.0157, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.5085967831392124, |
|
"grad_norm": 310139.90625, |
|
"learning_rate": 2.457016084303938e-06, |
|
"loss": 0.0163, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.5122943242743574, |
|
"grad_norm": 78983.7890625, |
|
"learning_rate": 2.4385283786282124e-06, |
|
"loss": 0.0177, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.5159918654095028, |
|
"grad_norm": 155035.984375, |
|
"learning_rate": 2.4200406729524867e-06, |
|
"loss": 0.0172, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.5196894065446478, |
|
"grad_norm": 111692.59375, |
|
"learning_rate": 2.4015529672767614e-06, |
|
"loss": 0.0135, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.523386947679793, |
|
"grad_norm": 89581.5546875, |
|
"learning_rate": 2.3830652616010357e-06, |
|
"loss": 0.016, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.5270844888149382, |
|
"grad_norm": 74326.9140625, |
|
"learning_rate": 2.36457755592531e-06, |
|
"loss": 0.0146, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.5307820299500832, |
|
"grad_norm": 86257.5625, |
|
"learning_rate": 2.3460898502495843e-06, |
|
"loss": 0.0158, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.5344795710852284, |
|
"grad_norm": 204998.53125, |
|
"learning_rate": 2.3276021445738586e-06, |
|
"loss": 0.0166, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.5381771122203736, |
|
"grad_norm": 131477.296875, |
|
"learning_rate": 2.309114438898133e-06, |
|
"loss": 0.0163, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.5418746533555185, |
|
"grad_norm": 62000.34375, |
|
"learning_rate": 2.290626733222407e-06, |
|
"loss": 0.0141, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.5455721944906637, |
|
"grad_norm": 131954.5, |
|
"learning_rate": 2.272139027546682e-06, |
|
"loss": 0.0143, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.549269735625809, |
|
"grad_norm": 176431.921875, |
|
"learning_rate": 2.253651321870956e-06, |
|
"loss": 0.014, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.552967276760954, |
|
"grad_norm": 168082.734375, |
|
"learning_rate": 2.2351636161952305e-06, |
|
"loss": 0.0163, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.5566648178960991, |
|
"grad_norm": 213557.40625, |
|
"learning_rate": 2.2166759105195047e-06, |
|
"loss": 0.0138, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.5603623590312443, |
|
"grad_norm": 149194.671875, |
|
"learning_rate": 2.198188204843779e-06, |
|
"loss": 0.0145, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.5640599001663893, |
|
"grad_norm": 108979.765625, |
|
"learning_rate": 2.1797004991680533e-06, |
|
"loss": 0.015, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.5677574413015345, |
|
"grad_norm": 113606.09375, |
|
"learning_rate": 2.1612127934923276e-06, |
|
"loss": 0.0148, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.5714549824366797, |
|
"grad_norm": 54511.078125, |
|
"learning_rate": 2.1427250878166023e-06, |
|
"loss": 0.013, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.5751525235718247, |
|
"grad_norm": 223038.484375, |
|
"learning_rate": 2.1242373821408766e-06, |
|
"loss": 0.0123, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.5788500647069699, |
|
"grad_norm": 81127.296875, |
|
"learning_rate": 2.105749676465151e-06, |
|
"loss": 0.0179, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.582547605842115, |
|
"grad_norm": 205924.984375, |
|
"learning_rate": 2.087261970789425e-06, |
|
"loss": 0.0139, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.58624514697726, |
|
"grad_norm": 136897.609375, |
|
"learning_rate": 2.0687742651136995e-06, |
|
"loss": 0.013, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.5899426881124052, |
|
"grad_norm": 74720.484375, |
|
"learning_rate": 2.0502865594379738e-06, |
|
"loss": 0.0144, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.5936402292475504, |
|
"grad_norm": 131154.421875, |
|
"learning_rate": 2.031798853762248e-06, |
|
"loss": 0.0141, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.5973377703826954, |
|
"grad_norm": 141421.515625, |
|
"learning_rate": 2.0133111480865224e-06, |
|
"loss": 0.0141, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.6010353115178406, |
|
"grad_norm": 95285.7890625, |
|
"learning_rate": 1.994823442410797e-06, |
|
"loss": 0.0149, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.6047328526529858, |
|
"grad_norm": 259090.546875, |
|
"learning_rate": 1.9763357367350714e-06, |
|
"loss": 0.0143, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.6084303937881308, |
|
"grad_norm": 129890.265625, |
|
"learning_rate": 1.9578480310593456e-06, |
|
"loss": 0.0124, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.612127934923276, |
|
"grad_norm": 199939.984375, |
|
"learning_rate": 1.9393603253836204e-06, |
|
"loss": 0.0124, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.6158254760584212, |
|
"grad_norm": 105627.1640625, |
|
"learning_rate": 1.9208726197078946e-06, |
|
"loss": 0.0146, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.6195230171935662, |
|
"grad_norm": 111494.3125, |
|
"learning_rate": 1.9023849140321687e-06, |
|
"loss": 0.0118, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.6232205583287114, |
|
"grad_norm": 162020.4375, |
|
"learning_rate": 1.883897208356443e-06, |
|
"loss": 0.0133, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.6269180994638566, |
|
"grad_norm": 151192.859375, |
|
"learning_rate": 1.8654095026807175e-06, |
|
"loss": 0.0146, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.6306156405990015, |
|
"grad_norm": 92355.328125, |
|
"learning_rate": 1.8469217970049918e-06, |
|
"loss": 0.0125, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.6343131817341467, |
|
"grad_norm": 86571.875, |
|
"learning_rate": 1.828434091329266e-06, |
|
"loss": 0.0128, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.638010722869292, |
|
"grad_norm": 155513.75, |
|
"learning_rate": 1.8099463856535404e-06, |
|
"loss": 0.0149, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.641708264004437, |
|
"grad_norm": 140762.703125, |
|
"learning_rate": 1.7914586799778149e-06, |
|
"loss": 0.0141, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.6454058051395821, |
|
"grad_norm": 129466.796875, |
|
"learning_rate": 1.7729709743020892e-06, |
|
"loss": 0.0122, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.6491033462747273, |
|
"grad_norm": 134758.546875, |
|
"learning_rate": 1.7544832686263635e-06, |
|
"loss": 0.0133, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.6528008874098723, |
|
"grad_norm": 81380.0546875, |
|
"learning_rate": 1.7359955629506382e-06, |
|
"loss": 0.0131, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.6564984285450177, |
|
"grad_norm": 102433.5234375, |
|
"learning_rate": 1.7175078572749125e-06, |
|
"loss": 0.0126, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.6601959696801627, |
|
"grad_norm": 95113.1640625, |
|
"learning_rate": 1.6990201515991865e-06, |
|
"loss": 0.0133, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.6638935108153077, |
|
"grad_norm": 194246.734375, |
|
"learning_rate": 1.6805324459234608e-06, |
|
"loss": 0.0116, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.667591051950453, |
|
"grad_norm": 128436.3125, |
|
"learning_rate": 1.6620447402477356e-06, |
|
"loss": 0.0139, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.671288593085598, |
|
"grad_norm": 93527.5546875, |
|
"learning_rate": 1.6435570345720098e-06, |
|
"loss": 0.0112, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.674986134220743, |
|
"grad_norm": 85030.171875, |
|
"learning_rate": 1.6250693288962841e-06, |
|
"loss": 0.0117, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.6786836753558885, |
|
"grad_norm": 193011.28125, |
|
"learning_rate": 1.6065816232205584e-06, |
|
"loss": 0.0122, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.6823812164910334, |
|
"grad_norm": 167224.15625, |
|
"learning_rate": 1.588093917544833e-06, |
|
"loss": 0.0137, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.6860787576261784, |
|
"grad_norm": 97394.6328125, |
|
"learning_rate": 1.5696062118691072e-06, |
|
"loss": 0.0114, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.6897762987613238, |
|
"grad_norm": 61188.08203125, |
|
"learning_rate": 1.5511185061933815e-06, |
|
"loss": 0.0123, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.6934738398964688, |
|
"grad_norm": 109640.2265625, |
|
"learning_rate": 1.532630800517656e-06, |
|
"loss": 0.0119, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.697171381031614, |
|
"grad_norm": 96847.4296875, |
|
"learning_rate": 1.5141430948419303e-06, |
|
"loss": 0.0109, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.7008689221667592, |
|
"grad_norm": 138873.15625, |
|
"learning_rate": 1.4956553891662046e-06, |
|
"loss": 0.0125, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.7045664633019042, |
|
"grad_norm": 92997.3203125, |
|
"learning_rate": 1.4771676834904789e-06, |
|
"loss": 0.0115, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.7082640044370494, |
|
"grad_norm": 108250.3359375, |
|
"learning_rate": 1.4586799778147534e-06, |
|
"loss": 0.0116, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.7119615455721946, |
|
"grad_norm": 96745.375, |
|
"learning_rate": 1.4401922721390277e-06, |
|
"loss": 0.0108, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.7156590867073396, |
|
"grad_norm": 71503.3515625, |
|
"learning_rate": 1.421704566463302e-06, |
|
"loss": 0.0117, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.7193566278424848, |
|
"grad_norm": 131563.34375, |
|
"learning_rate": 1.4032168607875762e-06, |
|
"loss": 0.0111, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.72305416897763, |
|
"grad_norm": 152288.3125, |
|
"learning_rate": 1.3847291551118507e-06, |
|
"loss": 0.0131, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.726751710112775, |
|
"grad_norm": 188523.875, |
|
"learning_rate": 1.366241449436125e-06, |
|
"loss": 0.0117, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.7304492512479202, |
|
"grad_norm": 135298.65625, |
|
"learning_rate": 1.3477537437603993e-06, |
|
"loss": 0.0098, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.7341467923830653, |
|
"grad_norm": 73664.140625, |
|
"learning_rate": 1.3292660380846738e-06, |
|
"loss": 0.0113, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.7378443335182103, |
|
"grad_norm": 157000.625, |
|
"learning_rate": 1.3107783324089481e-06, |
|
"loss": 0.0111, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.7415418746533555, |
|
"grad_norm": 114460.046875, |
|
"learning_rate": 1.2922906267332224e-06, |
|
"loss": 0.0101, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.7452394157885007, |
|
"grad_norm": 86580.3046875, |
|
"learning_rate": 1.2738029210574967e-06, |
|
"loss": 0.0109, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.7489369569236457, |
|
"grad_norm": 105799.8984375, |
|
"learning_rate": 1.2553152153817714e-06, |
|
"loss": 0.0113, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.752634498058791, |
|
"grad_norm": 141433.953125, |
|
"learning_rate": 1.2368275097060457e-06, |
|
"loss": 0.0104, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.756332039193936, |
|
"grad_norm": 232147.828125, |
|
"learning_rate": 1.2183398040303198e-06, |
|
"loss": 0.0105, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.760029580329081, |
|
"grad_norm": 91419.984375, |
|
"learning_rate": 1.1998520983545943e-06, |
|
"loss": 0.0099, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.7637271214642263, |
|
"grad_norm": 105158.8515625, |
|
"learning_rate": 1.1813643926788688e-06, |
|
"loss": 0.0086, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.7674246625993715, |
|
"grad_norm": 99868.90625, |
|
"learning_rate": 1.162876687003143e-06, |
|
"loss": 0.0103, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.7711222037345165, |
|
"grad_norm": 91508.3046875, |
|
"learning_rate": 1.1443889813274174e-06, |
|
"loss": 0.0097, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.7748197448696617, |
|
"grad_norm": 124763.6640625, |
|
"learning_rate": 1.1259012756516916e-06, |
|
"loss": 0.0101, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.7785172860048069, |
|
"grad_norm": 68321.0078125, |
|
"learning_rate": 1.1074135699759661e-06, |
|
"loss": 0.0106, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.7822148271399518, |
|
"grad_norm": 54323.26171875, |
|
"learning_rate": 1.0889258643002404e-06, |
|
"loss": 0.0104, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.785912368275097, |
|
"grad_norm": 170615.03125, |
|
"learning_rate": 1.0704381586245147e-06, |
|
"loss": 0.009, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.7896099094102422, |
|
"grad_norm": 54507.4921875, |
|
"learning_rate": 1.051950452948789e-06, |
|
"loss": 0.0113, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.7933074505453872, |
|
"grad_norm": 224390.078125, |
|
"learning_rate": 1.0334627472730635e-06, |
|
"loss": 0.0081, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.7970049916805324, |
|
"grad_norm": 114423.484375, |
|
"learning_rate": 1.0149750415973378e-06, |
|
"loss": 0.0092, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.8007025328156776, |
|
"grad_norm": 40952.4609375, |
|
"learning_rate": 9.96487335921612e-07, |
|
"loss": 0.008, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.8044000739508226, |
|
"grad_norm": 125467.2421875, |
|
"learning_rate": 9.779996302458866e-07, |
|
"loss": 0.012, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.8080976150859678, |
|
"grad_norm": 67106.8046875, |
|
"learning_rate": 9.595119245701609e-07, |
|
"loss": 0.0093, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.811795156221113, |
|
"grad_norm": 53429.3359375, |
|
"learning_rate": 9.410242188944353e-07, |
|
"loss": 0.0099, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.815492697356258, |
|
"grad_norm": 66503.3515625, |
|
"learning_rate": 9.225365132187096e-07, |
|
"loss": 0.0098, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.8191902384914034, |
|
"grad_norm": 86430.2734375, |
|
"learning_rate": 9.04048807542984e-07, |
|
"loss": 0.009, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.8228877796265484, |
|
"grad_norm": 165746.984375, |
|
"learning_rate": 8.855611018672583e-07, |
|
"loss": 0.0091, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.8265853207616933, |
|
"grad_norm": 105324.359375, |
|
"learning_rate": 8.670733961915328e-07, |
|
"loss": 0.0091, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.8302828618968388, |
|
"grad_norm": 89842.6015625, |
|
"learning_rate": 8.485856905158069e-07, |
|
"loss": 0.0092, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.8339804030319837, |
|
"grad_norm": 206000.046875, |
|
"learning_rate": 8.300979848400814e-07, |
|
"loss": 0.0098, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.8376779441671287, |
|
"grad_norm": 79479.296875, |
|
"learning_rate": 8.116102791643558e-07, |
|
"loss": 0.0081, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.8413754853022741, |
|
"grad_norm": 42255.9140625, |
|
"learning_rate": 7.931225734886301e-07, |
|
"loss": 0.0092, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.845073026437419, |
|
"grad_norm": 48692.4453125, |
|
"learning_rate": 7.746348678129045e-07, |
|
"loss": 0.0096, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.848770567572564, |
|
"grad_norm": 60314.51171875, |
|
"learning_rate": 7.561471621371788e-07, |
|
"loss": 0.0087, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.8524681087077095, |
|
"grad_norm": 67695.015625, |
|
"learning_rate": 7.376594564614532e-07, |
|
"loss": 0.0095, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.8561656498428545, |
|
"grad_norm": 57189.34765625, |
|
"learning_rate": 7.191717507857275e-07, |
|
"loss": 0.008, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.8598631909779997, |
|
"grad_norm": 47331.48046875, |
|
"learning_rate": 7.006840451100019e-07, |
|
"loss": 0.0087, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.8635607321131449, |
|
"grad_norm": 78001.078125, |
|
"learning_rate": 6.821963394342762e-07, |
|
"loss": 0.0086, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.8672582732482899, |
|
"grad_norm": 134432.046875, |
|
"learning_rate": 6.637086337585506e-07, |
|
"loss": 0.0101, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.870955814383435, |
|
"grad_norm": 70737.28125, |
|
"learning_rate": 6.452209280828249e-07, |
|
"loss": 0.0091, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.8746533555185803, |
|
"grad_norm": 100655.2578125, |
|
"learning_rate": 6.267332224070994e-07, |
|
"loss": 0.0072, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.8783508966537252, |
|
"grad_norm": 86067.2890625, |
|
"learning_rate": 6.082455167313737e-07, |
|
"loss": 0.0081, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.8820484377888704, |
|
"grad_norm": 113390.1640625, |
|
"learning_rate": 5.897578110556481e-07, |
|
"loss": 0.0096, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.8857459789240156, |
|
"grad_norm": 148825.921875, |
|
"learning_rate": 5.712701053799224e-07, |
|
"loss": 0.0102, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.8894435200591606, |
|
"grad_norm": 81128.765625, |
|
"learning_rate": 5.527823997041967e-07, |
|
"loss": 0.0089, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.8931410611943058, |
|
"grad_norm": 91735.953125, |
|
"learning_rate": 5.342946940284711e-07, |
|
"loss": 0.0081, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.896838602329451, |
|
"grad_norm": 171614.328125, |
|
"learning_rate": 5.158069883527455e-07, |
|
"loss": 0.0078, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.900536143464596, |
|
"grad_norm": 136330.6875, |
|
"learning_rate": 4.973192826770198e-07, |
|
"loss": 0.0075, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.9042336845997412, |
|
"grad_norm": 72486.4453125, |
|
"learning_rate": 4.788315770012942e-07, |
|
"loss": 0.0096, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.9079312257348864, |
|
"grad_norm": 119471.4453125, |
|
"learning_rate": 4.6034387132556857e-07, |
|
"loss": 0.0079, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.9116287668700314, |
|
"grad_norm": 83491.46875, |
|
"learning_rate": 4.418561656498429e-07, |
|
"loss": 0.0087, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.9153263080051766, |
|
"grad_norm": 161448.078125, |
|
"learning_rate": 4.2336845997411725e-07, |
|
"loss": 0.0088, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.9190238491403218, |
|
"grad_norm": 190009.40625, |
|
"learning_rate": 4.048807542983916e-07, |
|
"loss": 0.009, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.9227213902754667, |
|
"grad_norm": 105300.2421875, |
|
"learning_rate": 3.8639304862266594e-07, |
|
"loss": 0.0087, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.926418931410612, |
|
"grad_norm": 59896.84375, |
|
"learning_rate": 3.679053429469403e-07, |
|
"loss": 0.0082, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.9301164725457571, |
|
"grad_norm": 86433.484375, |
|
"learning_rate": 3.494176372712146e-07, |
|
"loss": 0.008, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.933814013680902, |
|
"grad_norm": 82433.390625, |
|
"learning_rate": 3.3092993159548907e-07, |
|
"loss": 0.0073, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.9375115548160473, |
|
"grad_norm": 158433.75, |
|
"learning_rate": 3.1244222591976336e-07, |
|
"loss": 0.0087, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.9412090959511925, |
|
"grad_norm": 74148.828125, |
|
"learning_rate": 2.9395452024403776e-07, |
|
"loss": 0.0078, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.9449066370863375, |
|
"grad_norm": 122634.9140625, |
|
"learning_rate": 2.754668145683121e-07, |
|
"loss": 0.0088, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.9486041782214827, |
|
"grad_norm": 81306.5703125, |
|
"learning_rate": 2.5697910889258644e-07, |
|
"loss": 0.0082, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.9523017193566279, |
|
"grad_norm": 62461.99609375, |
|
"learning_rate": 2.384914032168608e-07, |
|
"loss": 0.0074, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.9559992604917729, |
|
"grad_norm": 293536.875, |
|
"learning_rate": 2.2000369754113515e-07, |
|
"loss": 0.0076, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.959696801626918, |
|
"grad_norm": 98041.9765625, |
|
"learning_rate": 2.015159918654095e-07, |
|
"loss": 0.009, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.9633943427620633, |
|
"grad_norm": 102565.984375, |
|
"learning_rate": 1.830282861896839e-07, |
|
"loss": 0.0083, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.9670918838972082, |
|
"grad_norm": 53710.390625, |
|
"learning_rate": 1.6454058051395823e-07, |
|
"loss": 0.007, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.9707894250323534, |
|
"grad_norm": 150167.515625, |
|
"learning_rate": 1.4605287483823258e-07, |
|
"loss": 0.0072, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.9744869661674986, |
|
"grad_norm": 134856.953125, |
|
"learning_rate": 1.2756516916250695e-07, |
|
"loss": 0.0095, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.9781845073026436, |
|
"grad_norm": 86079.8125, |
|
"learning_rate": 1.090774634867813e-07, |
|
"loss": 0.0075, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.981882048437789, |
|
"grad_norm": 142126.03125, |
|
"learning_rate": 9.058975781105564e-08, |
|
"loss": 0.0072, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 1.985579589572934, |
|
"grad_norm": 62874.44140625, |
|
"learning_rate": 7.210205213533001e-08, |
|
"loss": 0.0071, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 1.989277130708079, |
|
"grad_norm": 87455.859375, |
|
"learning_rate": 5.361434645960437e-08, |
|
"loss": 0.0082, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 1.9929746718432244, |
|
"grad_norm": 109955.4140625, |
|
"learning_rate": 3.5126640783878725e-08, |
|
"loss": 0.0082, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 1.9966722129783694, |
|
"grad_norm": 66628.1484375, |
|
"learning_rate": 1.663893510815308e-08, |
|
"loss": 0.0076, |
|
"step": 10800 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 10818, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.922032294823199e+17, |
|
"train_batch_size": 200, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|