|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994141769185706, |
|
"eval_steps": 500, |
|
"global_step": 853, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0023432923257176333, |
|
"grad_norm": 0.00011052378977183253, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0046865846514352666, |
|
"grad_norm": 0.00020697808940894902, |
|
"learning_rate": 4.9941245593419514e-06, |
|
"loss": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.007029876977152899, |
|
"grad_norm": 0.0012532881228253245, |
|
"learning_rate": 4.982373678025853e-06, |
|
"loss": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.009373169302870533, |
|
"grad_norm": 0.0008086035377345979, |
|
"learning_rate": 4.970622796709754e-06, |
|
"loss": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.011716461628588167, |
|
"grad_norm": 0.0021155672147870064, |
|
"learning_rate": 4.958871915393655e-06, |
|
"loss": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014059753954305799, |
|
"grad_norm": 0.0012233309680595994, |
|
"learning_rate": 4.947121034077556e-06, |
|
"loss": 0.0, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.016403046280023433, |
|
"grad_norm": 0.0027737286873161793, |
|
"learning_rate": 4.9353701527614576e-06, |
|
"loss": 0.0, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.018746338605741066, |
|
"grad_norm": 0.0042906939052045345, |
|
"learning_rate": 4.923619271445359e-06, |
|
"loss": 0.0, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0210896309314587, |
|
"grad_norm": 0.0005172386299818754, |
|
"learning_rate": 4.91186839012926e-06, |
|
"loss": 0.0, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.023432923257176334, |
|
"grad_norm": 0.002410772955045104, |
|
"learning_rate": 4.900117508813161e-06, |
|
"loss": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.025776215582893967, |
|
"grad_norm": 0.6443753242492676, |
|
"learning_rate": 4.8883666274970625e-06, |
|
"loss": 0.0027, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.028119507908611598, |
|
"grad_norm": 0.004394118674099445, |
|
"learning_rate": 4.876615746180964e-06, |
|
"loss": 0.0001, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03046280023432923, |
|
"grad_norm": 0.006466630846261978, |
|
"learning_rate": 4.864864864864866e-06, |
|
"loss": 0.0001, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.032806092560046865, |
|
"grad_norm": 0.011924203485250473, |
|
"learning_rate": 4.853113983548767e-06, |
|
"loss": 0.0001, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0351493848857645, |
|
"grad_norm": 0.23746930062770844, |
|
"learning_rate": 4.841363102232668e-06, |
|
"loss": 0.0001, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03749267721148213, |
|
"grad_norm": 0.0031001348979771137, |
|
"learning_rate": 4.8296122209165694e-06, |
|
"loss": 0.0, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03983596953719976, |
|
"grad_norm": 0.0029028633143752813, |
|
"learning_rate": 4.817861339600471e-06, |
|
"loss": 0.0, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0421792618629174, |
|
"grad_norm": 0.014626468531787395, |
|
"learning_rate": 4.806110458284372e-06, |
|
"loss": 0.0001, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.04452255418863503, |
|
"grad_norm": 0.001155451056547463, |
|
"learning_rate": 4.794359576968273e-06, |
|
"loss": 0.0, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.04686584651435267, |
|
"grad_norm": 0.003476829966530204, |
|
"learning_rate": 4.782608695652174e-06, |
|
"loss": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0492091388400703, |
|
"grad_norm": 0.0002227002551080659, |
|
"learning_rate": 4.7708578143360756e-06, |
|
"loss": 0.0, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.051552431165787935, |
|
"grad_norm": 0.0001427282695658505, |
|
"learning_rate": 4.759106933019977e-06, |
|
"loss": 0.0, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.053895723491505565, |
|
"grad_norm": 0.0027408564928919077, |
|
"learning_rate": 4.747356051703878e-06, |
|
"loss": 0.0002, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.056239015817223195, |
|
"grad_norm": 0.0020253027323633432, |
|
"learning_rate": 4.735605170387779e-06, |
|
"loss": 0.0, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.05858230814294083, |
|
"grad_norm": 0.001760220737196505, |
|
"learning_rate": 4.723854289071681e-06, |
|
"loss": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06092560046865846, |
|
"grad_norm": 0.0010492791188880801, |
|
"learning_rate": 4.7121034077555825e-06, |
|
"loss": 0.0, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0632688927943761, |
|
"grad_norm": 0.002001305343583226, |
|
"learning_rate": 4.700352526439484e-06, |
|
"loss": 0.0, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.06561218512009373, |
|
"grad_norm": 0.18566887080669403, |
|
"learning_rate": 4.688601645123384e-06, |
|
"loss": 0.0009, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.06795547744581136, |
|
"grad_norm": 0.0009072807151824236, |
|
"learning_rate": 4.676850763807285e-06, |
|
"loss": 0.0, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.070298769771529, |
|
"grad_norm": 0.003983665257692337, |
|
"learning_rate": 4.665099882491187e-06, |
|
"loss": 0.0006, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07264206209724663, |
|
"grad_norm": 0.01946200616657734, |
|
"learning_rate": 4.653349001175089e-06, |
|
"loss": 0.0001, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.07498535442296426, |
|
"grad_norm": 0.004048655740916729, |
|
"learning_rate": 4.64159811985899e-06, |
|
"loss": 0.0, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0773286467486819, |
|
"grad_norm": 0.0005872617475688457, |
|
"learning_rate": 4.629847238542891e-06, |
|
"loss": 0.0001, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.07967193907439953, |
|
"grad_norm": 0.008831903338432312, |
|
"learning_rate": 4.618096357226792e-06, |
|
"loss": 0.0001, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.08201523140011717, |
|
"grad_norm": 0.006819219794124365, |
|
"learning_rate": 4.6063454759106936e-06, |
|
"loss": 0.0001, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0843585237258348, |
|
"grad_norm": 0.0007863900391384959, |
|
"learning_rate": 4.594594594594596e-06, |
|
"loss": 0.0, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.08670181605155243, |
|
"grad_norm": 0.032210394740104675, |
|
"learning_rate": 4.582843713278496e-06, |
|
"loss": 0.0001, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08904510837727006, |
|
"grad_norm": 0.2614983916282654, |
|
"learning_rate": 4.571092831962397e-06, |
|
"loss": 0.0008, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0913884007029877, |
|
"grad_norm": 0.0012551415711641312, |
|
"learning_rate": 4.5593419506462985e-06, |
|
"loss": 0.0, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.09373169302870533, |
|
"grad_norm": 0.0019108065171167254, |
|
"learning_rate": 4.5475910693302e-06, |
|
"loss": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09607498535442296, |
|
"grad_norm": 0.02294810675084591, |
|
"learning_rate": 4.535840188014101e-06, |
|
"loss": 0.0001, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0984182776801406, |
|
"grad_norm": 0.0012388118775561452, |
|
"learning_rate": 4.524089306698003e-06, |
|
"loss": 0.0, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.10076157000585823, |
|
"grad_norm": 0.001227575121447444, |
|
"learning_rate": 4.512338425381904e-06, |
|
"loss": 0.0001, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.10310486233157587, |
|
"grad_norm": 0.004755712114274502, |
|
"learning_rate": 4.5005875440658054e-06, |
|
"loss": 0.0001, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1054481546572935, |
|
"grad_norm": 0.00837083999067545, |
|
"learning_rate": 4.488836662749707e-06, |
|
"loss": 0.0001, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10779144698301113, |
|
"grad_norm": 0.48219314217567444, |
|
"learning_rate": 4.477085781433608e-06, |
|
"loss": 0.0017, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.11013473930872876, |
|
"grad_norm": 0.022060217335820198, |
|
"learning_rate": 4.465334900117509e-06, |
|
"loss": 0.0001, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.11247803163444639, |
|
"grad_norm": 0.0019385352497920394, |
|
"learning_rate": 4.45358401880141e-06, |
|
"loss": 0.0, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.11482132396016403, |
|
"grad_norm": 0.01225442998111248, |
|
"learning_rate": 4.4418331374853116e-06, |
|
"loss": 0.0001, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.11716461628588166, |
|
"grad_norm": 0.0005759520572610199, |
|
"learning_rate": 4.430082256169213e-06, |
|
"loss": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1195079086115993, |
|
"grad_norm": 0.02452813647687435, |
|
"learning_rate": 4.418331374853114e-06, |
|
"loss": 0.0001, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.12185120093731693, |
|
"grad_norm": 0.0078084710985422134, |
|
"learning_rate": 4.406580493537015e-06, |
|
"loss": 0.0001, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.12419449326303457, |
|
"grad_norm": 0.004263446666300297, |
|
"learning_rate": 4.394829612220917e-06, |
|
"loss": 0.0001, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.1265377855887522, |
|
"grad_norm": 0.0016304058954119682, |
|
"learning_rate": 4.3830787309048185e-06, |
|
"loss": 0.0001, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.12888107791446984, |
|
"grad_norm": 0.011672005988657475, |
|
"learning_rate": 4.37132784958872e-06, |
|
"loss": 0.0002, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13122437024018746, |
|
"grad_norm": 0.002603155327960849, |
|
"learning_rate": 4.359576968272621e-06, |
|
"loss": 0.0, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.1335676625659051, |
|
"grad_norm": 0.005059251096099615, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 0.0001, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.13591095489162272, |
|
"grad_norm": 0.0005816388293169439, |
|
"learning_rate": 4.3360752056404234e-06, |
|
"loss": 0.0001, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.13825424721734036, |
|
"grad_norm": 0.019756818190217018, |
|
"learning_rate": 4.324324324324325e-06, |
|
"loss": 0.0001, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.140597539543058, |
|
"grad_norm": 0.0023519208189100027, |
|
"learning_rate": 4.312573443008226e-06, |
|
"loss": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14294083186877563, |
|
"grad_norm": 0.0028086318634450436, |
|
"learning_rate": 4.300822561692127e-06, |
|
"loss": 0.0, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.14528412419449327, |
|
"grad_norm": 0.0022307527251541615, |
|
"learning_rate": 4.289071680376028e-06, |
|
"loss": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.14762741652021089, |
|
"grad_norm": 0.014247684739530087, |
|
"learning_rate": 4.2773207990599296e-06, |
|
"loss": 0.0001, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.14997070884592853, |
|
"grad_norm": 0.00011139630805701017, |
|
"learning_rate": 4.265569917743831e-06, |
|
"loss": 0.0, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.15231400117164617, |
|
"grad_norm": 0.000514341751113534, |
|
"learning_rate": 4.253819036427733e-06, |
|
"loss": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1546572934973638, |
|
"grad_norm": 0.002176255453377962, |
|
"learning_rate": 4.242068155111634e-06, |
|
"loss": 0.0001, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.15700058582308143, |
|
"grad_norm": 0.018497969955205917, |
|
"learning_rate": 4.230317273795535e-06, |
|
"loss": 0.0001, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.15934387814879905, |
|
"grad_norm": 0.013157431036233902, |
|
"learning_rate": 4.2185663924794365e-06, |
|
"loss": 0.0001, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.1616871704745167, |
|
"grad_norm": 0.007630129344761372, |
|
"learning_rate": 4.206815511163338e-06, |
|
"loss": 0.0, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.16403046280023434, |
|
"grad_norm": 0.0008055138750933111, |
|
"learning_rate": 4.195064629847239e-06, |
|
"loss": 0.0001, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16637375512595196, |
|
"grad_norm": 0.006306421477347612, |
|
"learning_rate": 4.18331374853114e-06, |
|
"loss": 0.0, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1687170474516696, |
|
"grad_norm": 0.020266445353627205, |
|
"learning_rate": 4.1715628672150414e-06, |
|
"loss": 0.0001, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.17106033977738722, |
|
"grad_norm": 0.00037427974166348577, |
|
"learning_rate": 4.159811985898943e-06, |
|
"loss": 0.0, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.17340363210310486, |
|
"grad_norm": 0.004259356763213873, |
|
"learning_rate": 4.148061104582844e-06, |
|
"loss": 0.0001, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1757469244288225, |
|
"grad_norm": 0.0010232679778710008, |
|
"learning_rate": 4.136310223266745e-06, |
|
"loss": 0.0001, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17809021675454012, |
|
"grad_norm": 0.003952402155846357, |
|
"learning_rate": 4.124559341950647e-06, |
|
"loss": 0.0, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.18043350908025776, |
|
"grad_norm": 0.0013295585522428155, |
|
"learning_rate": 4.112808460634548e-06, |
|
"loss": 0.0, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1827768014059754, |
|
"grad_norm": 0.013831949792802334, |
|
"learning_rate": 4.10105757931845e-06, |
|
"loss": 0.0001, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.18512009373169303, |
|
"grad_norm": 0.0036904062144458294, |
|
"learning_rate": 4.089306698002351e-06, |
|
"loss": 0.0, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.18746338605741067, |
|
"grad_norm": 0.002993196714669466, |
|
"learning_rate": 4.077555816686252e-06, |
|
"loss": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18980667838312829, |
|
"grad_norm": 0.0016740068094804883, |
|
"learning_rate": 4.0658049353701525e-06, |
|
"loss": 0.0001, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.19214997070884593, |
|
"grad_norm": 0.012307717464864254, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"loss": 0.0001, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.19449326303456357, |
|
"grad_norm": 0.0012654109159484506, |
|
"learning_rate": 4.042303172737956e-06, |
|
"loss": 0.0, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1968365553602812, |
|
"grad_norm": 0.12437883019447327, |
|
"learning_rate": 4.030552291421857e-06, |
|
"loss": 0.0006, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.19917984768599883, |
|
"grad_norm": 8.974138472694904e-05, |
|
"learning_rate": 4.018801410105758e-06, |
|
"loss": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.20152314001171645, |
|
"grad_norm": 0.0011903212871402502, |
|
"learning_rate": 4.007050528789659e-06, |
|
"loss": 0.0001, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2038664323374341, |
|
"grad_norm": 0.012350277975201607, |
|
"learning_rate": 3.995299647473561e-06, |
|
"loss": 0.0001, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.20620972466315174, |
|
"grad_norm": 0.01664598099887371, |
|
"learning_rate": 3.983548766157463e-06, |
|
"loss": 0.0001, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.20855301698886936, |
|
"grad_norm": 0.0064240009523928165, |
|
"learning_rate": 3.971797884841364e-06, |
|
"loss": 0.0001, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.210896309314587, |
|
"grad_norm": 0.0031362581066787243, |
|
"learning_rate": 3.960047003525264e-06, |
|
"loss": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.21323960164030462, |
|
"grad_norm": 0.00012566300574690104, |
|
"learning_rate": 3.9482961222091655e-06, |
|
"loss": 0.0001, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.21558289396602226, |
|
"grad_norm": 0.0018261070363223553, |
|
"learning_rate": 3.936545240893067e-06, |
|
"loss": 0.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.2179261862917399, |
|
"grad_norm": 0.0010897299507632852, |
|
"learning_rate": 3.924794359576969e-06, |
|
"loss": 0.0, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.22026947861745752, |
|
"grad_norm": 0.006528445053845644, |
|
"learning_rate": 3.91304347826087e-06, |
|
"loss": 0.0, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.22261277094317516, |
|
"grad_norm": 0.4626096785068512, |
|
"learning_rate": 3.901292596944771e-06, |
|
"loss": 0.0009, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.22495606326889278, |
|
"grad_norm": 0.002359338803216815, |
|
"learning_rate": 3.8895417156286725e-06, |
|
"loss": 0.0, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.22729935559461042, |
|
"grad_norm": 0.004821418318897486, |
|
"learning_rate": 3.877790834312574e-06, |
|
"loss": 0.0, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.22964264792032807, |
|
"grad_norm": 0.0011465001152828336, |
|
"learning_rate": 3.866039952996475e-06, |
|
"loss": 0.0008, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.23198594024604569, |
|
"grad_norm": 0.0007381247123703361, |
|
"learning_rate": 3.854289071680376e-06, |
|
"loss": 0.0001, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.23432923257176333, |
|
"grad_norm": 0.0023091183975338936, |
|
"learning_rate": 3.842538190364277e-06, |
|
"loss": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23667252489748097, |
|
"grad_norm": 0.0005714365397579968, |
|
"learning_rate": 3.830787309048179e-06, |
|
"loss": 0.0, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.2390158172231986, |
|
"grad_norm": 0.00351692084223032, |
|
"learning_rate": 3.81903642773208e-06, |
|
"loss": 0.0, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.24135910954891623, |
|
"grad_norm": 5.926425728830509e-05, |
|
"learning_rate": 3.8072855464159815e-06, |
|
"loss": 0.0, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.24370240187463385, |
|
"grad_norm": 0.0016421001637354493, |
|
"learning_rate": 3.7955346650998827e-06, |
|
"loss": 0.0, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.2460456942003515, |
|
"grad_norm": 0.012118808925151825, |
|
"learning_rate": 3.7837837837837844e-06, |
|
"loss": 0.0001, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.24838898652606914, |
|
"grad_norm": 0.00024874648079276085, |
|
"learning_rate": 3.7720329024676856e-06, |
|
"loss": 0.0002, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.2507322788517868, |
|
"grad_norm": 0.0017625248292461038, |
|
"learning_rate": 3.760282021151587e-06, |
|
"loss": 0.0, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.2530755711775044, |
|
"grad_norm": 0.0007431196281686425, |
|
"learning_rate": 3.748531139835488e-06, |
|
"loss": 0.0, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.255418863503222, |
|
"grad_norm": 0.0007026457460597157, |
|
"learning_rate": 3.7367802585193893e-06, |
|
"loss": 0.0, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.2577621558289397, |
|
"grad_norm": 0.002397920237854123, |
|
"learning_rate": 3.72502937720329e-06, |
|
"loss": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.2601054481546573, |
|
"grad_norm": 0.003177257487550378, |
|
"learning_rate": 3.713278495887192e-06, |
|
"loss": 0.0, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.2624487404803749, |
|
"grad_norm": 0.003142025787383318, |
|
"learning_rate": 3.7015276145710934e-06, |
|
"loss": 0.0001, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.26479203280609254, |
|
"grad_norm": 0.03788410872220993, |
|
"learning_rate": 3.6897767332549946e-06, |
|
"loss": 0.0002, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.2671353251318102, |
|
"grad_norm": 0.005685464479029179, |
|
"learning_rate": 3.6780258519388954e-06, |
|
"loss": 0.0003, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.2694786174575278, |
|
"grad_norm": 0.0010328789940103889, |
|
"learning_rate": 3.6662749706227966e-06, |
|
"loss": 0.0003, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.27182190978324544, |
|
"grad_norm": 0.0052024442702531815, |
|
"learning_rate": 3.6545240893066987e-06, |
|
"loss": 0.0, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.2741652021089631, |
|
"grad_norm": 0.006033598445355892, |
|
"learning_rate": 3.6427732079906e-06, |
|
"loss": 0.0, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.27650849443468073, |
|
"grad_norm": 0.00023948443413246423, |
|
"learning_rate": 3.6310223266745007e-06, |
|
"loss": 0.0001, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.27885178676039835, |
|
"grad_norm": 0.00016467843670397997, |
|
"learning_rate": 3.619271445358402e-06, |
|
"loss": 0.0, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.281195079086116, |
|
"grad_norm": 0.003566320287063718, |
|
"learning_rate": 3.607520564042303e-06, |
|
"loss": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.28353837141183363, |
|
"grad_norm": 0.00033969045034609735, |
|
"learning_rate": 3.5957696827262044e-06, |
|
"loss": 0.0, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.28588166373755125, |
|
"grad_norm": 0.0033994223922491074, |
|
"learning_rate": 3.5840188014101065e-06, |
|
"loss": 0.0, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.28822495606326887, |
|
"grad_norm": 0.14746786653995514, |
|
"learning_rate": 3.5722679200940073e-06, |
|
"loss": 0.0008, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.29056824838898654, |
|
"grad_norm": 0.012470235116779804, |
|
"learning_rate": 3.5605170387779085e-06, |
|
"loss": 0.0, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.29291154071470415, |
|
"grad_norm": 0.08307931572198868, |
|
"learning_rate": 3.5487661574618097e-06, |
|
"loss": 0.0003, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.29525483304042177, |
|
"grad_norm": 0.00033245363738387823, |
|
"learning_rate": 3.537015276145711e-06, |
|
"loss": 0.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.29759812536613944, |
|
"grad_norm": 0.0018247144762426615, |
|
"learning_rate": 3.525264394829612e-06, |
|
"loss": 0.0, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.29994141769185706, |
|
"grad_norm": 0.0011103990254923701, |
|
"learning_rate": 3.513513513513514e-06, |
|
"loss": 0.0001, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.3022847100175747, |
|
"grad_norm": 0.0010811882093548775, |
|
"learning_rate": 3.501762632197415e-06, |
|
"loss": 0.0, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.30462800234329235, |
|
"grad_norm": 0.011172047816216946, |
|
"learning_rate": 3.4900117508813163e-06, |
|
"loss": 0.0001, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.30697129466900996, |
|
"grad_norm": 0.0013676233356818557, |
|
"learning_rate": 3.4782608695652175e-06, |
|
"loss": 0.0, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3093145869947276, |
|
"grad_norm": 0.002147970488294959, |
|
"learning_rate": 3.4665099882491187e-06, |
|
"loss": 0.0, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.31165787932044525, |
|
"grad_norm": 0.0009826518362388015, |
|
"learning_rate": 3.4547591069330204e-06, |
|
"loss": 0.0, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.31400117164616287, |
|
"grad_norm": 0.001499099307693541, |
|
"learning_rate": 3.4430082256169216e-06, |
|
"loss": 0.0, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.3163444639718805, |
|
"grad_norm": 0.001323301112279296, |
|
"learning_rate": 3.431257344300823e-06, |
|
"loss": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3186877562975981, |
|
"grad_norm": 0.018010340631008148, |
|
"learning_rate": 3.419506462984724e-06, |
|
"loss": 0.0005, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.3210310486233158, |
|
"grad_norm": 0.0024064648896455765, |
|
"learning_rate": 3.4077555816686253e-06, |
|
"loss": 0.0, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.3233743409490334, |
|
"grad_norm": 0.02396260015666485, |
|
"learning_rate": 3.3960047003525265e-06, |
|
"loss": 0.0001, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.325717633274751, |
|
"grad_norm": 0.002070352202281356, |
|
"learning_rate": 3.384253819036428e-06, |
|
"loss": 0.0, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.3280609256004687, |
|
"grad_norm": 0.0003108434902969748, |
|
"learning_rate": 3.3725029377203294e-06, |
|
"loss": 0.0001, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3304042179261863, |
|
"grad_norm": 0.006573045626282692, |
|
"learning_rate": 3.3607520564042306e-06, |
|
"loss": 0.0001, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.3327475102519039, |
|
"grad_norm": 0.0004413512069731951, |
|
"learning_rate": 3.349001175088132e-06, |
|
"loss": 0.0001, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.3350908025776216, |
|
"grad_norm": 0.0005645502242259681, |
|
"learning_rate": 3.337250293772033e-06, |
|
"loss": 0.0, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.3374340949033392, |
|
"grad_norm": 0.00034579774364829063, |
|
"learning_rate": 3.3254994124559343e-06, |
|
"loss": 0.0, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.3397773872290568, |
|
"grad_norm": 0.003136229468509555, |
|
"learning_rate": 3.313748531139836e-06, |
|
"loss": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.34212067955477443, |
|
"grad_norm": 0.0031148705165833235, |
|
"learning_rate": 3.301997649823737e-06, |
|
"loss": 0.0, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.3444639718804921, |
|
"grad_norm": 0.0012612566351890564, |
|
"learning_rate": 3.2902467685076384e-06, |
|
"loss": 0.0, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.3468072642062097, |
|
"grad_norm": 0.0007469533011317253, |
|
"learning_rate": 3.2784958871915396e-06, |
|
"loss": 0.0, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.34915055653192734, |
|
"grad_norm": 0.04412250965833664, |
|
"learning_rate": 3.266745005875441e-06, |
|
"loss": 0.0003, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.351493848857645, |
|
"grad_norm": 0.004462533164769411, |
|
"learning_rate": 3.2549941245593425e-06, |
|
"loss": 0.0088, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3538371411833626, |
|
"grad_norm": 0.002911294111981988, |
|
"learning_rate": 3.2432432432432437e-06, |
|
"loss": 0.0006, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.35618043350908024, |
|
"grad_norm": 0.0015191801358014345, |
|
"learning_rate": 3.231492361927145e-06, |
|
"loss": 0.0, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.3585237258347979, |
|
"grad_norm": 0.017380721867084503, |
|
"learning_rate": 3.219741480611046e-06, |
|
"loss": 0.0094, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.36086701816051553, |
|
"grad_norm": 0.002749436302110553, |
|
"learning_rate": 3.2079905992949474e-06, |
|
"loss": 0.0001, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.36321031048623315, |
|
"grad_norm": 0.0008673086995258927, |
|
"learning_rate": 3.1962397179788486e-06, |
|
"loss": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.3655536028119508, |
|
"grad_norm": 0.00361701101064682, |
|
"learning_rate": 3.1844888366627503e-06, |
|
"loss": 0.0, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.36789689513766843, |
|
"grad_norm": 0.006906528025865555, |
|
"learning_rate": 3.1727379553466515e-06, |
|
"loss": 0.0, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.37024018746338605, |
|
"grad_norm": 2.259305238723755, |
|
"learning_rate": 3.1609870740305527e-06, |
|
"loss": 0.0157, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.37258347978910367, |
|
"grad_norm": 0.00017454673070460558, |
|
"learning_rate": 3.149236192714454e-06, |
|
"loss": 0.0, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.37492677211482134, |
|
"grad_norm": 0.16197967529296875, |
|
"learning_rate": 3.137485311398355e-06, |
|
"loss": 0.0009, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.37727006444053895, |
|
"grad_norm": 0.002247605938464403, |
|
"learning_rate": 3.1257344300822564e-06, |
|
"loss": 0.0, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.37961335676625657, |
|
"grad_norm": 0.023727795109152794, |
|
"learning_rate": 3.113983548766158e-06, |
|
"loss": 0.0001, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.38195664909197424, |
|
"grad_norm": 0.008455273695290089, |
|
"learning_rate": 3.1022326674500592e-06, |
|
"loss": 0.0001, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.38429994141769186, |
|
"grad_norm": 0.00022873218404129148, |
|
"learning_rate": 3.0904817861339605e-06, |
|
"loss": 0.0, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.3866432337434095, |
|
"grad_norm": 3.000872850418091, |
|
"learning_rate": 3.0787309048178617e-06, |
|
"loss": 0.055, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.38898652606912715, |
|
"grad_norm": 0.002177221467718482, |
|
"learning_rate": 3.066980023501763e-06, |
|
"loss": 0.0, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.39132981839484476, |
|
"grad_norm": 0.002786975121125579, |
|
"learning_rate": 3.0552291421856637e-06, |
|
"loss": 0.0, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.3936731107205624, |
|
"grad_norm": 0.004335256293416023, |
|
"learning_rate": 3.043478260869566e-06, |
|
"loss": 0.0, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.39601640304628, |
|
"grad_norm": 0.007627409417182207, |
|
"learning_rate": 3.031727379553467e-06, |
|
"loss": 0.0001, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.39835969537199767, |
|
"grad_norm": 0.002631911775097251, |
|
"learning_rate": 3.0199764982373682e-06, |
|
"loss": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.4007029876977153, |
|
"grad_norm": 0.009561799466609955, |
|
"learning_rate": 3.008225616921269e-06, |
|
"loss": 0.0001, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.4030462800234329, |
|
"grad_norm": 0.0026635443791747093, |
|
"learning_rate": 2.9964747356051703e-06, |
|
"loss": 0.0001, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.4053895723491506, |
|
"grad_norm": 0.0001533351169200614, |
|
"learning_rate": 2.9847238542890723e-06, |
|
"loss": 0.0, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.4077328646748682, |
|
"grad_norm": 0.0835270956158638, |
|
"learning_rate": 2.9729729729729736e-06, |
|
"loss": 0.0005, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.4100761570005858, |
|
"grad_norm": 0.003761101048439741, |
|
"learning_rate": 2.9612220916568744e-06, |
|
"loss": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.4124194493263035, |
|
"grad_norm": 0.01136633288115263, |
|
"learning_rate": 2.9494712103407756e-06, |
|
"loss": 0.0002, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.4147627416520211, |
|
"grad_norm": 0.007711971178650856, |
|
"learning_rate": 2.937720329024677e-06, |
|
"loss": 0.0001, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.4171060339777387, |
|
"grad_norm": 0.0003854953683912754, |
|
"learning_rate": 2.925969447708578e-06, |
|
"loss": 0.0, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.4194493263034564, |
|
"grad_norm": 0.019140860065817833, |
|
"learning_rate": 2.91421856639248e-06, |
|
"loss": 0.0001, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.421792618629174, |
|
"grad_norm": 0.0013410028768703341, |
|
"learning_rate": 2.902467685076381e-06, |
|
"loss": 0.0003, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4241359109548916, |
|
"grad_norm": 0.0011243935441598296, |
|
"learning_rate": 2.890716803760282e-06, |
|
"loss": 0.0001, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.42647920328060923, |
|
"grad_norm": 0.012134709395468235, |
|
"learning_rate": 2.8789659224441834e-06, |
|
"loss": 0.0001, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.4288224956063269, |
|
"grad_norm": 0.0028234529308974743, |
|
"learning_rate": 2.8672150411280846e-06, |
|
"loss": 0.0, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.4311657879320445, |
|
"grad_norm": 0.004319467581808567, |
|
"learning_rate": 2.855464159811986e-06, |
|
"loss": 0.0, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.43350908025776214, |
|
"grad_norm": 0.0068093533627688885, |
|
"learning_rate": 2.8437132784958875e-06, |
|
"loss": 0.0001, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4358523725834798, |
|
"grad_norm": 0.016774361953139305, |
|
"learning_rate": 2.8319623971797887e-06, |
|
"loss": 0.0001, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.4381956649091974, |
|
"grad_norm": 0.014978869818150997, |
|
"learning_rate": 2.82021151586369e-06, |
|
"loss": 0.0001, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.44053895723491504, |
|
"grad_norm": 0.0010881100315600634, |
|
"learning_rate": 2.808460634547591e-06, |
|
"loss": 0.0004, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.4428822495606327, |
|
"grad_norm": 0.05522293969988823, |
|
"learning_rate": 2.7967097532314924e-06, |
|
"loss": 0.0002, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.44522554188635033, |
|
"grad_norm": 0.0027575818821787834, |
|
"learning_rate": 2.784958871915394e-06, |
|
"loss": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.44756883421206795, |
|
"grad_norm": 0.0006020054570399225, |
|
"learning_rate": 2.7732079905992952e-06, |
|
"loss": 0.0005, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.44991212653778556, |
|
"grad_norm": 0.0025616425555199385, |
|
"learning_rate": 2.7614571092831965e-06, |
|
"loss": 0.0, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.45225541886350323, |
|
"grad_norm": 0.0018823420396074653, |
|
"learning_rate": 2.7497062279670977e-06, |
|
"loss": 0.0, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.45459871118922085, |
|
"grad_norm": 0.003241207217797637, |
|
"learning_rate": 2.737955346650999e-06, |
|
"loss": 0.0, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.45694200351493847, |
|
"grad_norm": 0.0010485474485903978, |
|
"learning_rate": 2.7262044653349e-06, |
|
"loss": 0.0002, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.45928529584065614, |
|
"grad_norm": 0.013366922736167908, |
|
"learning_rate": 2.714453584018802e-06, |
|
"loss": 0.0001, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.46162858816637375, |
|
"grad_norm": 0.0005886501166969538, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.0, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.46397188049209137, |
|
"grad_norm": 7.603697304148227e-05, |
|
"learning_rate": 2.6909518213866042e-06, |
|
"loss": 0.0, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.46631517281780904, |
|
"grad_norm": 0.000614571908954531, |
|
"learning_rate": 2.6792009400705055e-06, |
|
"loss": 0.0023, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.46865846514352666, |
|
"grad_norm": 0.046423882246017456, |
|
"learning_rate": 2.6674500587544067e-06, |
|
"loss": 0.0002, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4710017574692443, |
|
"grad_norm": 0.0005994020029902458, |
|
"learning_rate": 2.655699177438308e-06, |
|
"loss": 0.0, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.47334504979496195, |
|
"grad_norm": 0.011609828099608421, |
|
"learning_rate": 2.6439482961222096e-06, |
|
"loss": 0.0001, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.47568834212067956, |
|
"grad_norm": 0.007135775871574879, |
|
"learning_rate": 2.632197414806111e-06, |
|
"loss": 0.0002, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.4780316344463972, |
|
"grad_norm": 0.0028773818630725145, |
|
"learning_rate": 2.620446533490012e-06, |
|
"loss": 0.0, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.4803749267721148, |
|
"grad_norm": 0.13341404497623444, |
|
"learning_rate": 2.6086956521739132e-06, |
|
"loss": 0.0008, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.48271821909783247, |
|
"grad_norm": 0.03130058944225311, |
|
"learning_rate": 2.5969447708578145e-06, |
|
"loss": 0.0001, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.4850615114235501, |
|
"grad_norm": 0.006637818645685911, |
|
"learning_rate": 2.5851938895417157e-06, |
|
"loss": 0.0001, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.4874048037492677, |
|
"grad_norm": 0.0006390800117515028, |
|
"learning_rate": 2.5734430082256173e-06, |
|
"loss": 0.0001, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.4897480960749854, |
|
"grad_norm": 0.02106345072388649, |
|
"learning_rate": 2.5616921269095186e-06, |
|
"loss": 0.0002, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.492091388400703, |
|
"grad_norm": 0.0009213433368131518, |
|
"learning_rate": 2.5499412455934198e-06, |
|
"loss": 0.0001, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4944346807264206, |
|
"grad_norm": 2.5962471961975098, |
|
"learning_rate": 2.538190364277321e-06, |
|
"loss": 0.1436, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.4967779730521383, |
|
"grad_norm": 0.009386847727000713, |
|
"learning_rate": 2.5264394829612222e-06, |
|
"loss": 0.0001, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.4991212653778559, |
|
"grad_norm": 0.01308267842978239, |
|
"learning_rate": 2.514688601645124e-06, |
|
"loss": 0.0001, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.5014645577035736, |
|
"grad_norm": 0.006409250665456057, |
|
"learning_rate": 2.502937720329025e-06, |
|
"loss": 0.0, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5038078500292912, |
|
"grad_norm": 0.0018047624034807086, |
|
"learning_rate": 2.4911868390129263e-06, |
|
"loss": 0.0001, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5061511423550088, |
|
"grad_norm": 0.007056268397718668, |
|
"learning_rate": 2.4794359576968276e-06, |
|
"loss": 0.0, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.5084944346807264, |
|
"grad_norm": 2.4651243686676025, |
|
"learning_rate": 2.4676850763807288e-06, |
|
"loss": 0.0245, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.510837727006444, |
|
"grad_norm": 0.0025760605931282043, |
|
"learning_rate": 2.45593419506463e-06, |
|
"loss": 0.0, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.5131810193321616, |
|
"grad_norm": 0.059660654515028, |
|
"learning_rate": 2.4441833137485312e-06, |
|
"loss": 0.0003, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.5155243116578794, |
|
"grad_norm": 0.032668206840753555, |
|
"learning_rate": 2.432432432432433e-06, |
|
"loss": 0.0002, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.517867603983597, |
|
"grad_norm": 0.002476097084581852, |
|
"learning_rate": 2.420681551116334e-06, |
|
"loss": 0.0, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.5202108963093146, |
|
"grad_norm": 0.0005356927285902202, |
|
"learning_rate": 2.4089306698002353e-06, |
|
"loss": 0.0, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.5225541886350322, |
|
"grad_norm": 0.01949264481663704, |
|
"learning_rate": 2.3971797884841366e-06, |
|
"loss": 0.0001, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.5248974809607498, |
|
"grad_norm": 0.4609091281890869, |
|
"learning_rate": 2.3854289071680378e-06, |
|
"loss": 0.0013, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.5272407732864675, |
|
"grad_norm": 0.002268969314172864, |
|
"learning_rate": 2.373678025851939e-06, |
|
"loss": 0.027, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.5295840656121851, |
|
"grad_norm": 0.42679542303085327, |
|
"learning_rate": 2.3619271445358407e-06, |
|
"loss": 0.002, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.5319273579379028, |
|
"grad_norm": 0.030775954946875572, |
|
"learning_rate": 2.350176263219742e-06, |
|
"loss": 0.0001, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.5342706502636204, |
|
"grad_norm": 0.006208465900272131, |
|
"learning_rate": 2.3384253819036427e-06, |
|
"loss": 0.0001, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.536613942589338, |
|
"grad_norm": 0.001203950378112495, |
|
"learning_rate": 2.3266745005875443e-06, |
|
"loss": 0.0, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.5389572349150556, |
|
"grad_norm": 0.0013062539510428905, |
|
"learning_rate": 2.3149236192714456e-06, |
|
"loss": 0.0001, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5413005272407733, |
|
"grad_norm": 0.014242034405469894, |
|
"learning_rate": 2.3031727379553468e-06, |
|
"loss": 0.0001, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.5436438195664909, |
|
"grad_norm": 0.0024558689910918474, |
|
"learning_rate": 2.291421856639248e-06, |
|
"loss": 0.0, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.5459871118922085, |
|
"grad_norm": 0.006871205288916826, |
|
"learning_rate": 2.2796709753231492e-06, |
|
"loss": 0.0, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.5483304042179262, |
|
"grad_norm": 0.016744021326303482, |
|
"learning_rate": 2.2679200940070505e-06, |
|
"loss": 0.0001, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5506736965436438, |
|
"grad_norm": 0.0025478950701653957, |
|
"learning_rate": 2.256169212690952e-06, |
|
"loss": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5530169888693615, |
|
"grad_norm": 0.002553507685661316, |
|
"learning_rate": 2.2444183313748533e-06, |
|
"loss": 0.0, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.5553602811950791, |
|
"grad_norm": 0.0018396044615656137, |
|
"learning_rate": 2.2326674500587546e-06, |
|
"loss": 0.0002, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.5577035735207967, |
|
"grad_norm": 0.002036860678344965, |
|
"learning_rate": 2.2209165687426558e-06, |
|
"loss": 0.0, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.5600468658465143, |
|
"grad_norm": 0.0024688418488949537, |
|
"learning_rate": 2.209165687426557e-06, |
|
"loss": 0.0, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.562390158172232, |
|
"grad_norm": 0.0028820293955504894, |
|
"learning_rate": 2.1974148061104587e-06, |
|
"loss": 0.0001, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5647334504979497, |
|
"grad_norm": 0.00978305283933878, |
|
"learning_rate": 2.18566392479436e-06, |
|
"loss": 0.0001, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.5670767428236673, |
|
"grad_norm": 0.147267147898674, |
|
"learning_rate": 2.173913043478261e-06, |
|
"loss": 0.0014, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.5694200351493849, |
|
"grad_norm": 0.005025573540478945, |
|
"learning_rate": 2.1621621621621623e-06, |
|
"loss": 0.0006, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.5717633274751025, |
|
"grad_norm": 0.0010051846038550138, |
|
"learning_rate": 2.1504112808460636e-06, |
|
"loss": 0.0003, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.5741066198008201, |
|
"grad_norm": 0.009055075235664845, |
|
"learning_rate": 2.1386603995299648e-06, |
|
"loss": 0.0001, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5764499121265377, |
|
"grad_norm": 0.0077414545230567455, |
|
"learning_rate": 2.1269095182138664e-06, |
|
"loss": 0.0001, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.5787932044522555, |
|
"grad_norm": 0.0059761228039860725, |
|
"learning_rate": 2.1151586368977677e-06, |
|
"loss": 0.0001, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.5811364967779731, |
|
"grad_norm": 0.0014180493308231235, |
|
"learning_rate": 2.103407755581669e-06, |
|
"loss": 0.0, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.5834797891036907, |
|
"grad_norm": 0.0022345769684761763, |
|
"learning_rate": 2.09165687426557e-06, |
|
"loss": 0.0, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.5858230814294083, |
|
"grad_norm": 0.005645833443850279, |
|
"learning_rate": 2.0799059929494713e-06, |
|
"loss": 0.0001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5881663737551259, |
|
"grad_norm": 0.011956258676946163, |
|
"learning_rate": 2.0681551116333726e-06, |
|
"loss": 0.0001, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.5905096660808435, |
|
"grad_norm": 0.01774289458990097, |
|
"learning_rate": 2.056404230317274e-06, |
|
"loss": 0.0002, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.5928529584065613, |
|
"grad_norm": 0.21751126646995544, |
|
"learning_rate": 2.0446533490011754e-06, |
|
"loss": 0.0012, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.5951962507322789, |
|
"grad_norm": 0.00307491235435009, |
|
"learning_rate": 2.0329024676850762e-06, |
|
"loss": 0.0, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.5975395430579965, |
|
"grad_norm": 0.021330738440155983, |
|
"learning_rate": 2.021151586368978e-06, |
|
"loss": 0.0002, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5998828353837141, |
|
"grad_norm": 0.020080704241991043, |
|
"learning_rate": 2.009400705052879e-06, |
|
"loss": 0.0001, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.6022261277094317, |
|
"grad_norm": 0.020522406324744225, |
|
"learning_rate": 1.9976498237367803e-06, |
|
"loss": 0.0002, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.6045694200351494, |
|
"grad_norm": 0.0004171329492237419, |
|
"learning_rate": 1.985898942420682e-06, |
|
"loss": 0.0, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.606912712360867, |
|
"grad_norm": 0.0027696220204234123, |
|
"learning_rate": 1.9741480611045828e-06, |
|
"loss": 0.0, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.6092560046865847, |
|
"grad_norm": 0.021467505022883415, |
|
"learning_rate": 1.9623971797884844e-06, |
|
"loss": 0.0002, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.6115992970123023, |
|
"grad_norm": 0.011968536302447319, |
|
"learning_rate": 1.9506462984723856e-06, |
|
"loss": 0.0001, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.6139425893380199, |
|
"grad_norm": 0.0011503971181809902, |
|
"learning_rate": 1.938895417156287e-06, |
|
"loss": 0.0004, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.6162858816637375, |
|
"grad_norm": 0.02280554361641407, |
|
"learning_rate": 1.927144535840188e-06, |
|
"loss": 0.0002, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.6186291739894552, |
|
"grad_norm": 0.008415359072387218, |
|
"learning_rate": 1.9153936545240893e-06, |
|
"loss": 0.0001, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.6209724663151728, |
|
"grad_norm": 0.0024012764915823936, |
|
"learning_rate": 1.9036427732079908e-06, |
|
"loss": 0.0001, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.6233157586408905, |
|
"grad_norm": 0.010776808485388756, |
|
"learning_rate": 1.8918918918918922e-06, |
|
"loss": 0.0001, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.6256590509666081, |
|
"grad_norm": 0.017337538301944733, |
|
"learning_rate": 1.8801410105757934e-06, |
|
"loss": 0.0001, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.6280023432923257, |
|
"grad_norm": 0.0019926901441067457, |
|
"learning_rate": 1.8683901292596946e-06, |
|
"loss": 0.0001, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.6303456356180434, |
|
"grad_norm": 0.013480707071721554, |
|
"learning_rate": 1.856639247943596e-06, |
|
"loss": 0.0002, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.632688927943761, |
|
"grad_norm": 0.005608106963336468, |
|
"learning_rate": 1.8448883666274973e-06, |
|
"loss": 0.0002, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.6350322202694786, |
|
"grad_norm": 0.002639380283653736, |
|
"learning_rate": 1.8331374853113983e-06, |
|
"loss": 0.0001, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.6373755125951962, |
|
"grad_norm": 0.0022652854677289724, |
|
"learning_rate": 1.8213866039953e-06, |
|
"loss": 0.0002, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.6397188049209139, |
|
"grad_norm": 0.003624632954597473, |
|
"learning_rate": 1.809635722679201e-06, |
|
"loss": 0.0001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.6420620972466315, |
|
"grad_norm": 0.007647163700312376, |
|
"learning_rate": 1.7978848413631022e-06, |
|
"loss": 0.0004, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.6444053895723492, |
|
"grad_norm": 0.012163680978119373, |
|
"learning_rate": 1.7861339600470036e-06, |
|
"loss": 0.0002, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6467486818980668, |
|
"grad_norm": 0.09023822844028473, |
|
"learning_rate": 1.7743830787309049e-06, |
|
"loss": 0.0009, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.6490919742237844, |
|
"grad_norm": 0.006924999412149191, |
|
"learning_rate": 1.762632197414806e-06, |
|
"loss": 0.0001, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.651435266549502, |
|
"grad_norm": 0.0006185275269672275, |
|
"learning_rate": 1.7508813160987075e-06, |
|
"loss": 0.0001, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.6537785588752196, |
|
"grad_norm": 0.011605402454733849, |
|
"learning_rate": 1.7391304347826088e-06, |
|
"loss": 0.0006, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.6561218512009374, |
|
"grad_norm": 0.024394473060965538, |
|
"learning_rate": 1.7273795534665102e-06, |
|
"loss": 0.0001, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.658465143526655, |
|
"grad_norm": 0.023466341197490692, |
|
"learning_rate": 1.7156286721504114e-06, |
|
"loss": 0.0002, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.6608084358523726, |
|
"grad_norm": 0.010153519921004772, |
|
"learning_rate": 1.7038777908343126e-06, |
|
"loss": 0.0004, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.6631517281780902, |
|
"grad_norm": 0.43800845742225647, |
|
"learning_rate": 1.692126909518214e-06, |
|
"loss": 0.0012, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.6654950205038078, |
|
"grad_norm": 0.008404972031712532, |
|
"learning_rate": 1.6803760282021153e-06, |
|
"loss": 0.0001, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.6678383128295254, |
|
"grad_norm": 0.10615257918834686, |
|
"learning_rate": 1.6686251468860165e-06, |
|
"loss": 0.0005, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6701816051552432, |
|
"grad_norm": 0.019307592883706093, |
|
"learning_rate": 1.656874265569918e-06, |
|
"loss": 0.0003, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.6725248974809608, |
|
"grad_norm": 0.012227280996739864, |
|
"learning_rate": 1.6451233842538192e-06, |
|
"loss": 0.0002, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.6748681898066784, |
|
"grad_norm": 0.002821948379278183, |
|
"learning_rate": 1.6333725029377204e-06, |
|
"loss": 0.0, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.677211482132396, |
|
"grad_norm": 0.010473825968801975, |
|
"learning_rate": 1.6216216216216219e-06, |
|
"loss": 0.0003, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.6795547744581136, |
|
"grad_norm": 0.014046385884284973, |
|
"learning_rate": 1.609870740305523e-06, |
|
"loss": 0.0236, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6818980667838312, |
|
"grad_norm": 0.0017795696621760726, |
|
"learning_rate": 1.5981198589894243e-06, |
|
"loss": 0.0001, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.6842413591095489, |
|
"grad_norm": 0.0006959863239899278, |
|
"learning_rate": 1.5863689776733257e-06, |
|
"loss": 0.0002, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.6865846514352666, |
|
"grad_norm": 0.019652947783470154, |
|
"learning_rate": 1.574618096357227e-06, |
|
"loss": 0.0003, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.6889279437609842, |
|
"grad_norm": 0.002340570092201233, |
|
"learning_rate": 1.5628672150411282e-06, |
|
"loss": 0.0, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.6912712360867018, |
|
"grad_norm": 0.011190817691385746, |
|
"learning_rate": 1.5511163337250296e-06, |
|
"loss": 0.0002, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6936145284124194, |
|
"grad_norm": 0.001152676297351718, |
|
"learning_rate": 1.5393654524089308e-06, |
|
"loss": 0.0001, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.6959578207381371, |
|
"grad_norm": 0.003393592080101371, |
|
"learning_rate": 1.5276145710928319e-06, |
|
"loss": 0.0001, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.6983011130638547, |
|
"grad_norm": 0.007921353913843632, |
|
"learning_rate": 1.5158636897767335e-06, |
|
"loss": 0.0001, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.7006444053895724, |
|
"grad_norm": 0.1039208471775055, |
|
"learning_rate": 1.5041128084606345e-06, |
|
"loss": 0.0002, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.70298769771529, |
|
"grad_norm": 0.0011576958931982517, |
|
"learning_rate": 1.4923619271445362e-06, |
|
"loss": 0.0001, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.7053309900410076, |
|
"grad_norm": 0.06407307088375092, |
|
"learning_rate": 1.4806110458284372e-06, |
|
"loss": 0.0003, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.7076742823667252, |
|
"grad_norm": 0.012639104388654232, |
|
"learning_rate": 1.4688601645123384e-06, |
|
"loss": 0.0002, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.7100175746924429, |
|
"grad_norm": 0.0019591290038079023, |
|
"learning_rate": 1.45710928319624e-06, |
|
"loss": 0.0068, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.7123608670181605, |
|
"grad_norm": 0.0008327167597599328, |
|
"learning_rate": 1.445358401880141e-06, |
|
"loss": 0.0001, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.7147041593438781, |
|
"grad_norm": 0.0013139324728399515, |
|
"learning_rate": 1.4336075205640423e-06, |
|
"loss": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.7170474516695958, |
|
"grad_norm": 0.00803992711007595, |
|
"learning_rate": 1.4218566392479437e-06, |
|
"loss": 0.0002, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.7193907439953134, |
|
"grad_norm": 0.011399227194488049, |
|
"learning_rate": 1.410105757931845e-06, |
|
"loss": 0.0002, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.7217340363210311, |
|
"grad_norm": 0.007171169854700565, |
|
"learning_rate": 1.3983548766157462e-06, |
|
"loss": 0.0002, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.7240773286467487, |
|
"grad_norm": 0.7272996306419373, |
|
"learning_rate": 1.3866039952996476e-06, |
|
"loss": 0.0028, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.7264206209724663, |
|
"grad_norm": 0.0037387118209153414, |
|
"learning_rate": 1.3748531139835488e-06, |
|
"loss": 0.0001, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.7287639132981839, |
|
"grad_norm": 0.015048849396407604, |
|
"learning_rate": 1.36310223266745e-06, |
|
"loss": 0.0002, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.7311072056239016, |
|
"grad_norm": 0.0023705060593783855, |
|
"learning_rate": 1.3513513513513515e-06, |
|
"loss": 0.0001, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.7334504979496193, |
|
"grad_norm": 0.03966263309121132, |
|
"learning_rate": 1.3396004700352527e-06, |
|
"loss": 0.0003, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.7357937902753369, |
|
"grad_norm": 0.0033043306320905685, |
|
"learning_rate": 1.327849588719154e-06, |
|
"loss": 0.0004, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.7381370826010545, |
|
"grad_norm": 0.35459718108177185, |
|
"learning_rate": 1.3160987074030554e-06, |
|
"loss": 0.0034, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.7404803749267721, |
|
"grad_norm": 0.016441915184259415, |
|
"learning_rate": 1.3043478260869566e-06, |
|
"loss": 0.0002, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.7428236672524897, |
|
"grad_norm": 0.0045352657325565815, |
|
"learning_rate": 1.2925969447708578e-06, |
|
"loss": 0.0002, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.7451669595782073, |
|
"grad_norm": 0.06311573088169098, |
|
"learning_rate": 1.2808460634547593e-06, |
|
"loss": 0.0005, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.7475102519039251, |
|
"grad_norm": 0.11154340207576752, |
|
"learning_rate": 1.2690951821386605e-06, |
|
"loss": 0.0009, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.7498535442296427, |
|
"grad_norm": 0.01816423609852791, |
|
"learning_rate": 1.257344300822562e-06, |
|
"loss": 0.0006, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.7521968365553603, |
|
"grad_norm": 0.027273530140519142, |
|
"learning_rate": 1.2455934195064632e-06, |
|
"loss": 0.0005, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.7545401288810779, |
|
"grad_norm": 0.006555743515491486, |
|
"learning_rate": 1.2338425381903644e-06, |
|
"loss": 0.0003, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.7568834212067955, |
|
"grad_norm": 0.0030812753830105066, |
|
"learning_rate": 1.2220916568742656e-06, |
|
"loss": 0.0279, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.7592267135325131, |
|
"grad_norm": 0.01702543906867504, |
|
"learning_rate": 1.210340775558167e-06, |
|
"loss": 0.0001, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.7615700058582309, |
|
"grad_norm": 0.02607725001871586, |
|
"learning_rate": 1.1985898942420683e-06, |
|
"loss": 0.0001, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7639132981839485, |
|
"grad_norm": 0.006388965994119644, |
|
"learning_rate": 1.1868390129259695e-06, |
|
"loss": 0.0001, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.7662565905096661, |
|
"grad_norm": 0.008253968320786953, |
|
"learning_rate": 1.175088131609871e-06, |
|
"loss": 0.0001, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.7685998828353837, |
|
"grad_norm": 0.004699599463492632, |
|
"learning_rate": 1.1633372502937722e-06, |
|
"loss": 0.0002, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.7709431751611013, |
|
"grad_norm": 0.0012458263663575053, |
|
"learning_rate": 1.1515863689776734e-06, |
|
"loss": 0.0122, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.773286467486819, |
|
"grad_norm": 0.02383268252015114, |
|
"learning_rate": 1.1398354876615746e-06, |
|
"loss": 0.0003, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7756297598125366, |
|
"grad_norm": 0.015058089047670364, |
|
"learning_rate": 1.128084606345476e-06, |
|
"loss": 0.0001, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.7779730521382543, |
|
"grad_norm": 0.01569475792348385, |
|
"learning_rate": 1.1163337250293773e-06, |
|
"loss": 0.0003, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.7803163444639719, |
|
"grad_norm": 0.04253750294446945, |
|
"learning_rate": 1.1045828437132785e-06, |
|
"loss": 0.0002, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.7826596367896895, |
|
"grad_norm": 0.015156907960772514, |
|
"learning_rate": 1.09283196239718e-06, |
|
"loss": 0.0002, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.7850029291154071, |
|
"grad_norm": 0.03742622211575508, |
|
"learning_rate": 1.0810810810810812e-06, |
|
"loss": 0.0005, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7873462214411248, |
|
"grad_norm": 0.027262985706329346, |
|
"learning_rate": 1.0693301997649824e-06, |
|
"loss": 0.0002, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.7896895137668424, |
|
"grad_norm": 0.007641313597559929, |
|
"learning_rate": 1.0575793184488838e-06, |
|
"loss": 0.0002, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.79203280609256, |
|
"grad_norm": 0.04441560059785843, |
|
"learning_rate": 1.045828437132785e-06, |
|
"loss": 0.0005, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.7943760984182777, |
|
"grad_norm": 0.020478103309869766, |
|
"learning_rate": 1.0340775558166863e-06, |
|
"loss": 0.0002, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.7967193907439953, |
|
"grad_norm": 0.10936477035284042, |
|
"learning_rate": 1.0223266745005877e-06, |
|
"loss": 0.001, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.799062683069713, |
|
"grad_norm": 0.01284460723400116, |
|
"learning_rate": 1.010575793184489e-06, |
|
"loss": 0.0015, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.8014059753954306, |
|
"grad_norm": 0.003440434578806162, |
|
"learning_rate": 9.988249118683902e-07, |
|
"loss": 0.0, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.8037492677211482, |
|
"grad_norm": 0.013081365264952183, |
|
"learning_rate": 9.870740305522914e-07, |
|
"loss": 0.0009, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.8060925600468658, |
|
"grad_norm": 0.013380183838307858, |
|
"learning_rate": 9.753231492361928e-07, |
|
"loss": 0.0002, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.8084358523725835, |
|
"grad_norm": 0.03771582618355751, |
|
"learning_rate": 9.63572267920094e-07, |
|
"loss": 0.0003, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.8107791446983011, |
|
"grad_norm": 0.0009556732256896794, |
|
"learning_rate": 9.518213866039954e-07, |
|
"loss": 0.0005, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.8131224370240188, |
|
"grad_norm": 0.0019481348572298884, |
|
"learning_rate": 9.400705052878967e-07, |
|
"loss": 0.0001, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.8154657293497364, |
|
"grad_norm": 0.0021866948809474707, |
|
"learning_rate": 9.28319623971798e-07, |
|
"loss": 0.0002, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.817809021675454, |
|
"grad_norm": 0.007546517997980118, |
|
"learning_rate": 9.165687426556992e-07, |
|
"loss": 0.0007, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.8201523140011716, |
|
"grad_norm": 2.074432611465454, |
|
"learning_rate": 9.048178613396005e-07, |
|
"loss": 0.0251, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.8224956063268892, |
|
"grad_norm": 0.003374068532139063, |
|
"learning_rate": 8.930669800235018e-07, |
|
"loss": 0.0001, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.824838898652607, |
|
"grad_norm": 0.010109562426805496, |
|
"learning_rate": 8.81316098707403e-07, |
|
"loss": 0.0006, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.8271821909783246, |
|
"grad_norm": 0.017352379858493805, |
|
"learning_rate": 8.695652173913044e-07, |
|
"loss": 0.0001, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.8295254833040422, |
|
"grad_norm": 0.016872087493538857, |
|
"learning_rate": 8.578143360752057e-07, |
|
"loss": 0.0002, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.8318687756297598, |
|
"grad_norm": 0.041937246918678284, |
|
"learning_rate": 8.46063454759107e-07, |
|
"loss": 0.0228, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.8342120679554774, |
|
"grad_norm": 0.02908233553171158, |
|
"learning_rate": 8.343125734430083e-07, |
|
"loss": 0.0002, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.836555360281195, |
|
"grad_norm": 0.0012463816674426198, |
|
"learning_rate": 8.225616921269096e-07, |
|
"loss": 0.0004, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.8388986526069128, |
|
"grad_norm": 0.04300675913691521, |
|
"learning_rate": 8.108108108108109e-07, |
|
"loss": 0.0006, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.8412419449326304, |
|
"grad_norm": 2.7622828483581543, |
|
"learning_rate": 7.990599294947122e-07, |
|
"loss": 0.149, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.843585237258348, |
|
"grad_norm": 0.010049765929579735, |
|
"learning_rate": 7.873090481786135e-07, |
|
"loss": 0.0002, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.8459285295840656, |
|
"grad_norm": 0.011876920238137245, |
|
"learning_rate": 7.755581668625148e-07, |
|
"loss": 0.0001, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.8482718219097832, |
|
"grad_norm": 0.014826681464910507, |
|
"learning_rate": 7.638072855464159e-07, |
|
"loss": 0.0003, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.8506151142355008, |
|
"grad_norm": 0.16368882358074188, |
|
"learning_rate": 7.520564042303173e-07, |
|
"loss": 0.0013, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.8529584065612185, |
|
"grad_norm": 0.02603282406926155, |
|
"learning_rate": 7.403055229142186e-07, |
|
"loss": 0.0004, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.8553016988869362, |
|
"grad_norm": 0.7740702629089355, |
|
"learning_rate": 7.2855464159812e-07, |
|
"loss": 0.0043, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.8576449912126538, |
|
"grad_norm": 0.010226438753306866, |
|
"learning_rate": 7.168037602820211e-07, |
|
"loss": 0.0002, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.8599882835383714, |
|
"grad_norm": 0.02008165791630745, |
|
"learning_rate": 7.050528789659225e-07, |
|
"loss": 0.0002, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.862331575864089, |
|
"grad_norm": 0.09208586066961288, |
|
"learning_rate": 6.933019976498238e-07, |
|
"loss": 0.0008, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.8646748681898067, |
|
"grad_norm": 0.01933148130774498, |
|
"learning_rate": 6.81551116333725e-07, |
|
"loss": 0.0011, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.8670181605155243, |
|
"grad_norm": 0.04433580860495567, |
|
"learning_rate": 6.698002350176264e-07, |
|
"loss": 0.0003, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.869361452841242, |
|
"grad_norm": 0.01631711982190609, |
|
"learning_rate": 6.580493537015277e-07, |
|
"loss": 0.0003, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.8717047451669596, |
|
"grad_norm": 0.042307399213314056, |
|
"learning_rate": 6.462984723854289e-07, |
|
"loss": 0.0004, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.8740480374926772, |
|
"grad_norm": 0.22414757311344147, |
|
"learning_rate": 6.345475910693303e-07, |
|
"loss": 0.0018, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.8763913298183948, |
|
"grad_norm": 0.17513447999954224, |
|
"learning_rate": 6.227967097532316e-07, |
|
"loss": 0.0015, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.8787346221441125, |
|
"grad_norm": 0.3218580186367035, |
|
"learning_rate": 6.110458284371328e-07, |
|
"loss": 0.0029, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8810779144698301, |
|
"grad_norm": 0.026706017553806305, |
|
"learning_rate": 5.992949471210341e-07, |
|
"loss": 0.0004, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.8834212067955477, |
|
"grad_norm": 0.4114263951778412, |
|
"learning_rate": 5.875440658049355e-07, |
|
"loss": 0.0035, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.8857644991212654, |
|
"grad_norm": 0.25009235739707947, |
|
"learning_rate": 5.757931844888367e-07, |
|
"loss": 0.0016, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.888107791446983, |
|
"grad_norm": 1.2960833311080933, |
|
"learning_rate": 5.64042303172738e-07, |
|
"loss": 0.0059, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.8904510837727007, |
|
"grad_norm": 0.28417083621025085, |
|
"learning_rate": 5.522914218566393e-07, |
|
"loss": 0.0059, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8927943760984183, |
|
"grad_norm": 0.2292051613330841, |
|
"learning_rate": 5.405405405405406e-07, |
|
"loss": 0.0015, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.8951376684241359, |
|
"grad_norm": 0.012189504690468311, |
|
"learning_rate": 5.287896592244419e-07, |
|
"loss": 0.0007, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.8974809607498535, |
|
"grad_norm": 0.09458251297473907, |
|
"learning_rate": 5.170387779083431e-07, |
|
"loss": 0.0004, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.8998242530755711, |
|
"grad_norm": 0.027070222422480583, |
|
"learning_rate": 5.052878965922445e-07, |
|
"loss": 0.0012, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.9021675454012889, |
|
"grad_norm": 0.047401878982782364, |
|
"learning_rate": 4.935370152761457e-07, |
|
"loss": 0.0003, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.9045108377270065, |
|
"grad_norm": 0.06239737570285797, |
|
"learning_rate": 4.81786133960047e-07, |
|
"loss": 0.0012, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.9068541300527241, |
|
"grad_norm": 2.6842846870422363, |
|
"learning_rate": 4.7003525264394836e-07, |
|
"loss": 0.1103, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.9091974223784417, |
|
"grad_norm": 0.057395774871110916, |
|
"learning_rate": 4.582843713278496e-07, |
|
"loss": 0.0004, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.9115407147041593, |
|
"grad_norm": 0.16248440742492676, |
|
"learning_rate": 4.465334900117509e-07, |
|
"loss": 0.0018, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.9138840070298769, |
|
"grad_norm": 0.11067284643650055, |
|
"learning_rate": 4.347826086956522e-07, |
|
"loss": 0.0011, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.9162272993555947, |
|
"grad_norm": 0.07208680361509323, |
|
"learning_rate": 4.230317273795535e-07, |
|
"loss": 0.0011, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.9185705916813123, |
|
"grad_norm": 0.4830150604248047, |
|
"learning_rate": 4.112808460634548e-07, |
|
"loss": 0.0022, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.9209138840070299, |
|
"grad_norm": 0.01794450171291828, |
|
"learning_rate": 3.995299647473561e-07, |
|
"loss": 0.0011, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.9232571763327475, |
|
"grad_norm": 3.0485081672668457, |
|
"learning_rate": 3.877790834312574e-07, |
|
"loss": 0.0508, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.9256004686584651, |
|
"grad_norm": 3.130112648010254, |
|
"learning_rate": 3.7602820211515863e-07, |
|
"loss": 0.0194, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.9279437609841827, |
|
"grad_norm": 3.5992815494537354, |
|
"learning_rate": 3.6427732079906e-07, |
|
"loss": 0.1036, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.9302870533099004, |
|
"grad_norm": 0.0751647800207138, |
|
"learning_rate": 3.5252643948296124e-07, |
|
"loss": 0.0003, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.9326303456356181, |
|
"grad_norm": 0.03622612729668617, |
|
"learning_rate": 3.407755581668625e-07, |
|
"loss": 0.0011, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.9349736379613357, |
|
"grad_norm": 0.22365981340408325, |
|
"learning_rate": 3.2902467685076385e-07, |
|
"loss": 0.0028, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.9373169302870533, |
|
"grad_norm": 0.04666091129183769, |
|
"learning_rate": 3.172737955346651e-07, |
|
"loss": 0.0041, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.9396602226127709, |
|
"grad_norm": 5.363467693328857, |
|
"learning_rate": 3.055229142185664e-07, |
|
"loss": 0.2217, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.9420035149384886, |
|
"grad_norm": 0.06753694266080856, |
|
"learning_rate": 2.9377203290246774e-07, |
|
"loss": 0.0026, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.9443468072642062, |
|
"grad_norm": 2.554419994354248, |
|
"learning_rate": 2.82021151586369e-07, |
|
"loss": 0.0791, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.9466900995899239, |
|
"grad_norm": 0.14563411474227905, |
|
"learning_rate": 2.702702702702703e-07, |
|
"loss": 0.0208, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.9490333919156415, |
|
"grad_norm": 2.30971360206604, |
|
"learning_rate": 2.5851938895417157e-07, |
|
"loss": 0.1119, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.9513766842413591, |
|
"grad_norm": 4.073694229125977, |
|
"learning_rate": 2.4676850763807285e-07, |
|
"loss": 0.1057, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.9537199765670767, |
|
"grad_norm": 2.3215789794921875, |
|
"learning_rate": 2.3501762632197418e-07, |
|
"loss": 0.0286, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.9560632688927944, |
|
"grad_norm": 0.46727773547172546, |
|
"learning_rate": 2.2326674500587546e-07, |
|
"loss": 0.0714, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.958406561218512, |
|
"grad_norm": 2.0026137828826904, |
|
"learning_rate": 2.1151586368977676e-07, |
|
"loss": 0.0455, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.9607498535442296, |
|
"grad_norm": 3.2537143230438232, |
|
"learning_rate": 1.9976498237367804e-07, |
|
"loss": 0.0765, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.9630931458699473, |
|
"grad_norm": 3.485633134841919, |
|
"learning_rate": 1.8801410105757932e-07, |
|
"loss": 0.0493, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.9654364381956649, |
|
"grad_norm": 2.769423246383667, |
|
"learning_rate": 1.7626321974148062e-07, |
|
"loss": 0.0602, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.9677797305213826, |
|
"grad_norm": 2.236210823059082, |
|
"learning_rate": 1.6451233842538192e-07, |
|
"loss": 0.1404, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.9701230228471002, |
|
"grad_norm": 0.06197360157966614, |
|
"learning_rate": 1.527614571092832e-07, |
|
"loss": 0.0472, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.9724663151728178, |
|
"grad_norm": 0.8206185698509216, |
|
"learning_rate": 1.410105757931845e-07, |
|
"loss": 0.0686, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9748096074985354, |
|
"grad_norm": 2.434030771255493, |
|
"learning_rate": 1.2925969447708578e-07, |
|
"loss": 0.1322, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.9771528998242531, |
|
"grad_norm": 0.03143630549311638, |
|
"learning_rate": 1.1750881316098709e-07, |
|
"loss": 0.1134, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.9794961921499707, |
|
"grad_norm": 0.1770186424255371, |
|
"learning_rate": 1.0575793184488838e-07, |
|
"loss": 0.0011, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.9818394844756884, |
|
"grad_norm": 6.03350830078125, |
|
"learning_rate": 9.400705052878966e-08, |
|
"loss": 0.4193, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.984182776801406, |
|
"grad_norm": 4.842612266540527, |
|
"learning_rate": 8.225616921269096e-08, |
|
"loss": 0.0951, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9865260691271236, |
|
"grad_norm": 3.111945629119873, |
|
"learning_rate": 7.050528789659225e-08, |
|
"loss": 0.1375, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.9888693614528412, |
|
"grad_norm": 3.4468753337860107, |
|
"learning_rate": 5.8754406580493544e-08, |
|
"loss": 0.157, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.9912126537785588, |
|
"grad_norm": 5.563467502593994, |
|
"learning_rate": 4.700352526439483e-08, |
|
"loss": 0.1989, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.9935559461042766, |
|
"grad_norm": 0.20900146663188934, |
|
"learning_rate": 3.5252643948296127e-08, |
|
"loss": 0.169, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.9958992384299942, |
|
"grad_norm": 2.651283025741577, |
|
"learning_rate": 2.3501762632197414e-08, |
|
"loss": 0.0203, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9982425307557118, |
|
"grad_norm": 3.192451000213623, |
|
"learning_rate": 1.1750881316098707e-08, |
|
"loss": 0.0786, |
|
"step": 852 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 853, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 20000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|