|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.753199268738574, |
|
"eval_steps": 500, |
|
"global_step": 13000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.003656307129798903, |
|
"grad_norm": 101.57550811767578, |
|
"learning_rate": 2.9991224862888483e-05, |
|
"loss": 1.5598, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007312614259597806, |
|
"grad_norm": 36.13117218017578, |
|
"learning_rate": 2.996928702010969e-05, |
|
"loss": 5.0075, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.010968921389396709, |
|
"grad_norm": 171.64637756347656, |
|
"learning_rate": 2.9947349177330895e-05, |
|
"loss": 1.9036, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.014625228519195612, |
|
"grad_norm": 30.211172103881836, |
|
"learning_rate": 2.99254113345521e-05, |
|
"loss": 0.7468, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.018281535648994516, |
|
"grad_norm": 0.8975659012794495, |
|
"learning_rate": 2.990347349177331e-05, |
|
"loss": 1.0939, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.021937842778793418, |
|
"grad_norm": 16.802227020263672, |
|
"learning_rate": 2.9881535648994517e-05, |
|
"loss": 4.4483, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.025594149908592323, |
|
"grad_norm": 22.710329055786133, |
|
"learning_rate": 2.9859597806215723e-05, |
|
"loss": 2.2961, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.029250457038391225, |
|
"grad_norm": 42.57908630371094, |
|
"learning_rate": 2.983765996343693e-05, |
|
"loss": 2.2599, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03290676416819013, |
|
"grad_norm": 22.469757080078125, |
|
"learning_rate": 2.981572212065814e-05, |
|
"loss": 0.6366, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03656307129798903, |
|
"grad_norm": 0.616607666015625, |
|
"learning_rate": 2.9793784277879342e-05, |
|
"loss": 2.6768, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.04021937842778794, |
|
"grad_norm": 38.607276916503906, |
|
"learning_rate": 2.9776234003656307e-05, |
|
"loss": 3.233, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.043875685557586835, |
|
"grad_norm": 20.329072952270508, |
|
"learning_rate": 2.9754296160877513e-05, |
|
"loss": 0.7113, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04753199268738574, |
|
"grad_norm": 12.583466529846191, |
|
"learning_rate": 2.9732358318098722e-05, |
|
"loss": 0.6635, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.051188299817184646, |
|
"grad_norm": 16.894561767578125, |
|
"learning_rate": 2.971042047531993e-05, |
|
"loss": 2.5268, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.054844606946983544, |
|
"grad_norm": 21.74227523803711, |
|
"learning_rate": 2.9688482632541135e-05, |
|
"loss": 0.6855, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05850091407678245, |
|
"grad_norm": 21.068952560424805, |
|
"learning_rate": 2.966654478976234e-05, |
|
"loss": 0.7735, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.062157221206581355, |
|
"grad_norm": 21.1742000579834, |
|
"learning_rate": 2.964460694698355e-05, |
|
"loss": 0.7335, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.06581352833638025, |
|
"grad_norm": 21.325294494628906, |
|
"learning_rate": 2.9622669104204753e-05, |
|
"loss": 0.7411, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.06946983546617916, |
|
"grad_norm": 14.202475547790527, |
|
"learning_rate": 2.960073126142596e-05, |
|
"loss": 0.6385, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.07312614259597806, |
|
"grad_norm": 14.897246360778809, |
|
"learning_rate": 2.9578793418647165e-05, |
|
"loss": 0.6281, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07678244972577697, |
|
"grad_norm": 14.886935234069824, |
|
"learning_rate": 2.955685557586837e-05, |
|
"loss": 0.6687, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.08043875685557587, |
|
"grad_norm": 11.127037048339844, |
|
"learning_rate": 2.953491773308958e-05, |
|
"loss": 0.6167, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.08409506398537477, |
|
"grad_norm": 15.06424617767334, |
|
"learning_rate": 2.9512979890310787e-05, |
|
"loss": 0.6776, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08775137111517367, |
|
"grad_norm": 16.624258041381836, |
|
"learning_rate": 2.9491042047531993e-05, |
|
"loss": 0.8723, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.09140767824497258, |
|
"grad_norm": 15.640335083007812, |
|
"learning_rate": 2.94691042047532e-05, |
|
"loss": 0.6707, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.09506398537477148, |
|
"grad_norm": 14.502679824829102, |
|
"learning_rate": 2.944716636197441e-05, |
|
"loss": 0.7391, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.09872029250457039, |
|
"grad_norm": 16.92255973815918, |
|
"learning_rate": 2.9425228519195615e-05, |
|
"loss": 0.8533, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.10237659963436929, |
|
"grad_norm": 12.30309772491455, |
|
"learning_rate": 2.940329067641682e-05, |
|
"loss": 0.7523, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.10603290676416818, |
|
"grad_norm": 10.335432052612305, |
|
"learning_rate": 2.9381352833638024e-05, |
|
"loss": 0.619, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10968921389396709, |
|
"grad_norm": 10.138907432556152, |
|
"learning_rate": 2.935941499085923e-05, |
|
"loss": 0.6184, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.113345521023766, |
|
"grad_norm": 10.479036331176758, |
|
"learning_rate": 2.933747714808044e-05, |
|
"loss": 0.8974, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1170018281535649, |
|
"grad_norm": 11.521620750427246, |
|
"learning_rate": 2.9315539305301646e-05, |
|
"loss": 0.9033, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1206581352833638, |
|
"grad_norm": 10.446819305419922, |
|
"learning_rate": 2.9293601462522852e-05, |
|
"loss": 0.7302, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.12431444241316271, |
|
"grad_norm": 8.162124633789062, |
|
"learning_rate": 2.9271663619744058e-05, |
|
"loss": 0.6672, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.12797074954296161, |
|
"grad_norm": 10.791855812072754, |
|
"learning_rate": 2.9249725776965268e-05, |
|
"loss": 0.7167, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1316270566727605, |
|
"grad_norm": 11.913755416870117, |
|
"learning_rate": 2.9227787934186474e-05, |
|
"loss": 0.7323, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.13528336380255943, |
|
"grad_norm": 13.401154518127441, |
|
"learning_rate": 2.920585009140768e-05, |
|
"loss": 0.7404, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.13893967093235832, |
|
"grad_norm": 11.721502304077148, |
|
"learning_rate": 2.9183912248628886e-05, |
|
"loss": 0.7038, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1425959780621572, |
|
"grad_norm": 11.148709297180176, |
|
"learning_rate": 2.9161974405850092e-05, |
|
"loss": 0.7144, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.14625228519195613, |
|
"grad_norm": 11.626866340637207, |
|
"learning_rate": 2.91400365630713e-05, |
|
"loss": 0.6527, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.14990859232175502, |
|
"grad_norm": 14.398078918457031, |
|
"learning_rate": 2.9118098720292505e-05, |
|
"loss": 0.6695, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.15356489945155394, |
|
"grad_norm": 14.848665237426758, |
|
"learning_rate": 2.909616087751371e-05, |
|
"loss": 0.7791, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.15722120658135283, |
|
"grad_norm": 12.27662181854248, |
|
"learning_rate": 2.9074223034734917e-05, |
|
"loss": 0.7028, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.16087751371115175, |
|
"grad_norm": 6.21640157699585, |
|
"learning_rate": 2.9052285191956126e-05, |
|
"loss": 0.6076, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.16453382084095064, |
|
"grad_norm": 10.96060562133789, |
|
"learning_rate": 2.9030347349177333e-05, |
|
"loss": 0.6555, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.16819012797074953, |
|
"grad_norm": 12.066696166992188, |
|
"learning_rate": 2.900840950639854e-05, |
|
"loss": 0.7395, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.17184643510054845, |
|
"grad_norm": 14.306533813476562, |
|
"learning_rate": 2.8986471663619745e-05, |
|
"loss": 0.835, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.17550274223034734, |
|
"grad_norm": 15.683281898498535, |
|
"learning_rate": 2.8964533820840954e-05, |
|
"loss": 0.7374, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.17915904936014626, |
|
"grad_norm": 9.928467750549316, |
|
"learning_rate": 2.8942595978062157e-05, |
|
"loss": 0.6113, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.18281535648994515, |
|
"grad_norm": 12.735904693603516, |
|
"learning_rate": 2.8920658135283363e-05, |
|
"loss": 0.8668, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18647166361974407, |
|
"grad_norm": 5.431804180145264, |
|
"learning_rate": 2.889872029250457e-05, |
|
"loss": 0.7009, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.19012797074954296, |
|
"grad_norm": 9.146883010864258, |
|
"learning_rate": 2.887678244972578e-05, |
|
"loss": 0.8343, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.19378427787934185, |
|
"grad_norm": 9.630278587341309, |
|
"learning_rate": 2.8854844606946985e-05, |
|
"loss": 0.7074, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.19744058500914077, |
|
"grad_norm": 6.3954901695251465, |
|
"learning_rate": 2.883290676416819e-05, |
|
"loss": 0.7857, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.20109689213893966, |
|
"grad_norm": 10.803849220275879, |
|
"learning_rate": 2.8810968921389397e-05, |
|
"loss": 0.6814, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.20475319926873858, |
|
"grad_norm": 5.025099277496338, |
|
"learning_rate": 2.8789031078610603e-05, |
|
"loss": 0.8558, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.20840950639853748, |
|
"grad_norm": 10.094544410705566, |
|
"learning_rate": 2.8767093235831813e-05, |
|
"loss": 0.9323, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.21206581352833637, |
|
"grad_norm": 9.443562507629395, |
|
"learning_rate": 2.874515539305302e-05, |
|
"loss": 0.715, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.21572212065813529, |
|
"grad_norm": 11.677664756774902, |
|
"learning_rate": 2.8723217550274222e-05, |
|
"loss": 0.8864, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.21937842778793418, |
|
"grad_norm": 4.913455009460449, |
|
"learning_rate": 2.8701279707495428e-05, |
|
"loss": 0.6386, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2230347349177331, |
|
"grad_norm": 6.794945240020752, |
|
"learning_rate": 2.8679341864716638e-05, |
|
"loss": 0.6967, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.226691042047532, |
|
"grad_norm": 8.743935585021973, |
|
"learning_rate": 2.8657404021937844e-05, |
|
"loss": 0.6984, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2303473491773309, |
|
"grad_norm": 8.499006271362305, |
|
"learning_rate": 2.863546617915905e-05, |
|
"loss": 0.7081, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.2340036563071298, |
|
"grad_norm": 7.359218597412109, |
|
"learning_rate": 2.8613528336380256e-05, |
|
"loss": 0.7312, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.2376599634369287, |
|
"grad_norm": 8.67283821105957, |
|
"learning_rate": 2.8591590493601462e-05, |
|
"loss": 0.73, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.2413162705667276, |
|
"grad_norm": 9.145535469055176, |
|
"learning_rate": 2.856965265082267e-05, |
|
"loss": 0.6741, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2449725776965265, |
|
"grad_norm": 14.087048530578613, |
|
"learning_rate": 2.8547714808043878e-05, |
|
"loss": 0.7393, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.24862888482632542, |
|
"grad_norm": 11.732462882995605, |
|
"learning_rate": 2.8525776965265084e-05, |
|
"loss": 0.6989, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.2522851919561243, |
|
"grad_norm": 7.398434638977051, |
|
"learning_rate": 2.8503839122486287e-05, |
|
"loss": 0.611, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.25594149908592323, |
|
"grad_norm": 5.068675994873047, |
|
"learning_rate": 2.8481901279707496e-05, |
|
"loss": 0.7415, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2595978062157221, |
|
"grad_norm": 9.75862979888916, |
|
"learning_rate": 2.8459963436928702e-05, |
|
"loss": 0.8388, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.263254113345521, |
|
"grad_norm": 9.038466453552246, |
|
"learning_rate": 2.843802559414991e-05, |
|
"loss": 0.7232, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.26691042047531993, |
|
"grad_norm": 13.121977806091309, |
|
"learning_rate": 2.8416087751371115e-05, |
|
"loss": 0.6463, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.27056672760511885, |
|
"grad_norm": 10.064229011535645, |
|
"learning_rate": 2.8394149908592324e-05, |
|
"loss": 0.8457, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2742230347349177, |
|
"grad_norm": 10.455716133117676, |
|
"learning_rate": 2.837221206581353e-05, |
|
"loss": 0.7311, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.27787934186471663, |
|
"grad_norm": 9.248018264770508, |
|
"learning_rate": 2.8350274223034736e-05, |
|
"loss": 0.8482, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.28153564899451555, |
|
"grad_norm": 7.202044486999512, |
|
"learning_rate": 2.8328336380255943e-05, |
|
"loss": 0.7483, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2851919561243144, |
|
"grad_norm": 5.500239849090576, |
|
"learning_rate": 2.830639853747715e-05, |
|
"loss": 0.812, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.28884826325411334, |
|
"grad_norm": 14.437928199768066, |
|
"learning_rate": 2.8284460694698355e-05, |
|
"loss": 0.6839, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.29250457038391225, |
|
"grad_norm": 8.881915092468262, |
|
"learning_rate": 2.826252285191956e-05, |
|
"loss": 0.8167, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2961608775137112, |
|
"grad_norm": 13.634603500366211, |
|
"learning_rate": 2.8240585009140767e-05, |
|
"loss": 0.9943, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.29981718464351004, |
|
"grad_norm": 11.794356346130371, |
|
"learning_rate": 2.8218647166361973e-05, |
|
"loss": 0.8036, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.30347349177330896, |
|
"grad_norm": 9.6803617477417, |
|
"learning_rate": 2.8196709323583183e-05, |
|
"loss": 0.7858, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3071297989031079, |
|
"grad_norm": 7.423046588897705, |
|
"learning_rate": 2.817477148080439e-05, |
|
"loss": 0.7126, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.31078610603290674, |
|
"grad_norm": 6.547556400299072, |
|
"learning_rate": 2.8152833638025595e-05, |
|
"loss": 0.886, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.31444241316270566, |
|
"grad_norm": 10.207584381103516, |
|
"learning_rate": 2.81308957952468e-05, |
|
"loss": 0.7013, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3180987202925046, |
|
"grad_norm": 9.12232494354248, |
|
"learning_rate": 2.810895795246801e-05, |
|
"loss": 0.821, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3217550274223035, |
|
"grad_norm": 8.086636543273926, |
|
"learning_rate": 2.8087020109689217e-05, |
|
"loss": 0.8873, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.32541133455210236, |
|
"grad_norm": 9.748858451843262, |
|
"learning_rate": 2.806508226691042e-05, |
|
"loss": 0.8942, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.3290676416819013, |
|
"grad_norm": 11.087379455566406, |
|
"learning_rate": 2.8043144424131626e-05, |
|
"loss": 0.8282, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3327239488117002, |
|
"grad_norm": 10.066028594970703, |
|
"learning_rate": 2.8021206581352832e-05, |
|
"loss": 0.7694, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.33638025594149906, |
|
"grad_norm": 10.349629402160645, |
|
"learning_rate": 2.799926873857404e-05, |
|
"loss": 0.9706, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.340036563071298, |
|
"grad_norm": 5.540337562561035, |
|
"learning_rate": 2.7977330895795248e-05, |
|
"loss": 0.6998, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.3436928702010969, |
|
"grad_norm": 3.4147696495056152, |
|
"learning_rate": 2.7955393053016454e-05, |
|
"loss": 0.6818, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.3473491773308958, |
|
"grad_norm": 13.466970443725586, |
|
"learning_rate": 2.793345521023766e-05, |
|
"loss": 0.8013, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.3510054844606947, |
|
"grad_norm": 6.585829734802246, |
|
"learning_rate": 2.791151736745887e-05, |
|
"loss": 0.6507, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.3546617915904936, |
|
"grad_norm": 3.3851397037506104, |
|
"learning_rate": 2.7889579524680076e-05, |
|
"loss": 0.8193, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.3583180987202925, |
|
"grad_norm": 12.482742309570312, |
|
"learning_rate": 2.7867641681901282e-05, |
|
"loss": 0.7622, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.3619744058500914, |
|
"grad_norm": 9.126582145690918, |
|
"learning_rate": 2.7845703839122484e-05, |
|
"loss": 0.6539, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.3656307129798903, |
|
"grad_norm": 6.254278182983398, |
|
"learning_rate": 2.7823765996343694e-05, |
|
"loss": 0.6231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3692870201096892, |
|
"grad_norm": 5.566930294036865, |
|
"learning_rate": 2.78018281535649e-05, |
|
"loss": 0.8925, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.37294332723948814, |
|
"grad_norm": 11.380731582641602, |
|
"learning_rate": 2.7779890310786106e-05, |
|
"loss": 0.8145, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.376599634369287, |
|
"grad_norm": 5.229077339172363, |
|
"learning_rate": 2.7757952468007312e-05, |
|
"loss": 0.6471, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.3802559414990859, |
|
"grad_norm": 7.065961837768555, |
|
"learning_rate": 2.773601462522852e-05, |
|
"loss": 0.658, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.38391224862888484, |
|
"grad_norm": 7.386284828186035, |
|
"learning_rate": 2.7714076782449728e-05, |
|
"loss": 0.6973, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3875685557586837, |
|
"grad_norm": 4.258168697357178, |
|
"learning_rate": 2.7692138939670934e-05, |
|
"loss": 0.6318, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.3912248628884826, |
|
"grad_norm": 10.302197456359863, |
|
"learning_rate": 2.767020109689214e-05, |
|
"loss": 0.5522, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.39488117001828155, |
|
"grad_norm": 6.281784534454346, |
|
"learning_rate": 2.7648263254113347e-05, |
|
"loss": 0.7723, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.39853747714808047, |
|
"grad_norm": 8.805102348327637, |
|
"learning_rate": 2.7626325411334553e-05, |
|
"loss": 0.668, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.40219378427787933, |
|
"grad_norm": 14.54948902130127, |
|
"learning_rate": 2.760438756855576e-05, |
|
"loss": 0.921, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.40585009140767825, |
|
"grad_norm": 7.115081310272217, |
|
"learning_rate": 2.7582449725776965e-05, |
|
"loss": 0.7194, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.40950639853747717, |
|
"grad_norm": 2.9493892192840576, |
|
"learning_rate": 2.756051188299817e-05, |
|
"loss": 0.6247, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.41316270566727603, |
|
"grad_norm": 16.915966033935547, |
|
"learning_rate": 2.7538574040219377e-05, |
|
"loss": 0.8615, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.41681901279707495, |
|
"grad_norm": 5.787754535675049, |
|
"learning_rate": 2.7516636197440587e-05, |
|
"loss": 0.6051, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.42047531992687387, |
|
"grad_norm": 10.545123100280762, |
|
"learning_rate": 2.7494698354661793e-05, |
|
"loss": 0.7797, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.42413162705667273, |
|
"grad_norm": 15.382741928100586, |
|
"learning_rate": 2.7472760511883e-05, |
|
"loss": 1.0864, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.42778793418647165, |
|
"grad_norm": 5.235750198364258, |
|
"learning_rate": 2.7450822669104205e-05, |
|
"loss": 0.7217, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.43144424131627057, |
|
"grad_norm": 6.794938564300537, |
|
"learning_rate": 2.7428884826325415e-05, |
|
"loss": 0.9402, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.4351005484460695, |
|
"grad_norm": 11.024620056152344, |
|
"learning_rate": 2.7406946983546617e-05, |
|
"loss": 0.7542, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.43875685557586835, |
|
"grad_norm": 11.393266677856445, |
|
"learning_rate": 2.7385009140767824e-05, |
|
"loss": 0.6272, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4424131627056673, |
|
"grad_norm": 8.483016967773438, |
|
"learning_rate": 2.736307129798903e-05, |
|
"loss": 1.2218, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.4460694698354662, |
|
"grad_norm": 12.325540542602539, |
|
"learning_rate": 2.734113345521024e-05, |
|
"loss": 0.6524, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.44972577696526506, |
|
"grad_norm": 5.426061630249023, |
|
"learning_rate": 2.7319195612431445e-05, |
|
"loss": 0.8768, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.453382084095064, |
|
"grad_norm": 6.959734916687012, |
|
"learning_rate": 2.729725776965265e-05, |
|
"loss": 0.5997, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.4570383912248629, |
|
"grad_norm": 14.661490440368652, |
|
"learning_rate": 2.7275319926873858e-05, |
|
"loss": 0.8667, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.4606946983546618, |
|
"grad_norm": 10.735424995422363, |
|
"learning_rate": 2.7253382084095064e-05, |
|
"loss": 0.6064, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.4643510054844607, |
|
"grad_norm": 10.7152681350708, |
|
"learning_rate": 2.7231444241316273e-05, |
|
"loss": 0.8671, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.4680073126142596, |
|
"grad_norm": 8.87678050994873, |
|
"learning_rate": 2.720950639853748e-05, |
|
"loss": 0.9788, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.4716636197440585, |
|
"grad_norm": 1.8030093908309937, |
|
"learning_rate": 2.7187568555758682e-05, |
|
"loss": 0.7143, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.4753199268738574, |
|
"grad_norm": 10.601454734802246, |
|
"learning_rate": 2.716563071297989e-05, |
|
"loss": 0.8064, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.4789762340036563, |
|
"grad_norm": 7.095282554626465, |
|
"learning_rate": 2.7143692870201098e-05, |
|
"loss": 0.7545, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.4826325411334552, |
|
"grad_norm": 1.267622470855713, |
|
"learning_rate": 2.7121755027422304e-05, |
|
"loss": 0.765, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.48628884826325414, |
|
"grad_norm": 11.8803071975708, |
|
"learning_rate": 2.709981718464351e-05, |
|
"loss": 0.9996, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.489945155393053, |
|
"grad_norm": 9.95639705657959, |
|
"learning_rate": 2.7077879341864716e-05, |
|
"loss": 1.0536, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.4936014625228519, |
|
"grad_norm": 11.731663703918457, |
|
"learning_rate": 2.7055941499085926e-05, |
|
"loss": 0.8901, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.49725776965265084, |
|
"grad_norm": 7.863046646118164, |
|
"learning_rate": 2.7034003656307132e-05, |
|
"loss": 0.6168, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.5009140767824497, |
|
"grad_norm": 7.594435214996338, |
|
"learning_rate": 2.7012065813528338e-05, |
|
"loss": 1.1098, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.5045703839122486, |
|
"grad_norm": 5.769408702850342, |
|
"learning_rate": 2.6990127970749544e-05, |
|
"loss": 0.6672, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.5082266910420475, |
|
"grad_norm": 7.641537666320801, |
|
"learning_rate": 2.696819012797075e-05, |
|
"loss": 0.9141, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.5118829981718465, |
|
"grad_norm": 8.880860328674316, |
|
"learning_rate": 2.6946252285191957e-05, |
|
"loss": 0.7542, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5155393053016454, |
|
"grad_norm": 3.2335469722747803, |
|
"learning_rate": 2.6924314442413163e-05, |
|
"loss": 0.7966, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.5191956124314442, |
|
"grad_norm": 3.989349365234375, |
|
"learning_rate": 2.690237659963437e-05, |
|
"loss": 0.7838, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.5228519195612431, |
|
"grad_norm": 12.424365043640137, |
|
"learning_rate": 2.6880438756855575e-05, |
|
"loss": 0.9574, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.526508226691042, |
|
"grad_norm": 6.308820724487305, |
|
"learning_rate": 2.6858500914076785e-05, |
|
"loss": 0.6676, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.5301645338208409, |
|
"grad_norm": 6.80699348449707, |
|
"learning_rate": 2.683656307129799e-05, |
|
"loss": 0.6364, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.5338208409506399, |
|
"grad_norm": 7.654812335968018, |
|
"learning_rate": 2.6814625228519197e-05, |
|
"loss": 0.8394, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.5374771480804388, |
|
"grad_norm": 3.173919677734375, |
|
"learning_rate": 2.6792687385740403e-05, |
|
"loss": 0.4993, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.5411334552102377, |
|
"grad_norm": 11.510188102722168, |
|
"learning_rate": 2.677074954296161e-05, |
|
"loss": 1.0199, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.5447897623400365, |
|
"grad_norm": 9.919046401977539, |
|
"learning_rate": 2.674881170018282e-05, |
|
"loss": 0.8567, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.5484460694698354, |
|
"grad_norm": 10.544548034667969, |
|
"learning_rate": 2.672687385740402e-05, |
|
"loss": 0.8208, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5521023765996343, |
|
"grad_norm": 10.39263916015625, |
|
"learning_rate": 2.6704936014625228e-05, |
|
"loss": 1.0027, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.5557586837294333, |
|
"grad_norm": 7.957463264465332, |
|
"learning_rate": 2.6682998171846434e-05, |
|
"loss": 0.5865, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.5594149908592322, |
|
"grad_norm": 6.65998649597168, |
|
"learning_rate": 2.6661060329067643e-05, |
|
"loss": 1.1056, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.5630712979890311, |
|
"grad_norm": 4.286714553833008, |
|
"learning_rate": 2.663912248628885e-05, |
|
"loss": 0.923, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.56672760511883, |
|
"grad_norm": 12.143743515014648, |
|
"learning_rate": 2.6617184643510055e-05, |
|
"loss": 0.8542, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.5703839122486288, |
|
"grad_norm": 7.362223148345947, |
|
"learning_rate": 2.659524680073126e-05, |
|
"loss": 0.9177, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.5740402193784278, |
|
"grad_norm": 8.774934768676758, |
|
"learning_rate": 2.657330895795247e-05, |
|
"loss": 0.7503, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.5776965265082267, |
|
"grad_norm": 7.924509048461914, |
|
"learning_rate": 2.6551371115173677e-05, |
|
"loss": 0.8291, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.5813528336380256, |
|
"grad_norm": 4.72158145904541, |
|
"learning_rate": 2.6529433272394883e-05, |
|
"loss": 0.6627, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.5850091407678245, |
|
"grad_norm": 4.265242576599121, |
|
"learning_rate": 2.6507495429616086e-05, |
|
"loss": 0.618, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.5886654478976234, |
|
"grad_norm": 7.109083652496338, |
|
"learning_rate": 2.6485557586837292e-05, |
|
"loss": 0.7683, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.5923217550274223, |
|
"grad_norm": 8.804269790649414, |
|
"learning_rate": 2.6463619744058502e-05, |
|
"loss": 0.6226, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.5959780621572212, |
|
"grad_norm": 5.748142242431641, |
|
"learning_rate": 2.6441681901279708e-05, |
|
"loss": 0.6175, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.5996343692870201, |
|
"grad_norm": 10.173929214477539, |
|
"learning_rate": 2.6419744058500914e-05, |
|
"loss": 0.6959, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.603290676416819, |
|
"grad_norm": 6.71423864364624, |
|
"learning_rate": 2.639780621572212e-05, |
|
"loss": 0.6785, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.6069469835466179, |
|
"grad_norm": 11.05833625793457, |
|
"learning_rate": 2.637586837294333e-05, |
|
"loss": 0.6683, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.6106032906764168, |
|
"grad_norm": 8.08876895904541, |
|
"learning_rate": 2.6353930530164536e-05, |
|
"loss": 0.8345, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.6142595978062158, |
|
"grad_norm": 8.007697105407715, |
|
"learning_rate": 2.6331992687385742e-05, |
|
"loss": 0.9306, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.6179159049360147, |
|
"grad_norm": 8.34351921081543, |
|
"learning_rate": 2.6310054844606948e-05, |
|
"loss": 0.9681, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.6215722120658135, |
|
"grad_norm": 9.194400787353516, |
|
"learning_rate": 2.6288117001828154e-05, |
|
"loss": 0.9323, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6252285191956124, |
|
"grad_norm": 3.603123903274536, |
|
"learning_rate": 2.626617915904936e-05, |
|
"loss": 0.67, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.6288848263254113, |
|
"grad_norm": 6.769972801208496, |
|
"learning_rate": 2.6244241316270567e-05, |
|
"loss": 0.6847, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.6325411334552102, |
|
"grad_norm": 6.123934745788574, |
|
"learning_rate": 2.6222303473491773e-05, |
|
"loss": 0.5735, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.6361974405850092, |
|
"grad_norm": 8.356404304504395, |
|
"learning_rate": 2.620036563071298e-05, |
|
"loss": 0.7249, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.6398537477148081, |
|
"grad_norm": 5.085474014282227, |
|
"learning_rate": 2.617842778793419e-05, |
|
"loss": 0.8015, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.643510054844607, |
|
"grad_norm": 6.887426376342773, |
|
"learning_rate": 2.6156489945155395e-05, |
|
"loss": 0.6637, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.6471663619744058, |
|
"grad_norm": 7.155372619628906, |
|
"learning_rate": 2.61345521023766e-05, |
|
"loss": 0.6163, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.6508226691042047, |
|
"grad_norm": 10.486412048339844, |
|
"learning_rate": 2.6112614259597807e-05, |
|
"loss": 0.7365, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.6544789762340036, |
|
"grad_norm": 8.337804794311523, |
|
"learning_rate": 2.6090676416819016e-05, |
|
"loss": 0.6944, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.6581352833638026, |
|
"grad_norm": 8.610974311828613, |
|
"learning_rate": 2.606873857404022e-05, |
|
"loss": 0.6498, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6617915904936015, |
|
"grad_norm": 9.723325729370117, |
|
"learning_rate": 2.6046800731261425e-05, |
|
"loss": 0.6993, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.6654478976234004, |
|
"grad_norm": 9.187579154968262, |
|
"learning_rate": 2.602486288848263e-05, |
|
"loss": 0.8795, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.6691042047531993, |
|
"grad_norm": 9.775445938110352, |
|
"learning_rate": 2.600292504570384e-05, |
|
"loss": 0.8081, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.6727605118829981, |
|
"grad_norm": 10.012187004089355, |
|
"learning_rate": 2.5980987202925047e-05, |
|
"loss": 0.7079, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.676416819012797, |
|
"grad_norm": 10.074971199035645, |
|
"learning_rate": 2.5959049360146253e-05, |
|
"loss": 0.6554, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.680073126142596, |
|
"grad_norm": 11.149927139282227, |
|
"learning_rate": 2.593711151736746e-05, |
|
"loss": 0.8357, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.6837294332723949, |
|
"grad_norm": 5.098260879516602, |
|
"learning_rate": 2.5915173674588666e-05, |
|
"loss": 0.7488, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.6873857404021938, |
|
"grad_norm": 8.32321834564209, |
|
"learning_rate": 2.5893235831809875e-05, |
|
"loss": 0.7639, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.6910420475319927, |
|
"grad_norm": 8.753900527954102, |
|
"learning_rate": 2.587129798903108e-05, |
|
"loss": 0.8777, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.6946983546617916, |
|
"grad_norm": 5.129249095916748, |
|
"learning_rate": 2.5849360146252284e-05, |
|
"loss": 0.7593, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.6983546617915904, |
|
"grad_norm": 10.712813377380371, |
|
"learning_rate": 2.582742230347349e-05, |
|
"loss": 0.6266, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.7020109689213894, |
|
"grad_norm": 4.966675758361816, |
|
"learning_rate": 2.58054844606947e-05, |
|
"loss": 0.7851, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.7056672760511883, |
|
"grad_norm": 4.763036727905273, |
|
"learning_rate": 2.5783546617915906e-05, |
|
"loss": 0.9193, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.7093235831809872, |
|
"grad_norm": 10.881400108337402, |
|
"learning_rate": 2.5761608775137112e-05, |
|
"loss": 0.7159, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.7129798903107861, |
|
"grad_norm": 8.307093620300293, |
|
"learning_rate": 2.5739670932358318e-05, |
|
"loss": 0.7091, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.716636197440585, |
|
"grad_norm": 8.85936450958252, |
|
"learning_rate": 2.5717733089579524e-05, |
|
"loss": 0.6216, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.720292504570384, |
|
"grad_norm": 8.200945854187012, |
|
"learning_rate": 2.5695795246800734e-05, |
|
"loss": 0.7674, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.7239488117001828, |
|
"grad_norm": 6.665803909301758, |
|
"learning_rate": 2.567385740402194e-05, |
|
"loss": 0.5824, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.7276051188299817, |
|
"grad_norm": 13.1766357421875, |
|
"learning_rate": 2.5651919561243146e-05, |
|
"loss": 0.8602, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.7312614259597806, |
|
"grad_norm": 12.900677680969238, |
|
"learning_rate": 2.562998171846435e-05, |
|
"loss": 0.7945, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7349177330895795, |
|
"grad_norm": 8.223727226257324, |
|
"learning_rate": 2.5608043875685558e-05, |
|
"loss": 0.7058, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.7385740402193784, |
|
"grad_norm": 5.132645606994629, |
|
"learning_rate": 2.5586106032906764e-05, |
|
"loss": 0.6005, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.7422303473491774, |
|
"grad_norm": 5.319431304931641, |
|
"learning_rate": 2.556416819012797e-05, |
|
"loss": 0.6141, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.7458866544789763, |
|
"grad_norm": 4.22127628326416, |
|
"learning_rate": 2.5542230347349177e-05, |
|
"loss": 0.7697, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.7495429616087751, |
|
"grad_norm": 7.919135093688965, |
|
"learning_rate": 2.5520292504570386e-05, |
|
"loss": 0.6771, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.753199268738574, |
|
"grad_norm": 8.82950496673584, |
|
"learning_rate": 2.5498354661791592e-05, |
|
"loss": 0.7459, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.7568555758683729, |
|
"grad_norm": 6.079866886138916, |
|
"learning_rate": 2.54764168190128e-05, |
|
"loss": 0.78, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.7605118829981719, |
|
"grad_norm": 9.02277660369873, |
|
"learning_rate": 2.5454478976234005e-05, |
|
"loss": 0.6527, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.7641681901279708, |
|
"grad_norm": 7.963276386260986, |
|
"learning_rate": 2.543254113345521e-05, |
|
"loss": 0.9617, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.7678244972577697, |
|
"grad_norm": 15.237689971923828, |
|
"learning_rate": 2.5410603290676417e-05, |
|
"loss": 0.9292, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7714808043875686, |
|
"grad_norm": 8.40709114074707, |
|
"learning_rate": 2.5388665447897623e-05, |
|
"loss": 1.1213, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.7751371115173674, |
|
"grad_norm": 15.25880241394043, |
|
"learning_rate": 2.536672760511883e-05, |
|
"loss": 0.9129, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.7787934186471663, |
|
"grad_norm": 9.398399353027344, |
|
"learning_rate": 2.5344789762340035e-05, |
|
"loss": 1.0284, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.7824497257769653, |
|
"grad_norm": 9.999375343322754, |
|
"learning_rate": 2.5322851919561245e-05, |
|
"loss": 0.9143, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.7861060329067642, |
|
"grad_norm": 6.247265815734863, |
|
"learning_rate": 2.530091407678245e-05, |
|
"loss": 0.5627, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.7897623400365631, |
|
"grad_norm": 11.39775276184082, |
|
"learning_rate": 2.5278976234003657e-05, |
|
"loss": 0.5297, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.793418647166362, |
|
"grad_norm": 7.309044361114502, |
|
"learning_rate": 2.5257038391224863e-05, |
|
"loss": 0.7952, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.7970749542961609, |
|
"grad_norm": 4.260741710662842, |
|
"learning_rate": 2.5235100548446073e-05, |
|
"loss": 0.6773, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.8007312614259597, |
|
"grad_norm": 6.936405658721924, |
|
"learning_rate": 2.521316270566728e-05, |
|
"loss": 0.8001, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.8043875685557587, |
|
"grad_norm": 6.857205390930176, |
|
"learning_rate": 2.5191224862888482e-05, |
|
"loss": 0.6795, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8080438756855576, |
|
"grad_norm": 14.970353126525879, |
|
"learning_rate": 2.5169287020109688e-05, |
|
"loss": 1.1865, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.8117001828153565, |
|
"grad_norm": 16.46977424621582, |
|
"learning_rate": 2.5147349177330894e-05, |
|
"loss": 1.2037, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.8153564899451554, |
|
"grad_norm": 4.785205841064453, |
|
"learning_rate": 2.5125411334552104e-05, |
|
"loss": 0.8542, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.8190127970749543, |
|
"grad_norm": 8.814366340637207, |
|
"learning_rate": 2.510347349177331e-05, |
|
"loss": 1.0594, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.8226691042047533, |
|
"grad_norm": 6.870213031768799, |
|
"learning_rate": 2.5081535648994516e-05, |
|
"loss": 0.8027, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.8263254113345521, |
|
"grad_norm": 3.1548120975494385, |
|
"learning_rate": 2.5059597806215722e-05, |
|
"loss": 0.5979, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.829981718464351, |
|
"grad_norm": 7.584613800048828, |
|
"learning_rate": 2.503765996343693e-05, |
|
"loss": 0.6417, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.8336380255941499, |
|
"grad_norm": 10.385662078857422, |
|
"learning_rate": 2.5015722120658138e-05, |
|
"loss": 0.9269, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.8372943327239488, |
|
"grad_norm": 4.356326103210449, |
|
"learning_rate": 2.4993784277879344e-05, |
|
"loss": 0.7558, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.8409506398537477, |
|
"grad_norm": 12.305597305297852, |
|
"learning_rate": 2.4971846435100547e-05, |
|
"loss": 0.9336, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8446069469835467, |
|
"grad_norm": 12.440481185913086, |
|
"learning_rate": 2.4949908592321753e-05, |
|
"loss": 0.7337, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.8482632541133455, |
|
"grad_norm": 14.280756950378418, |
|
"learning_rate": 2.4927970749542962e-05, |
|
"loss": 0.7303, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.8519195612431444, |
|
"grad_norm": 3.728710412979126, |
|
"learning_rate": 2.490603290676417e-05, |
|
"loss": 0.8716, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.8555758683729433, |
|
"grad_norm": 7.865159034729004, |
|
"learning_rate": 2.4884095063985374e-05, |
|
"loss": 1.0542, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.8592321755027422, |
|
"grad_norm": 8.721333503723145, |
|
"learning_rate": 2.486215722120658e-05, |
|
"loss": 1.0551, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.8628884826325411, |
|
"grad_norm": 1.7179598808288574, |
|
"learning_rate": 2.484021937842779e-05, |
|
"loss": 0.7777, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.8665447897623401, |
|
"grad_norm": 5.079452037811279, |
|
"learning_rate": 2.4818281535648996e-05, |
|
"loss": 0.7584, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.870201096892139, |
|
"grad_norm": 2.566901683807373, |
|
"learning_rate": 2.4796343692870202e-05, |
|
"loss": 0.9418, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.8738574040219378, |
|
"grad_norm": 0.8049097061157227, |
|
"learning_rate": 2.477440585009141e-05, |
|
"loss": 1.0866, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.8775137111517367, |
|
"grad_norm": 12.45459270477295, |
|
"learning_rate": 2.4752468007312615e-05, |
|
"loss": 0.9516, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8811700182815356, |
|
"grad_norm": 10.37132453918457, |
|
"learning_rate": 2.473053016453382e-05, |
|
"loss": 0.885, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.8848263254113345, |
|
"grad_norm": 11.392967224121094, |
|
"learning_rate": 2.4708592321755027e-05, |
|
"loss": 0.8999, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.8884826325411335, |
|
"grad_norm": 9.597825050354004, |
|
"learning_rate": 2.4686654478976233e-05, |
|
"loss": 0.7255, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.8921389396709324, |
|
"grad_norm": 6.229734897613525, |
|
"learning_rate": 2.466471663619744e-05, |
|
"loss": 0.6297, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.8957952468007313, |
|
"grad_norm": 6.92341423034668, |
|
"learning_rate": 2.464277879341865e-05, |
|
"loss": 0.8446, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.8994515539305301, |
|
"grad_norm": 6.999603748321533, |
|
"learning_rate": 2.4620840950639855e-05, |
|
"loss": 0.9243, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.903107861060329, |
|
"grad_norm": 6.688783645629883, |
|
"learning_rate": 2.459890310786106e-05, |
|
"loss": 0.9761, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.906764168190128, |
|
"grad_norm": 4.743894577026367, |
|
"learning_rate": 2.4576965265082267e-05, |
|
"loss": 0.8031, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.9104204753199269, |
|
"grad_norm": 5.617483139038086, |
|
"learning_rate": 2.4555027422303477e-05, |
|
"loss": 0.6959, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.9140767824497258, |
|
"grad_norm": 8.579802513122559, |
|
"learning_rate": 2.4533089579524683e-05, |
|
"loss": 0.9484, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.9177330895795247, |
|
"grad_norm": 13.061285018920898, |
|
"learning_rate": 2.4511151736745886e-05, |
|
"loss": 1.2487, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.9213893967093236, |
|
"grad_norm": 9.990108489990234, |
|
"learning_rate": 2.4489213893967092e-05, |
|
"loss": 0.8655, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.9250457038391224, |
|
"grad_norm": 7.661534309387207, |
|
"learning_rate": 2.44672760511883e-05, |
|
"loss": 0.8729, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.9287020109689214, |
|
"grad_norm": 3.4216301441192627, |
|
"learning_rate": 2.4445338208409507e-05, |
|
"loss": 0.6208, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.9323583180987203, |
|
"grad_norm": 6.860260963439941, |
|
"learning_rate": 2.4423400365630714e-05, |
|
"loss": 0.8488, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.9360146252285192, |
|
"grad_norm": 8.332857131958008, |
|
"learning_rate": 2.440146252285192e-05, |
|
"loss": 0.5859, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.9396709323583181, |
|
"grad_norm": 6.4805402755737305, |
|
"learning_rate": 2.4379524680073126e-05, |
|
"loss": 0.7436, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.943327239488117, |
|
"grad_norm": 5.344940662384033, |
|
"learning_rate": 2.4357586837294335e-05, |
|
"loss": 1.0088, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.946983546617916, |
|
"grad_norm": 9.946269035339355, |
|
"learning_rate": 2.433564899451554e-05, |
|
"loss": 0.7087, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.9506398537477148, |
|
"grad_norm": 4.209563255310059, |
|
"learning_rate": 2.4313711151736748e-05, |
|
"loss": 0.5656, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9542961608775137, |
|
"grad_norm": 4.404534816741943, |
|
"learning_rate": 2.429177330895795e-05, |
|
"loss": 0.6234, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.9579524680073126, |
|
"grad_norm": 4.724971294403076, |
|
"learning_rate": 2.426983546617916e-05, |
|
"loss": 0.7714, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.9616087751371115, |
|
"grad_norm": 6.836884498596191, |
|
"learning_rate": 2.4247897623400366e-05, |
|
"loss": 0.8142, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.9652650822669104, |
|
"grad_norm": 3.4139904975891113, |
|
"learning_rate": 2.4225959780621572e-05, |
|
"loss": 0.5885, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.9689213893967094, |
|
"grad_norm": 13.546429634094238, |
|
"learning_rate": 2.420402193784278e-05, |
|
"loss": 0.8844, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.9725776965265083, |
|
"grad_norm": 5.117456436157227, |
|
"learning_rate": 2.4182084095063988e-05, |
|
"loss": 0.7408, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.9762340036563071, |
|
"grad_norm": 11.973124504089355, |
|
"learning_rate": 2.4160146252285194e-05, |
|
"loss": 0.9015, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.979890310786106, |
|
"grad_norm": 7.9256815910339355, |
|
"learning_rate": 2.41382084095064e-05, |
|
"loss": 0.8179, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.9835466179159049, |
|
"grad_norm": 0.613832414150238, |
|
"learning_rate": 2.4116270566727606e-05, |
|
"loss": 0.8218, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.9872029250457038, |
|
"grad_norm": 2.720730781555176, |
|
"learning_rate": 2.4094332723948813e-05, |
|
"loss": 0.5718, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9908592321755028, |
|
"grad_norm": 5.895959854125977, |
|
"learning_rate": 2.407239488117002e-05, |
|
"loss": 0.9486, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.9945155393053017, |
|
"grad_norm": 6.581000804901123, |
|
"learning_rate": 2.4050457038391225e-05, |
|
"loss": 0.7959, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.9981718464351006, |
|
"grad_norm": 7.979818344116211, |
|
"learning_rate": 2.402851919561243e-05, |
|
"loss": 0.5748, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.0018281535648994, |
|
"grad_norm": 5.917481422424316, |
|
"learning_rate": 2.4006581352833637e-05, |
|
"loss": 1.0186, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.0054844606946984, |
|
"grad_norm": 1.8859217166900635, |
|
"learning_rate": 2.3984643510054847e-05, |
|
"loss": 0.8374, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.0091407678244972, |
|
"grad_norm": 10.354247093200684, |
|
"learning_rate": 2.3962705667276053e-05, |
|
"loss": 1.0679, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.012797074954296, |
|
"grad_norm": 6.047128677368164, |
|
"learning_rate": 2.394076782449726e-05, |
|
"loss": 1.0716, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.016453382084095, |
|
"grad_norm": 11.777497291564941, |
|
"learning_rate": 2.3918829981718465e-05, |
|
"loss": 0.8161, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.0201096892138939, |
|
"grad_norm": 3.427635908126831, |
|
"learning_rate": 2.389689213893967e-05, |
|
"loss": 0.6662, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.023765996343693, |
|
"grad_norm": 14.091401100158691, |
|
"learning_rate": 2.387495429616088e-05, |
|
"loss": 0.7342, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0274223034734917, |
|
"grad_norm": 6.376955032348633, |
|
"learning_rate": 2.3853016453382083e-05, |
|
"loss": 0.6482, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.0310786106032908, |
|
"grad_norm": 3.5191450119018555, |
|
"learning_rate": 2.383107861060329e-05, |
|
"loss": 0.9219, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.0347349177330896, |
|
"grad_norm": 4.531268119812012, |
|
"learning_rate": 2.3809140767824496e-05, |
|
"loss": 1.2418, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.0383912248628886, |
|
"grad_norm": 6.366710186004639, |
|
"learning_rate": 2.3787202925045705e-05, |
|
"loss": 0.6533, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.0420475319926874, |
|
"grad_norm": 2.5387659072875977, |
|
"learning_rate": 2.376526508226691e-05, |
|
"loss": 1.0503, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.0457038391224862, |
|
"grad_norm": 3.4339308738708496, |
|
"learning_rate": 2.3743327239488118e-05, |
|
"loss": 1.5317, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.0493601462522852, |
|
"grad_norm": 20.403852462768555, |
|
"learning_rate": 2.3721389396709324e-05, |
|
"loss": 1.1739, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.053016453382084, |
|
"grad_norm": 9.94764232635498, |
|
"learning_rate": 2.3699451553930533e-05, |
|
"loss": 0.9365, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.056672760511883, |
|
"grad_norm": 4.770013332366943, |
|
"learning_rate": 2.367751371115174e-05, |
|
"loss": 0.4251, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.0603290676416819, |
|
"grad_norm": 1.9703326225280762, |
|
"learning_rate": 2.3655575868372945e-05, |
|
"loss": 0.6515, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.0639853747714807, |
|
"grad_norm": 9.562021255493164, |
|
"learning_rate": 2.3633638025594148e-05, |
|
"loss": 0.7161, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.0676416819012797, |
|
"grad_norm": 10.26481819152832, |
|
"learning_rate": 2.3611700182815354e-05, |
|
"loss": 0.7187, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.0712979890310785, |
|
"grad_norm": 3.004570722579956, |
|
"learning_rate": 2.3589762340036564e-05, |
|
"loss": 1.0414, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.0749542961608776, |
|
"grad_norm": 9.800512313842773, |
|
"learning_rate": 2.356782449725777e-05, |
|
"loss": 0.7966, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.0786106032906764, |
|
"grad_norm": 13.301290512084961, |
|
"learning_rate": 2.3545886654478976e-05, |
|
"loss": 0.9953, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.0822669104204754, |
|
"grad_norm": 2.7511966228485107, |
|
"learning_rate": 2.3523948811700182e-05, |
|
"loss": 0.705, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.0859232175502742, |
|
"grad_norm": 5.51497220993042, |
|
"learning_rate": 2.3502010968921392e-05, |
|
"loss": 0.8642, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.0895795246800732, |
|
"grad_norm": 6.78330659866333, |
|
"learning_rate": 2.3480073126142598e-05, |
|
"loss": 0.7787, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.093235831809872, |
|
"grad_norm": 4.385842323303223, |
|
"learning_rate": 2.3458135283363804e-05, |
|
"loss": 1.0877, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.0968921389396709, |
|
"grad_norm": 6.217209815979004, |
|
"learning_rate": 2.343619744058501e-05, |
|
"loss": 0.8384, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.1005484460694699, |
|
"grad_norm": 13.187909126281738, |
|
"learning_rate": 2.3414259597806216e-05, |
|
"loss": 0.9411, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.1042047531992687, |
|
"grad_norm": 0.5087007880210876, |
|
"learning_rate": 2.3392321755027423e-05, |
|
"loss": 0.9831, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.1078610603290677, |
|
"grad_norm": 1.0318357944488525, |
|
"learning_rate": 2.337038391224863e-05, |
|
"loss": 0.6656, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.1115173674588665, |
|
"grad_norm": 7.319566249847412, |
|
"learning_rate": 2.3348446069469835e-05, |
|
"loss": 0.8284, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.1151736745886653, |
|
"grad_norm": 3.79536771774292, |
|
"learning_rate": 2.332650822669104e-05, |
|
"loss": 0.9016, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.1188299817184644, |
|
"grad_norm": 8.989640235900879, |
|
"learning_rate": 2.330457038391225e-05, |
|
"loss": 0.7304, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.1224862888482632, |
|
"grad_norm": 5.405416965484619, |
|
"learning_rate": 2.3282632541133457e-05, |
|
"loss": 1.0426, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.1261425959780622, |
|
"grad_norm": 2.653970241546631, |
|
"learning_rate": 2.3260694698354663e-05, |
|
"loss": 0.9311, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.129798903107861, |
|
"grad_norm": 0.901639997959137, |
|
"learning_rate": 2.323875685557587e-05, |
|
"loss": 0.9487, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.13345521023766, |
|
"grad_norm": 4.24121618270874, |
|
"learning_rate": 2.321681901279708e-05, |
|
"loss": 0.7323, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.1371115173674589, |
|
"grad_norm": 7.49923849105835, |
|
"learning_rate": 2.319488117001828e-05, |
|
"loss": 0.6855, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.1407678244972579, |
|
"grad_norm": 2.1442363262176514, |
|
"learning_rate": 2.3172943327239487e-05, |
|
"loss": 0.8922, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.1444241316270567, |
|
"grad_norm": 7.328529357910156, |
|
"learning_rate": 2.3151005484460694e-05, |
|
"loss": 0.8567, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.1480804387568555, |
|
"grad_norm": 2.2346909046173096, |
|
"learning_rate": 2.31290676416819e-05, |
|
"loss": 0.9295, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.1517367458866545, |
|
"grad_norm": 1.7337790727615356, |
|
"learning_rate": 2.310712979890311e-05, |
|
"loss": 0.9011, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.1553930530164533, |
|
"grad_norm": 9.902853012084961, |
|
"learning_rate": 2.3085191956124315e-05, |
|
"loss": 1.1711, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.1590493601462524, |
|
"grad_norm": 4.4967217445373535, |
|
"learning_rate": 2.306325411334552e-05, |
|
"loss": 0.8189, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.1627056672760512, |
|
"grad_norm": 9.251031875610352, |
|
"learning_rate": 2.3041316270566728e-05, |
|
"loss": 1.0505, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.16636197440585, |
|
"grad_norm": 9.177526473999023, |
|
"learning_rate": 2.3019378427787937e-05, |
|
"loss": 0.9835, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.170018281535649, |
|
"grad_norm": 2.573434352874756, |
|
"learning_rate": 2.2997440585009143e-05, |
|
"loss": 0.7751, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.1736745886654478, |
|
"grad_norm": 8.38436508178711, |
|
"learning_rate": 2.2975502742230346e-05, |
|
"loss": 0.9563, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.1773308957952469, |
|
"grad_norm": 10.322296142578125, |
|
"learning_rate": 2.2953564899451552e-05, |
|
"loss": 1.0326, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.1809872029250457, |
|
"grad_norm": 1.9485523700714111, |
|
"learning_rate": 2.2931627056672762e-05, |
|
"loss": 1.037, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.1846435100548447, |
|
"grad_norm": 4.380084991455078, |
|
"learning_rate": 2.2909689213893968e-05, |
|
"loss": 0.56, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.1882998171846435, |
|
"grad_norm": 6.871321201324463, |
|
"learning_rate": 2.2887751371115174e-05, |
|
"loss": 0.7471, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.1919561243144425, |
|
"grad_norm": 9.694079399108887, |
|
"learning_rate": 2.286581352833638e-05, |
|
"loss": 0.9119, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.1956124314442413, |
|
"grad_norm": 5.262477874755859, |
|
"learning_rate": 2.2843875685557586e-05, |
|
"loss": 0.6997, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.1992687385740401, |
|
"grad_norm": 4.27209997177124, |
|
"learning_rate": 2.2821937842778796e-05, |
|
"loss": 0.5484, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.2029250457038392, |
|
"grad_norm": 7.245287895202637, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 0.5919, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.206581352833638, |
|
"grad_norm": 4.369983196258545, |
|
"learning_rate": 2.2778062157221208e-05, |
|
"loss": 1.1287, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.210237659963437, |
|
"grad_norm": 1.8020730018615723, |
|
"learning_rate": 2.275612431444241e-05, |
|
"loss": 0.6352, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.2138939670932358, |
|
"grad_norm": 4.279252529144287, |
|
"learning_rate": 2.273418647166362e-05, |
|
"loss": 0.8017, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.2175502742230346, |
|
"grad_norm": 4.222424030303955, |
|
"learning_rate": 2.2712248628884826e-05, |
|
"loss": 0.7692, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.2212065813528337, |
|
"grad_norm": 3.430072069168091, |
|
"learning_rate": 2.2690310786106033e-05, |
|
"loss": 0.7481, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.2248628884826325, |
|
"grad_norm": 5.211468696594238, |
|
"learning_rate": 2.266837294332724e-05, |
|
"loss": 0.8037, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.2285191956124315, |
|
"grad_norm": 9.226336479187012, |
|
"learning_rate": 2.264643510054845e-05, |
|
"loss": 0.9476, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.2321755027422303, |
|
"grad_norm": 4.394392967224121, |
|
"learning_rate": 2.2624497257769654e-05, |
|
"loss": 0.6557, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.2358318098720293, |
|
"grad_norm": 4.641608238220215, |
|
"learning_rate": 2.260255941499086e-05, |
|
"loss": 0.6668, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.2394881170018281, |
|
"grad_norm": 8.342939376831055, |
|
"learning_rate": 2.2580621572212067e-05, |
|
"loss": 0.7831, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.2431444241316272, |
|
"grad_norm": 0.8947893381118774, |
|
"learning_rate": 2.2558683729433273e-05, |
|
"loss": 0.5656, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.246800731261426, |
|
"grad_norm": 6.079960346221924, |
|
"learning_rate": 2.253674588665448e-05, |
|
"loss": 0.9966, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.2504570383912248, |
|
"grad_norm": 9.329411506652832, |
|
"learning_rate": 2.2514808043875685e-05, |
|
"loss": 0.999, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.2541133455210238, |
|
"grad_norm": 5.371129512786865, |
|
"learning_rate": 2.249287020109689e-05, |
|
"loss": 0.5545, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.2577696526508226, |
|
"grad_norm": 5.013857364654541, |
|
"learning_rate": 2.2470932358318097e-05, |
|
"loss": 1.1535, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.2614259597806217, |
|
"grad_norm": 6.94247579574585, |
|
"learning_rate": 2.2448994515539307e-05, |
|
"loss": 1.0131, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.2650822669104205, |
|
"grad_norm": 1.685486078262329, |
|
"learning_rate": 2.2427056672760513e-05, |
|
"loss": 0.8378, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.2687385740402193, |
|
"grad_norm": 4.796342372894287, |
|
"learning_rate": 2.240511882998172e-05, |
|
"loss": 0.6338, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.2723948811700183, |
|
"grad_norm": 5.746938705444336, |
|
"learning_rate": 2.2383180987202925e-05, |
|
"loss": 0.8043, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.2760511882998171, |
|
"grad_norm": 5.947088718414307, |
|
"learning_rate": 2.236124314442413e-05, |
|
"loss": 0.5994, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.2797074954296161, |
|
"grad_norm": 1.3671913146972656, |
|
"learning_rate": 2.233930530164534e-05, |
|
"loss": 0.9907, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.283363802559415, |
|
"grad_norm": 1.2178643941879272, |
|
"learning_rate": 2.2317367458866544e-05, |
|
"loss": 0.6638, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.2870201096892138, |
|
"grad_norm": 8.354637145996094, |
|
"learning_rate": 2.229542961608775e-05, |
|
"loss": 1.1229, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.2906764168190128, |
|
"grad_norm": 3.584672451019287, |
|
"learning_rate": 2.2273491773308956e-05, |
|
"loss": 0.7815, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.2943327239488118, |
|
"grad_norm": 2.3532357215881348, |
|
"learning_rate": 2.2251553930530166e-05, |
|
"loss": 0.8076, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.2979890310786106, |
|
"grad_norm": 3.357630729675293, |
|
"learning_rate": 2.2229616087751372e-05, |
|
"loss": 1.139, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.3016453382084094, |
|
"grad_norm": 7.9423346519470215, |
|
"learning_rate": 2.2207678244972578e-05, |
|
"loss": 1.1081, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.3053016453382085, |
|
"grad_norm": 10.97163200378418, |
|
"learning_rate": 2.2185740402193784e-05, |
|
"loss": 0.6949, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.3089579524680073, |
|
"grad_norm": 3.48557448387146, |
|
"learning_rate": 2.2163802559414994e-05, |
|
"loss": 0.7585, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.3126142595978063, |
|
"grad_norm": 7.3759565353393555, |
|
"learning_rate": 2.21418647166362e-05, |
|
"loss": 1.062, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.3162705667276051, |
|
"grad_norm": 1.880183458328247, |
|
"learning_rate": 2.2119926873857406e-05, |
|
"loss": 0.6005, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.319926873857404, |
|
"grad_norm": 2.7931017875671387, |
|
"learning_rate": 2.2097989031078612e-05, |
|
"loss": 0.5548, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.323583180987203, |
|
"grad_norm": 10.527241706848145, |
|
"learning_rate": 2.2076051188299815e-05, |
|
"loss": 0.549, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.3272394881170018, |
|
"grad_norm": 5.158708095550537, |
|
"learning_rate": 2.2054113345521024e-05, |
|
"loss": 1.5125, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.3308957952468008, |
|
"grad_norm": 2.298628091812134, |
|
"learning_rate": 2.203217550274223e-05, |
|
"loss": 0.854, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.3345521023765996, |
|
"grad_norm": 10.309005737304688, |
|
"learning_rate": 2.2010237659963437e-05, |
|
"loss": 1.0643, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.3382084095063984, |
|
"grad_norm": 3.3284668922424316, |
|
"learning_rate": 2.1988299817184643e-05, |
|
"loss": 1.1608, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.3418647166361974, |
|
"grad_norm": 2.4296984672546387, |
|
"learning_rate": 2.1966361974405852e-05, |
|
"loss": 0.8015, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.3455210237659965, |
|
"grad_norm": 10.130197525024414, |
|
"learning_rate": 2.194442413162706e-05, |
|
"loss": 0.7151, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.3491773308957953, |
|
"grad_norm": 9.950860023498535, |
|
"learning_rate": 2.1922486288848265e-05, |
|
"loss": 0.8923, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.352833638025594, |
|
"grad_norm": 9.493358612060547, |
|
"learning_rate": 2.190054844606947e-05, |
|
"loss": 0.9494, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.3564899451553931, |
|
"grad_norm": 5.511286735534668, |
|
"learning_rate": 2.187861060329068e-05, |
|
"loss": 0.6494, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.360146252285192, |
|
"grad_norm": 0.475504994392395, |
|
"learning_rate": 2.1856672760511883e-05, |
|
"loss": 1.2245, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.363802559414991, |
|
"grad_norm": 8.635137557983398, |
|
"learning_rate": 2.183473491773309e-05, |
|
"loss": 0.5039, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.3674588665447898, |
|
"grad_norm": 3.8953351974487305, |
|
"learning_rate": 2.1812797074954295e-05, |
|
"loss": 0.5876, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.3711151736745886, |
|
"grad_norm": 4.21866512298584, |
|
"learning_rate": 2.17908592321755e-05, |
|
"loss": 0.9671, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.3747714808043876, |
|
"grad_norm": 6.784433364868164, |
|
"learning_rate": 2.176892138939671e-05, |
|
"loss": 1.0172, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.3784277879341864, |
|
"grad_norm": 7.940158367156982, |
|
"learning_rate": 2.1746983546617917e-05, |
|
"loss": 0.8767, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.3820840950639854, |
|
"grad_norm": 0.827899694442749, |
|
"learning_rate": 2.1725045703839123e-05, |
|
"loss": 0.924, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.3857404021937842, |
|
"grad_norm": 4.189643383026123, |
|
"learning_rate": 2.170310786106033e-05, |
|
"loss": 1.0065, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.389396709323583, |
|
"grad_norm": 1.9168022871017456, |
|
"learning_rate": 2.168117001828154e-05, |
|
"loss": 1.5282, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.393053016453382, |
|
"grad_norm": 1.0433759689331055, |
|
"learning_rate": 2.1659232175502745e-05, |
|
"loss": 0.8177, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.3967093235831811, |
|
"grad_norm": 7.197315216064453, |
|
"learning_rate": 2.1637294332723948e-05, |
|
"loss": 0.644, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.40036563071298, |
|
"grad_norm": 4.568287372589111, |
|
"learning_rate": 2.1615356489945154e-05, |
|
"loss": 0.5555, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.4040219378427787, |
|
"grad_norm": 9.683319091796875, |
|
"learning_rate": 2.1593418647166363e-05, |
|
"loss": 0.895, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.4076782449725778, |
|
"grad_norm": 7.343099594116211, |
|
"learning_rate": 2.157148080438757e-05, |
|
"loss": 0.7645, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.4113345521023766, |
|
"grad_norm": 8.893482208251953, |
|
"learning_rate": 2.1549542961608776e-05, |
|
"loss": 0.8237, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.4149908592321756, |
|
"grad_norm": 9.558774948120117, |
|
"learning_rate": 2.1527605118829982e-05, |
|
"loss": 1.4352, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.4186471663619744, |
|
"grad_norm": 3.180133819580078, |
|
"learning_rate": 2.1505667276051188e-05, |
|
"loss": 0.9726, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.4223034734917732, |
|
"grad_norm": 4.226669788360596, |
|
"learning_rate": 2.1483729433272397e-05, |
|
"loss": 0.6608, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.4259597806215722, |
|
"grad_norm": 2.0851640701293945, |
|
"learning_rate": 2.1461791590493604e-05, |
|
"loss": 0.8724, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.429616087751371, |
|
"grad_norm": 1.5792533159255981, |
|
"learning_rate": 2.143985374771481e-05, |
|
"loss": 1.1509, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.43327239488117, |
|
"grad_norm": 5.39309024810791, |
|
"learning_rate": 2.1417915904936013e-05, |
|
"loss": 0.733, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.436928702010969, |
|
"grad_norm": 6.3452677726745605, |
|
"learning_rate": 2.1395978062157222e-05, |
|
"loss": 0.7428, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.4405850091407677, |
|
"grad_norm": 4.476494312286377, |
|
"learning_rate": 2.1374040219378428e-05, |
|
"loss": 0.9109, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.4442413162705667, |
|
"grad_norm": 11.283713340759277, |
|
"learning_rate": 2.1352102376599634e-05, |
|
"loss": 1.0231, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.4478976234003658, |
|
"grad_norm": 3.30483341217041, |
|
"learning_rate": 2.133016453382084e-05, |
|
"loss": 1.0793, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.4515539305301646, |
|
"grad_norm": 5.595132827758789, |
|
"learning_rate": 2.1308226691042047e-05, |
|
"loss": 1.5486, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.4552102376599634, |
|
"grad_norm": 3.3429744243621826, |
|
"learning_rate": 2.1286288848263256e-05, |
|
"loss": 1.4396, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.4588665447897624, |
|
"grad_norm": 2.220364570617676, |
|
"learning_rate": 2.1264351005484462e-05, |
|
"loss": 0.8467, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.4625228519195612, |
|
"grad_norm": 1.5086268186569214, |
|
"learning_rate": 2.124241316270567e-05, |
|
"loss": 0.5857, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4661791590493602, |
|
"grad_norm": 7.653486251831055, |
|
"learning_rate": 2.1220475319926875e-05, |
|
"loss": 1.1748, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.469835466179159, |
|
"grad_norm": 7.453839302062988, |
|
"learning_rate": 2.119853747714808e-05, |
|
"loss": 0.6077, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.4734917733089579, |
|
"grad_norm": 5.1094441413879395, |
|
"learning_rate": 2.1176599634369287e-05, |
|
"loss": 0.7599, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.477148080438757, |
|
"grad_norm": 8.580041885375977, |
|
"learning_rate": 2.1154661791590493e-05, |
|
"loss": 0.7057, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.4808043875685557, |
|
"grad_norm": 5.279627799987793, |
|
"learning_rate": 2.11327239488117e-05, |
|
"loss": 0.7115, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.4844606946983547, |
|
"grad_norm": 5.886457920074463, |
|
"learning_rate": 2.111078610603291e-05, |
|
"loss": 0.7922, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.4881170018281535, |
|
"grad_norm": 8.935380935668945, |
|
"learning_rate": 2.1088848263254115e-05, |
|
"loss": 0.8798, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.4917733089579523, |
|
"grad_norm": 4.792860984802246, |
|
"learning_rate": 2.106691042047532e-05, |
|
"loss": 0.7714, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.4954296160877514, |
|
"grad_norm": 5.927025318145752, |
|
"learning_rate": 2.1044972577696527e-05, |
|
"loss": 0.8479, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.4990859232175504, |
|
"grad_norm": 4.06768798828125, |
|
"learning_rate": 2.1023034734917733e-05, |
|
"loss": 1.0168, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.5027422303473492, |
|
"grad_norm": 5.292023181915283, |
|
"learning_rate": 2.1001096892138943e-05, |
|
"loss": 1.1981, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.506398537477148, |
|
"grad_norm": 8.131914138793945, |
|
"learning_rate": 2.0979159049360146e-05, |
|
"loss": 1.1474, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.5100548446069468, |
|
"grad_norm": 9.737383842468262, |
|
"learning_rate": 2.095722120658135e-05, |
|
"loss": 0.8616, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.5137111517367459, |
|
"grad_norm": 2.422138214111328, |
|
"learning_rate": 2.0935283363802558e-05, |
|
"loss": 0.8315, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.517367458866545, |
|
"grad_norm": 1.734221339225769, |
|
"learning_rate": 2.0913345521023767e-05, |
|
"loss": 0.8787, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.5210237659963437, |
|
"grad_norm": 0.9889214038848877, |
|
"learning_rate": 2.0891407678244973e-05, |
|
"loss": 0.7116, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.5246800731261425, |
|
"grad_norm": 4.243373394012451, |
|
"learning_rate": 2.086946983546618e-05, |
|
"loss": 0.8402, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.5283363802559415, |
|
"grad_norm": 3.111729860305786, |
|
"learning_rate": 2.0847531992687386e-05, |
|
"loss": 1.1098, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.5319926873857403, |
|
"grad_norm": 1.9713119268417358, |
|
"learning_rate": 2.0825594149908595e-05, |
|
"loss": 0.7797, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.5356489945155394, |
|
"grad_norm": 5.521538734436035, |
|
"learning_rate": 2.08036563071298e-05, |
|
"loss": 0.6614, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.5393053016453382, |
|
"grad_norm": 2.166930675506592, |
|
"learning_rate": 2.0781718464351008e-05, |
|
"loss": 0.9268, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.542961608775137, |
|
"grad_norm": 1.7511789798736572, |
|
"learning_rate": 2.075978062157221e-05, |
|
"loss": 0.8894, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.546617915904936, |
|
"grad_norm": 8.769426345825195, |
|
"learning_rate": 2.0737842778793416e-05, |
|
"loss": 0.8558, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.550274223034735, |
|
"grad_norm": 5.798864364624023, |
|
"learning_rate": 2.0715904936014626e-05, |
|
"loss": 0.767, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.5539305301645339, |
|
"grad_norm": 5.127215385437012, |
|
"learning_rate": 2.0693967093235832e-05, |
|
"loss": 0.5996, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.5575868372943327, |
|
"grad_norm": 3.0306711196899414, |
|
"learning_rate": 2.0672029250457038e-05, |
|
"loss": 0.4811, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.5612431444241315, |
|
"grad_norm": 1.175572156906128, |
|
"learning_rate": 2.0650091407678244e-05, |
|
"loss": 0.6173, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.5648994515539305, |
|
"grad_norm": 4.409485340118408, |
|
"learning_rate": 2.0628153564899454e-05, |
|
"loss": 0.9317, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.5685557586837295, |
|
"grad_norm": 4.677966594696045, |
|
"learning_rate": 2.060621572212066e-05, |
|
"loss": 1.0926, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.5722120658135283, |
|
"grad_norm": 8.307379722595215, |
|
"learning_rate": 2.0584277879341866e-05, |
|
"loss": 0.9221, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.5758683729433272, |
|
"grad_norm": 2.0957555770874023, |
|
"learning_rate": 2.0562340036563072e-05, |
|
"loss": 1.0005, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.5795246800731262, |
|
"grad_norm": 1.3396669626235962, |
|
"learning_rate": 2.0540402193784275e-05, |
|
"loss": 0.7351, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.583180987202925, |
|
"grad_norm": 5.14472770690918, |
|
"learning_rate": 2.0518464351005485e-05, |
|
"loss": 1.1173, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.586837294332724, |
|
"grad_norm": 2.601489782333374, |
|
"learning_rate": 2.049652650822669e-05, |
|
"loss": 0.6813, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.5904936014625228, |
|
"grad_norm": 4.059136867523193, |
|
"learning_rate": 2.0474588665447897e-05, |
|
"loss": 1.0248, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.5941499085923216, |
|
"grad_norm": 6.217931747436523, |
|
"learning_rate": 2.0452650822669103e-05, |
|
"loss": 0.7066, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.5978062157221207, |
|
"grad_norm": 7.017310619354248, |
|
"learning_rate": 2.0430712979890313e-05, |
|
"loss": 0.6382, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.6014625228519197, |
|
"grad_norm": 6.520296096801758, |
|
"learning_rate": 2.040877513711152e-05, |
|
"loss": 0.4315, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.6051188299817185, |
|
"grad_norm": 6.086079120635986, |
|
"learning_rate": 2.0386837294332725e-05, |
|
"loss": 0.6829, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.6087751371115173, |
|
"grad_norm": 3.4015817642211914, |
|
"learning_rate": 2.036489945155393e-05, |
|
"loss": 0.6142, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.6124314442413161, |
|
"grad_norm": 7.188704013824463, |
|
"learning_rate": 2.034296160877514e-05, |
|
"loss": 0.5532, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.6160877513711152, |
|
"grad_norm": 3.989145517349243, |
|
"learning_rate": 2.0321023765996343e-05, |
|
"loss": 1.0227, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.6197440585009142, |
|
"grad_norm": 5.923662185668945, |
|
"learning_rate": 2.029908592321755e-05, |
|
"loss": 0.6978, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.623400365630713, |
|
"grad_norm": 5.101003170013428, |
|
"learning_rate": 2.0277148080438756e-05, |
|
"loss": 0.9833, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.6270566727605118, |
|
"grad_norm": 9.158041000366211, |
|
"learning_rate": 2.0255210237659962e-05, |
|
"loss": 1.0931, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.6307129798903108, |
|
"grad_norm": 6.297501564025879, |
|
"learning_rate": 2.023327239488117e-05, |
|
"loss": 1.1048, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.6343692870201096, |
|
"grad_norm": 3.9536404609680176, |
|
"learning_rate": 2.0211334552102377e-05, |
|
"loss": 0.7332, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.6380255941499087, |
|
"grad_norm": 4.0736212730407715, |
|
"learning_rate": 2.0189396709323584e-05, |
|
"loss": 0.5685, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.6416819012797075, |
|
"grad_norm": 11.199592590332031, |
|
"learning_rate": 2.016745886654479e-05, |
|
"loss": 0.8059, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.6453382084095063, |
|
"grad_norm": 10.829754829406738, |
|
"learning_rate": 2.0145521023766e-05, |
|
"loss": 1.0358, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.6489945155393053, |
|
"grad_norm": 4.670787811279297, |
|
"learning_rate": 2.0123583180987205e-05, |
|
"loss": 0.8369, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 1.6526508226691043, |
|
"grad_norm": 6.225413799285889, |
|
"learning_rate": 2.0101645338208408e-05, |
|
"loss": 1.2236, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 1.6563071297989032, |
|
"grad_norm": 3.398374557495117, |
|
"learning_rate": 2.0079707495429614e-05, |
|
"loss": 0.5667, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 1.659963436928702, |
|
"grad_norm": 3.375204086303711, |
|
"learning_rate": 2.0057769652650824e-05, |
|
"loss": 0.989, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 1.6636197440585008, |
|
"grad_norm": 4.518038749694824, |
|
"learning_rate": 2.003583180987203e-05, |
|
"loss": 0.7565, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.6672760511882998, |
|
"grad_norm": 3.7947514057159424, |
|
"learning_rate": 2.0013893967093236e-05, |
|
"loss": 0.9918, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 1.6709323583180988, |
|
"grad_norm": 2.7493553161621094, |
|
"learning_rate": 1.9991956124314442e-05, |
|
"loss": 0.4559, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 1.6745886654478976, |
|
"grad_norm": 2.3222575187683105, |
|
"learning_rate": 1.997001828153565e-05, |
|
"loss": 0.6695, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 1.6782449725776964, |
|
"grad_norm": 8.733063697814941, |
|
"learning_rate": 1.9948080438756858e-05, |
|
"loss": 0.6937, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 1.6819012797074955, |
|
"grad_norm": 5.651478290557861, |
|
"learning_rate": 1.9926142595978064e-05, |
|
"loss": 0.4887, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.6855575868372943, |
|
"grad_norm": 5.600511074066162, |
|
"learning_rate": 1.990420475319927e-05, |
|
"loss": 0.6819, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 1.6892138939670933, |
|
"grad_norm": 5.3927903175354, |
|
"learning_rate": 1.9882266910420476e-05, |
|
"loss": 0.8285, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 1.6928702010968921, |
|
"grad_norm": 4.391313076019287, |
|
"learning_rate": 1.9860329067641682e-05, |
|
"loss": 0.7116, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 1.696526508226691, |
|
"grad_norm": 6.470620155334473, |
|
"learning_rate": 1.983839122486289e-05, |
|
"loss": 1.2811, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 1.70018281535649, |
|
"grad_norm": 1.9842756986618042, |
|
"learning_rate": 1.9816453382084095e-05, |
|
"loss": 1.1558, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.703839122486289, |
|
"grad_norm": 6.438689708709717, |
|
"learning_rate": 1.97945155393053e-05, |
|
"loss": 0.8181, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 1.7074954296160878, |
|
"grad_norm": 5.5345845222473145, |
|
"learning_rate": 1.977257769652651e-05, |
|
"loss": 0.4793, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 1.7111517367458866, |
|
"grad_norm": 6.923543930053711, |
|
"learning_rate": 1.9750639853747717e-05, |
|
"loss": 1.2972, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 1.7148080438756854, |
|
"grad_norm": 7.229982376098633, |
|
"learning_rate": 1.9728702010968923e-05, |
|
"loss": 1.006, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 1.7184643510054844, |
|
"grad_norm": 5.0050201416015625, |
|
"learning_rate": 1.970676416819013e-05, |
|
"loss": 0.7382, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.7221206581352835, |
|
"grad_norm": 5.115394115447998, |
|
"learning_rate": 1.9684826325411335e-05, |
|
"loss": 1.0649, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 1.7257769652650823, |
|
"grad_norm": 6.4145307540893555, |
|
"learning_rate": 1.9662888482632544e-05, |
|
"loss": 0.9784, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 1.729433272394881, |
|
"grad_norm": 3.8062143325805664, |
|
"learning_rate": 1.9640950639853747e-05, |
|
"loss": 0.809, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 1.7330895795246801, |
|
"grad_norm": 3.4305763244628906, |
|
"learning_rate": 1.9619012797074953e-05, |
|
"loss": 0.6094, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 1.736745886654479, |
|
"grad_norm": 4.138398170471191, |
|
"learning_rate": 1.959707495429616e-05, |
|
"loss": 0.6374, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.740402193784278, |
|
"grad_norm": 3.1539058685302734, |
|
"learning_rate": 1.957513711151737e-05, |
|
"loss": 0.4952, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 1.7440585009140768, |
|
"grad_norm": 2.051999807357788, |
|
"learning_rate": 1.9553199268738575e-05, |
|
"loss": 0.8456, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 1.7477148080438756, |
|
"grad_norm": 5.383764743804932, |
|
"learning_rate": 1.953126142595978e-05, |
|
"loss": 0.9809, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 1.7513711151736746, |
|
"grad_norm": 10.34570026397705, |
|
"learning_rate": 1.9509323583180987e-05, |
|
"loss": 1.4191, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 1.7550274223034736, |
|
"grad_norm": 7.438785552978516, |
|
"learning_rate": 1.9487385740402194e-05, |
|
"loss": 0.9254, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.7586837294332724, |
|
"grad_norm": 5.489014148712158, |
|
"learning_rate": 1.9465447897623403e-05, |
|
"loss": 0.912, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 1.7623400365630713, |
|
"grad_norm": 2.74650502204895, |
|
"learning_rate": 1.944351005484461e-05, |
|
"loss": 0.7544, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 1.76599634369287, |
|
"grad_norm": 6.396740436553955, |
|
"learning_rate": 1.9421572212065812e-05, |
|
"loss": 0.5699, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 1.769652650822669, |
|
"grad_norm": 4.5033745765686035, |
|
"learning_rate": 1.9399634369287018e-05, |
|
"loss": 0.7621, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.7733089579524681, |
|
"grad_norm": 2.8868985176086426, |
|
"learning_rate": 1.9377696526508228e-05, |
|
"loss": 0.5894, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.776965265082267, |
|
"grad_norm": 5.314028739929199, |
|
"learning_rate": 1.9355758683729434e-05, |
|
"loss": 0.8135, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.7806215722120657, |
|
"grad_norm": 7.692873477935791, |
|
"learning_rate": 1.933382084095064e-05, |
|
"loss": 1.6637, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.7842778793418648, |
|
"grad_norm": 6.586564064025879, |
|
"learning_rate": 1.9311882998171846e-05, |
|
"loss": 0.634, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.7879341864716636, |
|
"grad_norm": 4.398944854736328, |
|
"learning_rate": 1.9289945155393056e-05, |
|
"loss": 0.9242, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.7915904936014626, |
|
"grad_norm": 3.091824769973755, |
|
"learning_rate": 1.9268007312614262e-05, |
|
"loss": 0.5293, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.7952468007312614, |
|
"grad_norm": 1.7957733869552612, |
|
"learning_rate": 1.9246069469835468e-05, |
|
"loss": 0.4717, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.7989031078610602, |
|
"grad_norm": 8.411224365234375, |
|
"learning_rate": 1.9224131627056674e-05, |
|
"loss": 0.9129, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.8025594149908593, |
|
"grad_norm": 6.0289788246154785, |
|
"learning_rate": 1.9202193784277877e-05, |
|
"loss": 0.9803, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.8062157221206583, |
|
"grad_norm": 2.4739830493927, |
|
"learning_rate": 1.9180255941499086e-05, |
|
"loss": 0.5742, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.809872029250457, |
|
"grad_norm": 5.185890197753906, |
|
"learning_rate": 1.9158318098720292e-05, |
|
"loss": 0.5904, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.813528336380256, |
|
"grad_norm": 7.785595893859863, |
|
"learning_rate": 1.91363802559415e-05, |
|
"loss": 0.9302, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.8171846435100547, |
|
"grad_norm": 4.2491841316223145, |
|
"learning_rate": 1.9114442413162705e-05, |
|
"loss": 0.6755, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.8208409506398537, |
|
"grad_norm": 5.402482986450195, |
|
"learning_rate": 1.9092504570383914e-05, |
|
"loss": 0.7065, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.8244972577696528, |
|
"grad_norm": 9.053221702575684, |
|
"learning_rate": 1.907056672760512e-05, |
|
"loss": 0.8879, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.8281535648994516, |
|
"grad_norm": 4.956139087677002, |
|
"learning_rate": 1.9048628884826327e-05, |
|
"loss": 0.7763, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.8318098720292504, |
|
"grad_norm": 4.047802925109863, |
|
"learning_rate": 1.9026691042047533e-05, |
|
"loss": 0.985, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.8354661791590492, |
|
"grad_norm": 2.324805736541748, |
|
"learning_rate": 1.9004753199268742e-05, |
|
"loss": 0.8605, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.8391224862888482, |
|
"grad_norm": 8.674615859985352, |
|
"learning_rate": 1.8982815356489945e-05, |
|
"loss": 0.7584, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.8427787934186473, |
|
"grad_norm": 2.8716583251953125, |
|
"learning_rate": 1.896087751371115e-05, |
|
"loss": 0.8896, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.846435100548446, |
|
"grad_norm": 4.845273494720459, |
|
"learning_rate": 1.8938939670932357e-05, |
|
"loss": 0.7585, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.8500914076782449, |
|
"grad_norm": 1.3373600244522095, |
|
"learning_rate": 1.8917001828153563e-05, |
|
"loss": 0.8324, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.853747714808044, |
|
"grad_norm": 3.5930116176605225, |
|
"learning_rate": 1.8895063985374773e-05, |
|
"loss": 0.5972, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.857404021937843, |
|
"grad_norm": 2.8679511547088623, |
|
"learning_rate": 1.887312614259598e-05, |
|
"loss": 1.2015, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.8610603290676417, |
|
"grad_norm": 5.207054615020752, |
|
"learning_rate": 1.8851188299817185e-05, |
|
"loss": 1.1164, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.8647166361974405, |
|
"grad_norm": 4.295830249786377, |
|
"learning_rate": 1.882925045703839e-05, |
|
"loss": 0.9228, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.8683729433272394, |
|
"grad_norm": 6.6493682861328125, |
|
"learning_rate": 1.88073126142596e-05, |
|
"loss": 1.2885, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.8720292504570384, |
|
"grad_norm": 9.316621780395508, |
|
"learning_rate": 1.8785374771480807e-05, |
|
"loss": 0.9024, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.8756855575868374, |
|
"grad_norm": 1.7442660331726074, |
|
"learning_rate": 1.876343692870201e-05, |
|
"loss": 0.6215, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.8793418647166362, |
|
"grad_norm": 3.714203357696533, |
|
"learning_rate": 1.8741499085923216e-05, |
|
"loss": 1.0476, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.882998171846435, |
|
"grad_norm": 8.656035423278809, |
|
"learning_rate": 1.8719561243144422e-05, |
|
"loss": 0.8761, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.8866544789762338, |
|
"grad_norm": 7.139505863189697, |
|
"learning_rate": 1.869762340036563e-05, |
|
"loss": 0.6815, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.8903107861060329, |
|
"grad_norm": 5.897740840911865, |
|
"learning_rate": 1.8675685557586838e-05, |
|
"loss": 0.7645, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.893967093235832, |
|
"grad_norm": 6.025356292724609, |
|
"learning_rate": 1.8653747714808044e-05, |
|
"loss": 0.9224, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.8976234003656307, |
|
"grad_norm": 3.462116003036499, |
|
"learning_rate": 1.863180987202925e-05, |
|
"loss": 0.6179, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.9012797074954295, |
|
"grad_norm": 0.449295312166214, |
|
"learning_rate": 1.860987202925046e-05, |
|
"loss": 0.5513, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.9049360146252285, |
|
"grad_norm": 8.190743446350098, |
|
"learning_rate": 1.8587934186471666e-05, |
|
"loss": 0.9577, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.9085923217550276, |
|
"grad_norm": 8.000064849853516, |
|
"learning_rate": 1.8565996343692872e-05, |
|
"loss": 1.1829, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.9122486288848264, |
|
"grad_norm": 2.7674405574798584, |
|
"learning_rate": 1.8544058500914075e-05, |
|
"loss": 0.9203, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.9159049360146252, |
|
"grad_norm": 3.4354286193847656, |
|
"learning_rate": 1.8522120658135284e-05, |
|
"loss": 0.8279, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.919561243144424, |
|
"grad_norm": 4.011999607086182, |
|
"learning_rate": 1.850018281535649e-05, |
|
"loss": 0.7985, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.923217550274223, |
|
"grad_norm": 6.80394172668457, |
|
"learning_rate": 1.8478244972577696e-05, |
|
"loss": 0.7541, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.926873857404022, |
|
"grad_norm": 9.098631858825684, |
|
"learning_rate": 1.8456307129798903e-05, |
|
"loss": 0.7121, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.9305301645338209, |
|
"grad_norm": 8.139768600463867, |
|
"learning_rate": 1.843436928702011e-05, |
|
"loss": 1.0927, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.9341864716636197, |
|
"grad_norm": 7.283916473388672, |
|
"learning_rate": 1.8412431444241318e-05, |
|
"loss": 0.9501, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.9378427787934185, |
|
"grad_norm": 5.627073764801025, |
|
"learning_rate": 1.8390493601462524e-05, |
|
"loss": 1.2397, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.9414990859232175, |
|
"grad_norm": 4.708215713500977, |
|
"learning_rate": 1.836855575868373e-05, |
|
"loss": 0.8767, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.9451553930530165, |
|
"grad_norm": 5.6944756507873535, |
|
"learning_rate": 1.8346617915904937e-05, |
|
"loss": 0.7765, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.9488117001828154, |
|
"grad_norm": 2.780611038208008, |
|
"learning_rate": 1.8324680073126143e-05, |
|
"loss": 0.9307, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.9524680073126142, |
|
"grad_norm": 6.318012237548828, |
|
"learning_rate": 1.830274223034735e-05, |
|
"loss": 0.9262, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.9561243144424132, |
|
"grad_norm": 3.8964459896087646, |
|
"learning_rate": 1.8280804387568555e-05, |
|
"loss": 0.6519, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.9597806215722122, |
|
"grad_norm": 3.204008102416992, |
|
"learning_rate": 1.825886654478976e-05, |
|
"loss": 1.0352, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.963436928702011, |
|
"grad_norm": 6.150453567504883, |
|
"learning_rate": 1.823692870201097e-05, |
|
"loss": 0.8672, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.9670932358318098, |
|
"grad_norm": 3.9006292819976807, |
|
"learning_rate": 1.8214990859232177e-05, |
|
"loss": 0.6804, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.9707495429616086, |
|
"grad_norm": 3.0023293495178223, |
|
"learning_rate": 1.8193053016453383e-05, |
|
"loss": 0.7373, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.9744058500914077, |
|
"grad_norm": 7.111054420471191, |
|
"learning_rate": 1.817111517367459e-05, |
|
"loss": 0.9125, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.9780621572212067, |
|
"grad_norm": 7.576889991760254, |
|
"learning_rate": 1.8149177330895795e-05, |
|
"loss": 0.5759, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.9817184643510055, |
|
"grad_norm": 9.145369529724121, |
|
"learning_rate": 1.8127239488117005e-05, |
|
"loss": 0.9557, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.9853747714808043, |
|
"grad_norm": 8.636487007141113, |
|
"learning_rate": 1.8105301645338208e-05, |
|
"loss": 0.8976, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.9890310786106031, |
|
"grad_norm": 4.460054874420166, |
|
"learning_rate": 1.8083363802559414e-05, |
|
"loss": 0.7172, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.9926873857404022, |
|
"grad_norm": 8.192395210266113, |
|
"learning_rate": 1.806142595978062e-05, |
|
"loss": 0.856, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.9963436928702012, |
|
"grad_norm": 9.720686912536621, |
|
"learning_rate": 1.803948811700183e-05, |
|
"loss": 0.9177, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 7.616659641265869, |
|
"learning_rate": 1.8017550274223036e-05, |
|
"loss": 0.7784, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.003656307129799, |
|
"grad_norm": 5.925053596496582, |
|
"learning_rate": 1.799561243144424e-05, |
|
"loss": 0.6278, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.0073126142595976, |
|
"grad_norm": 5.279562950134277, |
|
"learning_rate": 1.7973674588665448e-05, |
|
"loss": 0.581, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.010968921389397, |
|
"grad_norm": 5.457578182220459, |
|
"learning_rate": 1.7951736745886657e-05, |
|
"loss": 0.9397, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.0146252285191957, |
|
"grad_norm": 2.3031833171844482, |
|
"learning_rate": 1.7929798903107863e-05, |
|
"loss": 0.734, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.0182815356489945, |
|
"grad_norm": 3.8108150959014893, |
|
"learning_rate": 1.790786106032907e-05, |
|
"loss": 0.716, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.0219378427787933, |
|
"grad_norm": 6.341092586517334, |
|
"learning_rate": 1.7885923217550272e-05, |
|
"loss": 0.9646, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.025594149908592, |
|
"grad_norm": 3.282466411590576, |
|
"learning_rate": 1.786398537477148e-05, |
|
"loss": 0.8874, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.0292504570383914, |
|
"grad_norm": 1.760282039642334, |
|
"learning_rate": 1.7842047531992688e-05, |
|
"loss": 0.8131, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.03290676416819, |
|
"grad_norm": 5.197391510009766, |
|
"learning_rate": 1.7820109689213894e-05, |
|
"loss": 0.4766, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.036563071297989, |
|
"grad_norm": 6.330410480499268, |
|
"learning_rate": 1.77981718464351e-05, |
|
"loss": 0.8051, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.0402193784277878, |
|
"grad_norm": 2.116508722305298, |
|
"learning_rate": 1.7776234003656306e-05, |
|
"loss": 0.5772, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.043875685557587, |
|
"grad_norm": 7.164584636688232, |
|
"learning_rate": 1.7754296160877516e-05, |
|
"loss": 1.1541, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.047531992687386, |
|
"grad_norm": 3.2902145385742188, |
|
"learning_rate": 1.7732358318098722e-05, |
|
"loss": 0.8753, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.0511882998171846, |
|
"grad_norm": 4.900457859039307, |
|
"learning_rate": 1.7710420475319928e-05, |
|
"loss": 0.721, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.0548446069469835, |
|
"grad_norm": 1.8482491970062256, |
|
"learning_rate": 1.7688482632541134e-05, |
|
"loss": 0.5912, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.0585009140767823, |
|
"grad_norm": 6.206057548522949, |
|
"learning_rate": 1.766654478976234e-05, |
|
"loss": 0.7698, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.0621572212065815, |
|
"grad_norm": 2.8507750034332275, |
|
"learning_rate": 1.7644606946983547e-05, |
|
"loss": 0.585, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.0658135283363803, |
|
"grad_norm": 1.5750012397766113, |
|
"learning_rate": 1.7622669104204753e-05, |
|
"loss": 0.788, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.069469835466179, |
|
"grad_norm": 0.9211186170578003, |
|
"learning_rate": 1.760073126142596e-05, |
|
"loss": 0.7428, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.073126142595978, |
|
"grad_norm": 6.672236442565918, |
|
"learning_rate": 1.7578793418647165e-05, |
|
"loss": 1.1684, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.076782449725777, |
|
"grad_norm": 5.079084396362305, |
|
"learning_rate": 1.7556855575868375e-05, |
|
"loss": 0.8299, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.080438756855576, |
|
"grad_norm": 2.206005573272705, |
|
"learning_rate": 1.753491773308958e-05, |
|
"loss": 0.7393, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.084095063985375, |
|
"grad_norm": 5.880030155181885, |
|
"learning_rate": 1.7512979890310787e-05, |
|
"loss": 0.7371, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.0877513711151736, |
|
"grad_norm": 2.5095629692077637, |
|
"learning_rate": 1.7491042047531993e-05, |
|
"loss": 0.7439, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.0914076782449724, |
|
"grad_norm": 3.8941352367401123, |
|
"learning_rate": 1.7469104204753203e-05, |
|
"loss": 0.7426, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.0950639853747717, |
|
"grad_norm": 2.9596612453460693, |
|
"learning_rate": 1.7447166361974405e-05, |
|
"loss": 1.0313, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.0987202925045705, |
|
"grad_norm": 5.640470027923584, |
|
"learning_rate": 1.742522851919561e-05, |
|
"loss": 0.8318, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.1023765996343693, |
|
"grad_norm": 9.360175132751465, |
|
"learning_rate": 1.7403290676416818e-05, |
|
"loss": 0.9472, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.106032906764168, |
|
"grad_norm": 3.729229688644409, |
|
"learning_rate": 1.7381352833638024e-05, |
|
"loss": 0.9582, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.109689213893967, |
|
"grad_norm": 4.457205295562744, |
|
"learning_rate": 1.7359414990859233e-05, |
|
"loss": 0.848, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.113345521023766, |
|
"grad_norm": 2.072932243347168, |
|
"learning_rate": 1.733747714808044e-05, |
|
"loss": 1.1283, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.117001828153565, |
|
"grad_norm": 2.81571888923645, |
|
"learning_rate": 1.7315539305301646e-05, |
|
"loss": 0.8487, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.1206581352833638, |
|
"grad_norm": 4.277017593383789, |
|
"learning_rate": 1.7293601462522852e-05, |
|
"loss": 0.7518, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.1243144424131626, |
|
"grad_norm": 4.090396404266357, |
|
"learning_rate": 1.727166361974406e-05, |
|
"loss": 0.8979, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.1279707495429614, |
|
"grad_norm": 1.6413131952285767, |
|
"learning_rate": 1.7249725776965267e-05, |
|
"loss": 0.5561, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.1316270566727606, |
|
"grad_norm": 1.0182098150253296, |
|
"learning_rate": 1.7227787934186474e-05, |
|
"loss": 0.6022, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.1352833638025595, |
|
"grad_norm": 1.9393812417984009, |
|
"learning_rate": 1.7205850091407676e-05, |
|
"loss": 0.916, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.1389396709323583, |
|
"grad_norm": 1.6483741998672485, |
|
"learning_rate": 1.7183912248628886e-05, |
|
"loss": 0.8657, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.142595978062157, |
|
"grad_norm": 5.0950927734375, |
|
"learning_rate": 1.7161974405850092e-05, |
|
"loss": 0.769, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.1462522851919563, |
|
"grad_norm": 5.417265892028809, |
|
"learning_rate": 1.7140036563071298e-05, |
|
"loss": 0.8892, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.149908592321755, |
|
"grad_norm": 2.497882604598999, |
|
"learning_rate": 1.7118098720292504e-05, |
|
"loss": 0.6083, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.153564899451554, |
|
"grad_norm": 2.365013599395752, |
|
"learning_rate": 1.709616087751371e-05, |
|
"loss": 0.8289, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.1572212065813527, |
|
"grad_norm": 4.738333225250244, |
|
"learning_rate": 1.707422303473492e-05, |
|
"loss": 0.8626, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.1608775137111516, |
|
"grad_norm": 3.8534250259399414, |
|
"learning_rate": 1.7052285191956126e-05, |
|
"loss": 0.7599, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.164533820840951, |
|
"grad_norm": 4.418381214141846, |
|
"learning_rate": 1.7030347349177332e-05, |
|
"loss": 0.6414, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.1681901279707496, |
|
"grad_norm": 3.9305009841918945, |
|
"learning_rate": 1.700840950639854e-05, |
|
"loss": 0.8187, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.1718464351005484, |
|
"grad_norm": 3.8605735301971436, |
|
"learning_rate": 1.6986471663619744e-05, |
|
"loss": 0.7981, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.1755027422303472, |
|
"grad_norm": 0.5784508585929871, |
|
"learning_rate": 1.696453382084095e-05, |
|
"loss": 0.5747, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.1791590493601465, |
|
"grad_norm": 3.44700288772583, |
|
"learning_rate": 1.6942595978062157e-05, |
|
"loss": 0.9207, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.1828153564899453, |
|
"grad_norm": 2.130711317062378, |
|
"learning_rate": 1.6920658135283363e-05, |
|
"loss": 0.5485, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.186471663619744, |
|
"grad_norm": 3.466505289077759, |
|
"learning_rate": 1.689872029250457e-05, |
|
"loss": 0.8698, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.190127970749543, |
|
"grad_norm": 2.2737669944763184, |
|
"learning_rate": 1.687678244972578e-05, |
|
"loss": 0.9317, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.1937842778793417, |
|
"grad_norm": 6.341795444488525, |
|
"learning_rate": 1.6854844606946985e-05, |
|
"loss": 0.8099, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.197440585009141, |
|
"grad_norm": 7.006868839263916, |
|
"learning_rate": 1.683290676416819e-05, |
|
"loss": 0.9554, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.2010968921389398, |
|
"grad_norm": 3.7944741249084473, |
|
"learning_rate": 1.6810968921389397e-05, |
|
"loss": 0.7068, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.2047531992687386, |
|
"grad_norm": 3.8332672119140625, |
|
"learning_rate": 1.6789031078610607e-05, |
|
"loss": 0.7278, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.2084095063985374, |
|
"grad_norm": 6.753068447113037, |
|
"learning_rate": 1.676709323583181e-05, |
|
"loss": 0.8768, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.212065813528336, |
|
"grad_norm": 6.275936603546143, |
|
"learning_rate": 1.6745155393053015e-05, |
|
"loss": 0.743, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.2157221206581355, |
|
"grad_norm": 0.639437198638916, |
|
"learning_rate": 1.672321755027422e-05, |
|
"loss": 0.7593, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.2193784277879343, |
|
"grad_norm": 2.318837881088257, |
|
"learning_rate": 1.670127970749543e-05, |
|
"loss": 0.6968, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.223034734917733, |
|
"grad_norm": 4.160284996032715, |
|
"learning_rate": 1.6679341864716637e-05, |
|
"loss": 0.764, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.226691042047532, |
|
"grad_norm": 4.3529744148254395, |
|
"learning_rate": 1.6657404021937843e-05, |
|
"loss": 0.7927, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.2303473491773307, |
|
"grad_norm": 6.292082786560059, |
|
"learning_rate": 1.663546617915905e-05, |
|
"loss": 0.8195, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.23400365630713, |
|
"grad_norm": 1.47853684425354, |
|
"learning_rate": 1.6613528336380256e-05, |
|
"loss": 0.4685, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.2376599634369287, |
|
"grad_norm": 1.9506633281707764, |
|
"learning_rate": 1.6593784277879343e-05, |
|
"loss": 0.8133, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.2413162705667276, |
|
"grad_norm": 3.7667181491851807, |
|
"learning_rate": 1.657184643510055e-05, |
|
"loss": 0.7294, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.2449725776965264, |
|
"grad_norm": 3.3465397357940674, |
|
"learning_rate": 1.6549908592321755e-05, |
|
"loss": 0.6542, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.2486288848263256, |
|
"grad_norm": 6.2452545166015625, |
|
"learning_rate": 1.652797074954296e-05, |
|
"loss": 1.0769, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.2522851919561244, |
|
"grad_norm": 3.649399518966675, |
|
"learning_rate": 1.6506032906764167e-05, |
|
"loss": 0.6094, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.2559414990859232, |
|
"grad_norm": 1.8042731285095215, |
|
"learning_rate": 1.6484095063985374e-05, |
|
"loss": 0.9194, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.259597806215722, |
|
"grad_norm": 6.0087714195251465, |
|
"learning_rate": 1.6462157221206583e-05, |
|
"loss": 0.7116, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.263254113345521, |
|
"grad_norm": 2.632741928100586, |
|
"learning_rate": 1.644021937842779e-05, |
|
"loss": 0.713, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.26691042047532, |
|
"grad_norm": 1.5080722570419312, |
|
"learning_rate": 1.6418281535648995e-05, |
|
"loss": 0.7972, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.270566727605119, |
|
"grad_norm": 5.658291816711426, |
|
"learning_rate": 1.63963436928702e-05, |
|
"loss": 0.8587, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.2742230347349177, |
|
"grad_norm": 2.7925331592559814, |
|
"learning_rate": 1.637440585009141e-05, |
|
"loss": 0.859, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.2778793418647165, |
|
"grad_norm": 7.432958126068115, |
|
"learning_rate": 1.6352468007312614e-05, |
|
"loss": 1.1442, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.2815356489945158, |
|
"grad_norm": 3.1976866722106934, |
|
"learning_rate": 1.633053016453382e-05, |
|
"loss": 0.9542, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.2851919561243146, |
|
"grad_norm": 6.586294174194336, |
|
"learning_rate": 1.6308592321755026e-05, |
|
"loss": 0.9548, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.2888482632541134, |
|
"grad_norm": 4.858059406280518, |
|
"learning_rate": 1.6286654478976232e-05, |
|
"loss": 0.8362, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.292504570383912, |
|
"grad_norm": 0.6416640281677246, |
|
"learning_rate": 1.6264716636197442e-05, |
|
"loss": 0.866, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.296160877513711, |
|
"grad_norm": 2.6693904399871826, |
|
"learning_rate": 1.6242778793418648e-05, |
|
"loss": 0.9518, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.2998171846435103, |
|
"grad_norm": 3.4559848308563232, |
|
"learning_rate": 1.6220840950639854e-05, |
|
"loss": 0.9771, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.303473491773309, |
|
"grad_norm": 4.6828460693359375, |
|
"learning_rate": 1.619890310786106e-05, |
|
"loss": 0.6363, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.307129798903108, |
|
"grad_norm": 7.3838911056518555, |
|
"learning_rate": 1.617696526508227e-05, |
|
"loss": 1.1616, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.3107861060329067, |
|
"grad_norm": 2.1157217025756836, |
|
"learning_rate": 1.6155027422303476e-05, |
|
"loss": 0.6204, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.3144424131627055, |
|
"grad_norm": 5.136549949645996, |
|
"learning_rate": 1.613308957952468e-05, |
|
"loss": 0.9136, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.3180987202925047, |
|
"grad_norm": 4.352057933807373, |
|
"learning_rate": 1.6111151736745885e-05, |
|
"loss": 1.0355, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.3217550274223036, |
|
"grad_norm": 6.010753154754639, |
|
"learning_rate": 1.6089213893967094e-05, |
|
"loss": 0.8964, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.3254113345521024, |
|
"grad_norm": 4.205333232879639, |
|
"learning_rate": 1.60672760511883e-05, |
|
"loss": 0.5251, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 2.329067641681901, |
|
"grad_norm": 1.6704707145690918, |
|
"learning_rate": 1.6045338208409507e-05, |
|
"loss": 0.8491, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 2.3327239488117, |
|
"grad_norm": 6.694083213806152, |
|
"learning_rate": 1.6023400365630713e-05, |
|
"loss": 0.6679, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 2.3363802559414992, |
|
"grad_norm": 3.342144727706909, |
|
"learning_rate": 1.600146252285192e-05, |
|
"loss": 0.7972, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 2.340036563071298, |
|
"grad_norm": 1.7112003564834595, |
|
"learning_rate": 1.597952468007313e-05, |
|
"loss": 0.6471, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.343692870201097, |
|
"grad_norm": 5.751948833465576, |
|
"learning_rate": 1.5957586837294335e-05, |
|
"loss": 0.7035, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 2.3473491773308957, |
|
"grad_norm": 5.628826141357422, |
|
"learning_rate": 1.593564899451554e-05, |
|
"loss": 0.9389, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 2.3510054844606945, |
|
"grad_norm": 2.342500686645508, |
|
"learning_rate": 1.5913711151736743e-05, |
|
"loss": 0.8235, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 2.3546617915904937, |
|
"grad_norm": 6.325570106506348, |
|
"learning_rate": 1.5891773308957953e-05, |
|
"loss": 0.7512, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 2.3583180987202925, |
|
"grad_norm": 2.6905734539031982, |
|
"learning_rate": 1.586983546617916e-05, |
|
"loss": 0.7487, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 2.3619744058500913, |
|
"grad_norm": 6.612014293670654, |
|
"learning_rate": 1.5847897623400365e-05, |
|
"loss": 0.8339, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 2.36563071297989, |
|
"grad_norm": 5.0433526039123535, |
|
"learning_rate": 1.582595978062157e-05, |
|
"loss": 0.7244, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 2.3692870201096894, |
|
"grad_norm": 7.0606536865234375, |
|
"learning_rate": 1.580402193784278e-05, |
|
"loss": 0.7509, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 2.372943327239488, |
|
"grad_norm": 3.0852813720703125, |
|
"learning_rate": 1.5782084095063987e-05, |
|
"loss": 0.9728, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 2.376599634369287, |
|
"grad_norm": 1.0151329040527344, |
|
"learning_rate": 1.5760146252285193e-05, |
|
"loss": 0.4665, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.380255941499086, |
|
"grad_norm": 4.328806400299072, |
|
"learning_rate": 1.57382084095064e-05, |
|
"loss": 1.0313, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 2.383912248628885, |
|
"grad_norm": 3.0017240047454834, |
|
"learning_rate": 1.5716270566727605e-05, |
|
"loss": 0.8768, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 2.387568555758684, |
|
"grad_norm": 5.693215370178223, |
|
"learning_rate": 1.569433272394881e-05, |
|
"loss": 1.0785, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 2.3912248628884827, |
|
"grad_norm": 1.9527254104614258, |
|
"learning_rate": 1.5672394881170018e-05, |
|
"loss": 0.9748, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 2.3948811700182815, |
|
"grad_norm": 7.030393123626709, |
|
"learning_rate": 1.5650457038391224e-05, |
|
"loss": 0.9713, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 2.3985374771480803, |
|
"grad_norm": 4.398252487182617, |
|
"learning_rate": 1.562851919561243e-05, |
|
"loss": 0.8411, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 2.4021937842778796, |
|
"grad_norm": 4.480136394500732, |
|
"learning_rate": 1.560658135283364e-05, |
|
"loss": 0.7707, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 2.4058500914076784, |
|
"grad_norm": 1.4185088872909546, |
|
"learning_rate": 1.5584643510054846e-05, |
|
"loss": 0.6503, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 2.409506398537477, |
|
"grad_norm": 2.1291399002075195, |
|
"learning_rate": 1.5562705667276052e-05, |
|
"loss": 0.865, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 2.413162705667276, |
|
"grad_norm": 1.884865641593933, |
|
"learning_rate": 1.5540767824497258e-05, |
|
"loss": 0.846, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.416819012797075, |
|
"grad_norm": 5.985278129577637, |
|
"learning_rate": 1.5518829981718464e-05, |
|
"loss": 1.0771, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 2.420475319926874, |
|
"grad_norm": 2.479788064956665, |
|
"learning_rate": 1.5496892138939674e-05, |
|
"loss": 0.5075, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 2.424131627056673, |
|
"grad_norm": 5.608107089996338, |
|
"learning_rate": 1.5474954296160876e-05, |
|
"loss": 0.6899, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 2.4277879341864717, |
|
"grad_norm": 3.776259660720825, |
|
"learning_rate": 1.5453016453382083e-05, |
|
"loss": 1.0147, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 2.4314442413162705, |
|
"grad_norm": 3.7614502906799316, |
|
"learning_rate": 1.543107861060329e-05, |
|
"loss": 0.8459, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 2.4351005484460693, |
|
"grad_norm": 2.099912405014038, |
|
"learning_rate": 1.5409140767824498e-05, |
|
"loss": 1.1646, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 2.4387568555758685, |
|
"grad_norm": 0.44922786951065063, |
|
"learning_rate": 1.5387202925045704e-05, |
|
"loss": 0.8236, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 2.4424131627056673, |
|
"grad_norm": 5.04080867767334, |
|
"learning_rate": 1.536526508226691e-05, |
|
"loss": 1.0443, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 2.446069469835466, |
|
"grad_norm": 6.819587707519531, |
|
"learning_rate": 1.5343327239488117e-05, |
|
"loss": 1.0736, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 2.449725776965265, |
|
"grad_norm": 3.2166435718536377, |
|
"learning_rate": 1.5321389396709326e-05, |
|
"loss": 0.6082, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.4533820840950638, |
|
"grad_norm": 5.284506320953369, |
|
"learning_rate": 1.5299451553930532e-05, |
|
"loss": 0.874, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 2.457038391224863, |
|
"grad_norm": 4.369775295257568, |
|
"learning_rate": 1.527751371115174e-05, |
|
"loss": 0.7909, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 2.460694698354662, |
|
"grad_norm": 3.289560079574585, |
|
"learning_rate": 1.5255575868372943e-05, |
|
"loss": 0.5602, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 2.4643510054844606, |
|
"grad_norm": 1.0491223335266113, |
|
"learning_rate": 1.5233638025594149e-05, |
|
"loss": 0.5654, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 2.4680073126142594, |
|
"grad_norm": 6.228038311004639, |
|
"learning_rate": 1.5211700182815359e-05, |
|
"loss": 0.7385, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 2.4716636197440587, |
|
"grad_norm": 1.8368405103683472, |
|
"learning_rate": 1.5189762340036563e-05, |
|
"loss": 0.7565, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 2.4753199268738575, |
|
"grad_norm": 4.739897727966309, |
|
"learning_rate": 1.516782449725777e-05, |
|
"loss": 0.791, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 2.4789762340036563, |
|
"grad_norm": 4.953397274017334, |
|
"learning_rate": 1.5145886654478975e-05, |
|
"loss": 0.7508, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 2.482632541133455, |
|
"grad_norm": 2.6047234535217285, |
|
"learning_rate": 1.5123948811700185e-05, |
|
"loss": 0.7291, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 2.4862888482632544, |
|
"grad_norm": 6.410040378570557, |
|
"learning_rate": 1.5102010968921391e-05, |
|
"loss": 0.7964, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.489945155393053, |
|
"grad_norm": 3.258415699005127, |
|
"learning_rate": 1.5080073126142595e-05, |
|
"loss": 0.5596, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 2.493601462522852, |
|
"grad_norm": 3.6299631595611572, |
|
"learning_rate": 1.5058135283363802e-05, |
|
"loss": 0.6576, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 2.497257769652651, |
|
"grad_norm": 3.453648090362549, |
|
"learning_rate": 1.5036197440585011e-05, |
|
"loss": 0.6678, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 2.5009140767824496, |
|
"grad_norm": 1.2416099309921265, |
|
"learning_rate": 1.5014259597806217e-05, |
|
"loss": 0.4999, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 2.504570383912249, |
|
"grad_norm": 6.100232124328613, |
|
"learning_rate": 1.4992321755027423e-05, |
|
"loss": 0.6388, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 2.5082266910420477, |
|
"grad_norm": 5.142564296722412, |
|
"learning_rate": 1.4970383912248628e-05, |
|
"loss": 0.6444, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 2.5118829981718465, |
|
"grad_norm": 4.675838947296143, |
|
"learning_rate": 1.4948446069469836e-05, |
|
"loss": 0.7841, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 2.5155393053016453, |
|
"grad_norm": 2.114088296890259, |
|
"learning_rate": 1.4926508226691042e-05, |
|
"loss": 0.9303, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 2.519195612431444, |
|
"grad_norm": 2.986456871032715, |
|
"learning_rate": 1.490457038391225e-05, |
|
"loss": 0.7067, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 2.5228519195612433, |
|
"grad_norm": 4.167447090148926, |
|
"learning_rate": 1.4882632541133456e-05, |
|
"loss": 0.9053, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.526508226691042, |
|
"grad_norm": 4.037667751312256, |
|
"learning_rate": 1.4860694698354662e-05, |
|
"loss": 0.6749, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 2.530164533820841, |
|
"grad_norm": 3.1031131744384766, |
|
"learning_rate": 1.4838756855575868e-05, |
|
"loss": 0.978, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 2.5338208409506398, |
|
"grad_norm": 8.000749588012695, |
|
"learning_rate": 1.4816819012797076e-05, |
|
"loss": 0.9373, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 2.5374771480804386, |
|
"grad_norm": 5.090115070343018, |
|
"learning_rate": 1.4794881170018282e-05, |
|
"loss": 0.6007, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 2.541133455210238, |
|
"grad_norm": 4.267579555511475, |
|
"learning_rate": 1.477294332723949e-05, |
|
"loss": 0.6713, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 2.5447897623400366, |
|
"grad_norm": 6.383331775665283, |
|
"learning_rate": 1.4751005484460694e-05, |
|
"loss": 0.8684, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 2.5484460694698354, |
|
"grad_norm": 5.479264736175537, |
|
"learning_rate": 1.47290676416819e-05, |
|
"loss": 1.0091, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 2.5521023765996342, |
|
"grad_norm": 1.4539798498153687, |
|
"learning_rate": 1.4707129798903108e-05, |
|
"loss": 1.2803, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 2.555758683729433, |
|
"grad_norm": 4.5096755027771, |
|
"learning_rate": 1.4685191956124314e-05, |
|
"loss": 1.1298, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 2.5594149908592323, |
|
"grad_norm": 2.711442232131958, |
|
"learning_rate": 1.4663254113345522e-05, |
|
"loss": 0.8488, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.563071297989031, |
|
"grad_norm": 5.5778069496154785, |
|
"learning_rate": 1.4641316270566727e-05, |
|
"loss": 0.8496, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 2.56672760511883, |
|
"grad_norm": 6.614429473876953, |
|
"learning_rate": 1.4619378427787935e-05, |
|
"loss": 0.897, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 2.5703839122486287, |
|
"grad_norm": 4.096016883850098, |
|
"learning_rate": 1.459744058500914e-05, |
|
"loss": 0.9322, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 2.5740402193784275, |
|
"grad_norm": 3.7577602863311768, |
|
"learning_rate": 1.4575502742230349e-05, |
|
"loss": 0.8438, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 2.577696526508227, |
|
"grad_norm": 6.623696327209473, |
|
"learning_rate": 1.4553564899451555e-05, |
|
"loss": 0.8535, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 2.5813528336380256, |
|
"grad_norm": 4.914971828460693, |
|
"learning_rate": 1.453162705667276e-05, |
|
"loss": 0.9281, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 2.5850091407678244, |
|
"grad_norm": 3.639310359954834, |
|
"learning_rate": 1.4509689213893967e-05, |
|
"loss": 0.5874, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 2.5886654478976237, |
|
"grad_norm": 4.59980583190918, |
|
"learning_rate": 1.4487751371115173e-05, |
|
"loss": 0.669, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 2.5923217550274225, |
|
"grad_norm": 3.802577018737793, |
|
"learning_rate": 1.4465813528336381e-05, |
|
"loss": 0.7863, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 2.5959780621572213, |
|
"grad_norm": 5.985960960388184, |
|
"learning_rate": 1.4443875685557587e-05, |
|
"loss": 1.1215, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.59963436928702, |
|
"grad_norm": 7.36239767074585, |
|
"learning_rate": 1.4421937842778793e-05, |
|
"loss": 0.9102, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 2.603290676416819, |
|
"grad_norm": 4.171439170837402, |
|
"learning_rate": 1.44e-05, |
|
"loss": 0.5814, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 2.606946983546618, |
|
"grad_norm": 3.7119038105010986, |
|
"learning_rate": 1.4378062157221207e-05, |
|
"loss": 1.1138, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 2.610603290676417, |
|
"grad_norm": 0.3623199164867401, |
|
"learning_rate": 1.4356124314442413e-05, |
|
"loss": 0.7191, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 2.6142595978062158, |
|
"grad_norm": 7.952626705169678, |
|
"learning_rate": 1.4334186471663621e-05, |
|
"loss": 1.008, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 2.6179159049360146, |
|
"grad_norm": 4.192795753479004, |
|
"learning_rate": 1.4312248628884826e-05, |
|
"loss": 1.0029, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 2.6215722120658134, |
|
"grad_norm": 1.6941229104995728, |
|
"learning_rate": 1.4290310786106033e-05, |
|
"loss": 0.5333, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 2.6252285191956126, |
|
"grad_norm": 4.540876865386963, |
|
"learning_rate": 1.426837294332724e-05, |
|
"loss": 0.6368, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 2.6288848263254114, |
|
"grad_norm": 3.824742078781128, |
|
"learning_rate": 1.4246435100548447e-05, |
|
"loss": 0.8578, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 2.6325411334552102, |
|
"grad_norm": 6.03521203994751, |
|
"learning_rate": 1.4224497257769654e-05, |
|
"loss": 0.9158, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.636197440585009, |
|
"grad_norm": 1.0500041246414185, |
|
"learning_rate": 1.420255941499086e-05, |
|
"loss": 0.7151, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 2.639853747714808, |
|
"grad_norm": 3.9835376739501953, |
|
"learning_rate": 1.4180621572212066e-05, |
|
"loss": 0.9856, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 2.643510054844607, |
|
"grad_norm": 4.260631084442139, |
|
"learning_rate": 1.4158683729433272e-05, |
|
"loss": 0.7225, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 2.647166361974406, |
|
"grad_norm": 4.900208473205566, |
|
"learning_rate": 1.413674588665448e-05, |
|
"loss": 0.7778, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 2.6508226691042047, |
|
"grad_norm": 2.9643290042877197, |
|
"learning_rate": 1.4114808043875686e-05, |
|
"loss": 0.6312, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.6544789762340035, |
|
"grad_norm": 2.850414752960205, |
|
"learning_rate": 1.4092870201096894e-05, |
|
"loss": 0.8683, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 2.6581352833638023, |
|
"grad_norm": 5.803402423858643, |
|
"learning_rate": 1.4070932358318098e-05, |
|
"loss": 0.8922, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 2.6617915904936016, |
|
"grad_norm": 4.494935512542725, |
|
"learning_rate": 1.4048994515539306e-05, |
|
"loss": 0.6408, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 2.6654478976234004, |
|
"grad_norm": 2.5925052165985107, |
|
"learning_rate": 1.4027056672760512e-05, |
|
"loss": 0.864, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 2.669104204753199, |
|
"grad_norm": 3.3631858825683594, |
|
"learning_rate": 1.400511882998172e-05, |
|
"loss": 0.603, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.672760511882998, |
|
"grad_norm": 3.358248472213745, |
|
"learning_rate": 1.3983180987202926e-05, |
|
"loss": 0.5987, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 2.676416819012797, |
|
"grad_norm": 3.431640386581421, |
|
"learning_rate": 1.396124314442413e-05, |
|
"loss": 0.891, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 2.680073126142596, |
|
"grad_norm": 5.032719612121582, |
|
"learning_rate": 1.3939305301645338e-05, |
|
"loss": 0.9732, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 2.683729433272395, |
|
"grad_norm": 7.277076721191406, |
|
"learning_rate": 1.3917367458866545e-05, |
|
"loss": 1.1068, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 2.6873857404021937, |
|
"grad_norm": 2.9995198249816895, |
|
"learning_rate": 1.3895429616087752e-05, |
|
"loss": 0.8166, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.691042047531993, |
|
"grad_norm": 3.2115001678466797, |
|
"learning_rate": 1.3873491773308959e-05, |
|
"loss": 0.7981, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 2.6946983546617918, |
|
"grad_norm": 0.935015082359314, |
|
"learning_rate": 1.3851553930530165e-05, |
|
"loss": 0.5872, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 2.6983546617915906, |
|
"grad_norm": 3.3315343856811523, |
|
"learning_rate": 1.3829616087751371e-05, |
|
"loss": 0.593, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 2.7020109689213894, |
|
"grad_norm": 8.530818939208984, |
|
"learning_rate": 1.3807678244972579e-05, |
|
"loss": 1.2569, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 2.705667276051188, |
|
"grad_norm": 3.22756290435791, |
|
"learning_rate": 1.3785740402193785e-05, |
|
"loss": 0.8342, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.7093235831809874, |
|
"grad_norm": 1.0916093587875366, |
|
"learning_rate": 1.3763802559414993e-05, |
|
"loss": 0.8432, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 2.7129798903107862, |
|
"grad_norm": 5.046055793762207, |
|
"learning_rate": 1.3741864716636197e-05, |
|
"loss": 0.8075, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 2.716636197440585, |
|
"grad_norm": 4.796830654144287, |
|
"learning_rate": 1.3719926873857405e-05, |
|
"loss": 1.1543, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 2.720292504570384, |
|
"grad_norm": 5.081254005432129, |
|
"learning_rate": 1.3697989031078611e-05, |
|
"loss": 0.9912, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 2.7239488117001827, |
|
"grad_norm": 3.72564697265625, |
|
"learning_rate": 1.3676051188299817e-05, |
|
"loss": 0.7743, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.727605118829982, |
|
"grad_norm": 5.735417366027832, |
|
"learning_rate": 1.3654113345521025e-05, |
|
"loss": 0.8145, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 2.7312614259597807, |
|
"grad_norm": 2.6865832805633545, |
|
"learning_rate": 1.363217550274223e-05, |
|
"loss": 1.0108, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 2.7349177330895795, |
|
"grad_norm": 4.572368621826172, |
|
"learning_rate": 1.3610237659963437e-05, |
|
"loss": 0.8924, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 2.7385740402193783, |
|
"grad_norm": 5.849616050720215, |
|
"learning_rate": 1.3588299817184644e-05, |
|
"loss": 0.9521, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 2.742230347349177, |
|
"grad_norm": 3.1010758876800537, |
|
"learning_rate": 1.3566361974405851e-05, |
|
"loss": 0.9336, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.7458866544789764, |
|
"grad_norm": 4.738924980163574, |
|
"learning_rate": 1.3544424131627057e-05, |
|
"loss": 0.6897, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 2.749542961608775, |
|
"grad_norm": 6.994441032409668, |
|
"learning_rate": 1.3522486288848264e-05, |
|
"loss": 1.0206, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 2.753199268738574, |
|
"grad_norm": 2.939159393310547, |
|
"learning_rate": 1.350054844606947e-05, |
|
"loss": 1.2065, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 2.756855575868373, |
|
"grad_norm": 5.182316780090332, |
|
"learning_rate": 1.3478610603290678e-05, |
|
"loss": 0.9884, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 2.7605118829981716, |
|
"grad_norm": 4.590856552124023, |
|
"learning_rate": 1.3456672760511884e-05, |
|
"loss": 0.8894, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.764168190127971, |
|
"grad_norm": 5.282886505126953, |
|
"learning_rate": 1.343473491773309e-05, |
|
"loss": 0.8427, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 2.7678244972577697, |
|
"grad_norm": 3.7344796657562256, |
|
"learning_rate": 1.3412797074954296e-05, |
|
"loss": 0.7983, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 2.7714808043875685, |
|
"grad_norm": 3.6710190773010254, |
|
"learning_rate": 1.3390859232175502e-05, |
|
"loss": 0.7053, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 2.7751371115173673, |
|
"grad_norm": 3.3527188301086426, |
|
"learning_rate": 1.336892138939671e-05, |
|
"loss": 0.7733, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 2.778793418647166, |
|
"grad_norm": 4.841655254364014, |
|
"learning_rate": 1.3346983546617916e-05, |
|
"loss": 0.8462, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.7824497257769654, |
|
"grad_norm": 1.9838179349899292, |
|
"learning_rate": 1.3325045703839124e-05, |
|
"loss": 0.6837, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 2.786106032906764, |
|
"grad_norm": 4.187015056610107, |
|
"learning_rate": 1.3303107861060328e-05, |
|
"loss": 0.7068, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 2.789762340036563, |
|
"grad_norm": 4.960452079772949, |
|
"learning_rate": 1.3281170018281536e-05, |
|
"loss": 0.8144, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 2.7934186471663622, |
|
"grad_norm": 5.154735565185547, |
|
"learning_rate": 1.3259232175502742e-05, |
|
"loss": 0.5711, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 2.797074954296161, |
|
"grad_norm": 7.650027275085449, |
|
"learning_rate": 1.323729433272395e-05, |
|
"loss": 1.0676, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.80073126142596, |
|
"grad_norm": 2.561450242996216, |
|
"learning_rate": 1.3215356489945156e-05, |
|
"loss": 1.0003, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 2.8043875685557587, |
|
"grad_norm": 5.075997352600098, |
|
"learning_rate": 1.319341864716636e-05, |
|
"loss": 0.7371, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 2.8080438756855575, |
|
"grad_norm": 5.0892181396484375, |
|
"learning_rate": 1.3171480804387569e-05, |
|
"loss": 0.7836, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 2.8117001828153567, |
|
"grad_norm": 2.6121692657470703, |
|
"learning_rate": 1.3149542961608775e-05, |
|
"loss": 0.7845, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 2.8153564899451555, |
|
"grad_norm": 4.506619453430176, |
|
"learning_rate": 1.3127605118829983e-05, |
|
"loss": 0.9367, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.8190127970749543, |
|
"grad_norm": 6.061919212341309, |
|
"learning_rate": 1.3105667276051189e-05, |
|
"loss": 1.3084, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 2.822669104204753, |
|
"grad_norm": 5.916521072387695, |
|
"learning_rate": 1.3083729433272395e-05, |
|
"loss": 0.7738, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 2.826325411334552, |
|
"grad_norm": 4.980602741241455, |
|
"learning_rate": 1.3061791590493601e-05, |
|
"loss": 0.9172, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 2.829981718464351, |
|
"grad_norm": 5.4095139503479, |
|
"learning_rate": 1.3039853747714809e-05, |
|
"loss": 0.9361, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 2.83363802559415, |
|
"grad_norm": 2.727238178253174, |
|
"learning_rate": 1.3017915904936015e-05, |
|
"loss": 0.6558, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.837294332723949, |
|
"grad_norm": 6.939225196838379, |
|
"learning_rate": 1.2995978062157223e-05, |
|
"loss": 0.8616, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 2.8409506398537476, |
|
"grad_norm": 2.1128830909729004, |
|
"learning_rate": 1.2974040219378427e-05, |
|
"loss": 0.7853, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 2.8446069469835464, |
|
"grad_norm": 5.917961120605469, |
|
"learning_rate": 1.2952102376599635e-05, |
|
"loss": 0.8472, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 2.8482632541133457, |
|
"grad_norm": 3.7327582836151123, |
|
"learning_rate": 1.2930164533820841e-05, |
|
"loss": 0.7266, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 2.8519195612431445, |
|
"grad_norm": 6.155743598937988, |
|
"learning_rate": 1.2908226691042047e-05, |
|
"loss": 0.797, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.8555758683729433, |
|
"grad_norm": 2.516705274581909, |
|
"learning_rate": 1.2886288848263255e-05, |
|
"loss": 0.7553, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 2.859232175502742, |
|
"grad_norm": 8.74838924407959, |
|
"learning_rate": 1.286435100548446e-05, |
|
"loss": 1.0794, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 2.862888482632541, |
|
"grad_norm": 5.0210113525390625, |
|
"learning_rate": 1.2842413162705668e-05, |
|
"loss": 0.6795, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 2.86654478976234, |
|
"grad_norm": 6.808406352996826, |
|
"learning_rate": 1.2820475319926874e-05, |
|
"loss": 0.8541, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 2.870201096892139, |
|
"grad_norm": 8.608129501342773, |
|
"learning_rate": 1.2798537477148082e-05, |
|
"loss": 0.989, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.873857404021938, |
|
"grad_norm": 3.3586058616638184, |
|
"learning_rate": 1.2776599634369288e-05, |
|
"loss": 0.961, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 2.8775137111517366, |
|
"grad_norm": 3.2911384105682373, |
|
"learning_rate": 1.2754661791590494e-05, |
|
"loss": 0.7997, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 2.8811700182815354, |
|
"grad_norm": 3.714557647705078, |
|
"learning_rate": 1.27327239488117e-05, |
|
"loss": 0.914, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 2.8848263254113347, |
|
"grad_norm": 3.879274368286133, |
|
"learning_rate": 1.2710786106032908e-05, |
|
"loss": 0.8965, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 2.8884826325411335, |
|
"grad_norm": 4.490417003631592, |
|
"learning_rate": 1.2688848263254114e-05, |
|
"loss": 0.7234, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.8921389396709323, |
|
"grad_norm": 2.7484891414642334, |
|
"learning_rate": 1.266691042047532e-05, |
|
"loss": 0.6367, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 2.8957952468007315, |
|
"grad_norm": 4.121150493621826, |
|
"learning_rate": 1.2644972577696526e-05, |
|
"loss": 0.9689, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 2.89945155393053, |
|
"grad_norm": 4.113166332244873, |
|
"learning_rate": 1.2623034734917732e-05, |
|
"loss": 0.7614, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 2.903107861060329, |
|
"grad_norm": 2.689598321914673, |
|
"learning_rate": 1.260109689213894e-05, |
|
"loss": 0.8595, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 2.906764168190128, |
|
"grad_norm": 4.187771320343018, |
|
"learning_rate": 1.2579159049360146e-05, |
|
"loss": 0.8277, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.9104204753199268, |
|
"grad_norm": 1.3637969493865967, |
|
"learning_rate": 1.2557221206581354e-05, |
|
"loss": 0.4846, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 2.914076782449726, |
|
"grad_norm": 2.0621910095214844, |
|
"learning_rate": 1.2535283363802559e-05, |
|
"loss": 0.7043, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 2.917733089579525, |
|
"grad_norm": 3.3105924129486084, |
|
"learning_rate": 1.2513345521023766e-05, |
|
"loss": 0.9526, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 2.9213893967093236, |
|
"grad_norm": 6.157617092132568, |
|
"learning_rate": 1.2491407678244973e-05, |
|
"loss": 0.709, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 2.9250457038391224, |
|
"grad_norm": 3.511514663696289, |
|
"learning_rate": 1.246946983546618e-05, |
|
"loss": 0.97, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.9287020109689212, |
|
"grad_norm": 6.093450546264648, |
|
"learning_rate": 1.2447531992687387e-05, |
|
"loss": 0.5408, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 2.9323583180987205, |
|
"grad_norm": 3.3278634548187256, |
|
"learning_rate": 1.2425594149908593e-05, |
|
"loss": 0.7725, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 2.9360146252285193, |
|
"grad_norm": 2.85172963142395, |
|
"learning_rate": 1.2403656307129799e-05, |
|
"loss": 0.6774, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 2.939670932358318, |
|
"grad_norm": 3.4190468788146973, |
|
"learning_rate": 1.2381718464351005e-05, |
|
"loss": 0.7875, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 2.943327239488117, |
|
"grad_norm": 4.990618705749512, |
|
"learning_rate": 1.2359780621572213e-05, |
|
"loss": 0.8861, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.9469835466179157, |
|
"grad_norm": 3.767422676086426, |
|
"learning_rate": 1.2337842778793419e-05, |
|
"loss": 0.8276, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 2.950639853747715, |
|
"grad_norm": 5.137510776519775, |
|
"learning_rate": 1.2315904936014625e-05, |
|
"loss": 0.7357, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 2.954296160877514, |
|
"grad_norm": 7.071557998657227, |
|
"learning_rate": 1.2293967093235831e-05, |
|
"loss": 0.8031, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 2.9579524680073126, |
|
"grad_norm": 1.2824524641036987, |
|
"learning_rate": 1.2272029250457039e-05, |
|
"loss": 0.7664, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 2.9616087751371114, |
|
"grad_norm": 3.3414242267608643, |
|
"learning_rate": 1.2250091407678245e-05, |
|
"loss": 0.6475, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.96526508226691, |
|
"grad_norm": 2.206388473510742, |
|
"learning_rate": 1.2228153564899453e-05, |
|
"loss": 0.8417, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 2.9689213893967095, |
|
"grad_norm": 1.1660181283950806, |
|
"learning_rate": 1.2208409506398537e-05, |
|
"loss": 0.7856, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 2.9725776965265083, |
|
"grad_norm": 4.5918121337890625, |
|
"learning_rate": 1.2186471663619745e-05, |
|
"loss": 1.0086, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 2.976234003656307, |
|
"grad_norm": 5.133539199829102, |
|
"learning_rate": 1.2164533820840951e-05, |
|
"loss": 0.7742, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 2.979890310786106, |
|
"grad_norm": 3.002700090408325, |
|
"learning_rate": 1.2142595978062159e-05, |
|
"loss": 0.6667, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.9835466179159047, |
|
"grad_norm": 2.861591100692749, |
|
"learning_rate": 1.2120658135283363e-05, |
|
"loss": 0.688, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 2.987202925045704, |
|
"grad_norm": 6.372570037841797, |
|
"learning_rate": 1.2098720292504571e-05, |
|
"loss": 0.7678, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 2.9908592321755028, |
|
"grad_norm": 2.619347333908081, |
|
"learning_rate": 1.2076782449725777e-05, |
|
"loss": 0.6073, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 2.9945155393053016, |
|
"grad_norm": 5.605367183685303, |
|
"learning_rate": 1.2054844606946983e-05, |
|
"loss": 1.0449, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 2.998171846435101, |
|
"grad_norm": 5.622511863708496, |
|
"learning_rate": 1.2032906764168191e-05, |
|
"loss": 0.8425, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.0018281535648996, |
|
"grad_norm": 6.9952712059021, |
|
"learning_rate": 1.2010968921389397e-05, |
|
"loss": 0.6519, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 3.0054844606946984, |
|
"grad_norm": 3.982757806777954, |
|
"learning_rate": 1.1989031078610603e-05, |
|
"loss": 0.6918, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 3.0091407678244972, |
|
"grad_norm": 0.8815748691558838, |
|
"learning_rate": 1.196709323583181e-05, |
|
"loss": 0.7903, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 3.012797074954296, |
|
"grad_norm": 3.3442909717559814, |
|
"learning_rate": 1.1945155393053017e-05, |
|
"loss": 0.8296, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 3.016453382084095, |
|
"grad_norm": 2.6583852767944336, |
|
"learning_rate": 1.1923217550274223e-05, |
|
"loss": 0.6184, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 3.020109689213894, |
|
"grad_norm": 7.427060127258301, |
|
"learning_rate": 1.1901279707495431e-05, |
|
"loss": 0.8173, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 3.023765996343693, |
|
"grad_norm": 2.647944927215576, |
|
"learning_rate": 1.1879341864716636e-05, |
|
"loss": 0.5993, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 3.0274223034734917, |
|
"grad_norm": 4.050746917724609, |
|
"learning_rate": 1.1857404021937844e-05, |
|
"loss": 0.8844, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 3.0310786106032905, |
|
"grad_norm": 3.5873947143554688, |
|
"learning_rate": 1.183546617915905e-05, |
|
"loss": 0.9434, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 3.03473491773309, |
|
"grad_norm": 4.723058223724365, |
|
"learning_rate": 1.1813528336380256e-05, |
|
"loss": 0.9812, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.0383912248628886, |
|
"grad_norm": 3.5461058616638184, |
|
"learning_rate": 1.1791590493601464e-05, |
|
"loss": 0.7292, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 3.0420475319926874, |
|
"grad_norm": 4.339077949523926, |
|
"learning_rate": 1.1769652650822668e-05, |
|
"loss": 0.6777, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 3.045703839122486, |
|
"grad_norm": 5.269365310668945, |
|
"learning_rate": 1.1747714808043876e-05, |
|
"loss": 0.9067, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 3.049360146252285, |
|
"grad_norm": 4.5295562744140625, |
|
"learning_rate": 1.1725776965265082e-05, |
|
"loss": 0.843, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 3.0530164533820843, |
|
"grad_norm": 2.6268155574798584, |
|
"learning_rate": 1.170383912248629e-05, |
|
"loss": 0.6744, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 3.056672760511883, |
|
"grad_norm": 0.5700417757034302, |
|
"learning_rate": 1.1681901279707496e-05, |
|
"loss": 0.5303, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 3.060329067641682, |
|
"grad_norm": 2.430975914001465, |
|
"learning_rate": 1.1659963436928702e-05, |
|
"loss": 0.4397, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 3.0639853747714807, |
|
"grad_norm": 5.6289167404174805, |
|
"learning_rate": 1.1638025594149908e-05, |
|
"loss": 0.844, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 3.0676416819012795, |
|
"grad_norm": 4.169682025909424, |
|
"learning_rate": 1.1616087751371116e-05, |
|
"loss": 0.6566, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 3.0712979890310788, |
|
"grad_norm": 5.4011101722717285, |
|
"learning_rate": 1.1594149908592322e-05, |
|
"loss": 0.8949, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 3.0749542961608776, |
|
"grad_norm": 6.648904323577881, |
|
"learning_rate": 1.157221206581353e-05, |
|
"loss": 1.166, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 3.0786106032906764, |
|
"grad_norm": 6.321312427520752, |
|
"learning_rate": 1.1550274223034735e-05, |
|
"loss": 0.8976, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 3.082266910420475, |
|
"grad_norm": 2.092905044555664, |
|
"learning_rate": 1.152833638025594e-05, |
|
"loss": 0.8257, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 3.0859232175502744, |
|
"grad_norm": 2.951486825942993, |
|
"learning_rate": 1.1506398537477149e-05, |
|
"loss": 0.7798, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 3.0895795246800732, |
|
"grad_norm": 2.4010651111602783, |
|
"learning_rate": 1.1484460694698355e-05, |
|
"loss": 0.8107, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 3.093235831809872, |
|
"grad_norm": 5.102409362792969, |
|
"learning_rate": 1.1462522851919563e-05, |
|
"loss": 0.8601, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 3.096892138939671, |
|
"grad_norm": 4.4188008308410645, |
|
"learning_rate": 1.1440585009140767e-05, |
|
"loss": 0.5271, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 3.1005484460694697, |
|
"grad_norm": 2.7852301597595215, |
|
"learning_rate": 1.1418647166361975e-05, |
|
"loss": 0.5515, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 3.104204753199269, |
|
"grad_norm": 3.6287953853607178, |
|
"learning_rate": 1.1396709323583181e-05, |
|
"loss": 0.9211, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 3.1078610603290677, |
|
"grad_norm": 3.436657428741455, |
|
"learning_rate": 1.1374771480804389e-05, |
|
"loss": 0.7869, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.1115173674588665, |
|
"grad_norm": 2.1031956672668457, |
|
"learning_rate": 1.1352833638025595e-05, |
|
"loss": 0.7247, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 3.1151736745886653, |
|
"grad_norm": 3.4341351985931396, |
|
"learning_rate": 1.1330895795246801e-05, |
|
"loss": 0.747, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 3.118829981718464, |
|
"grad_norm": 5.897623062133789, |
|
"learning_rate": 1.1308957952468007e-05, |
|
"loss": 0.7396, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 3.1224862888482634, |
|
"grad_norm": 6.546688556671143, |
|
"learning_rate": 1.1287020109689213e-05, |
|
"loss": 0.9603, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 3.126142595978062, |
|
"grad_norm": 3.705522060394287, |
|
"learning_rate": 1.1265082266910421e-05, |
|
"loss": 0.8995, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 3.129798903107861, |
|
"grad_norm": 4.903218746185303, |
|
"learning_rate": 1.1243144424131627e-05, |
|
"loss": 0.8082, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 3.13345521023766, |
|
"grad_norm": 3.2400360107421875, |
|
"learning_rate": 1.1221206581352834e-05, |
|
"loss": 0.8776, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 3.137111517367459, |
|
"grad_norm": 5.3413472175598145, |
|
"learning_rate": 1.119926873857404e-05, |
|
"loss": 0.9777, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 3.140767824497258, |
|
"grad_norm": 2.983618974685669, |
|
"learning_rate": 1.1177330895795248e-05, |
|
"loss": 0.8496, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 3.1444241316270567, |
|
"grad_norm": 5.781644821166992, |
|
"learning_rate": 1.1155393053016454e-05, |
|
"loss": 0.8602, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 3.1480804387568555, |
|
"grad_norm": 2.5962939262390137, |
|
"learning_rate": 1.1133455210237662e-05, |
|
"loss": 0.7064, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 3.1517367458866543, |
|
"grad_norm": 1.6077173948287964, |
|
"learning_rate": 1.1111517367458866e-05, |
|
"loss": 0.9999, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 3.1553930530164536, |
|
"grad_norm": 6.301138401031494, |
|
"learning_rate": 1.1089579524680074e-05, |
|
"loss": 0.7141, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 3.1590493601462524, |
|
"grad_norm": 6.716737747192383, |
|
"learning_rate": 1.106764168190128e-05, |
|
"loss": 0.9286, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 3.162705667276051, |
|
"grad_norm": 1.6867204904556274, |
|
"learning_rate": 1.1045703839122488e-05, |
|
"loss": 0.5474, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 3.16636197440585, |
|
"grad_norm": 4.190735340118408, |
|
"learning_rate": 1.1023765996343694e-05, |
|
"loss": 0.8172, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 3.170018281535649, |
|
"grad_norm": 4.85944128036499, |
|
"learning_rate": 1.1001828153564898e-05, |
|
"loss": 0.6575, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 3.173674588665448, |
|
"grad_norm": 3.7237160205841064, |
|
"learning_rate": 1.0979890310786106e-05, |
|
"loss": 0.882, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 3.177330895795247, |
|
"grad_norm": 3.742342710494995, |
|
"learning_rate": 1.0957952468007312e-05, |
|
"loss": 0.6917, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 3.1809872029250457, |
|
"grad_norm": 3.6586384773254395, |
|
"learning_rate": 1.093601462522852e-05, |
|
"loss": 1.279, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 3.1846435100548445, |
|
"grad_norm": 7.146944522857666, |
|
"learning_rate": 1.0914076782449726e-05, |
|
"loss": 0.944, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 3.1882998171846433, |
|
"grad_norm": 4.166520595550537, |
|
"learning_rate": 1.0892138939670932e-05, |
|
"loss": 0.8725, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 3.1919561243144425, |
|
"grad_norm": 3.07065486907959, |
|
"learning_rate": 1.0870201096892139e-05, |
|
"loss": 0.7427, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 3.1956124314442413, |
|
"grad_norm": 3.676762342453003, |
|
"learning_rate": 1.0848263254113346e-05, |
|
"loss": 0.5641, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 3.19926873857404, |
|
"grad_norm": 6.545246124267578, |
|
"learning_rate": 1.0826325411334553e-05, |
|
"loss": 0.8099, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 3.202925045703839, |
|
"grad_norm": 4.962130069732666, |
|
"learning_rate": 1.080438756855576e-05, |
|
"loss": 0.7208, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 3.206581352833638, |
|
"grad_norm": 1.6501739025115967, |
|
"learning_rate": 1.0782449725776965e-05, |
|
"loss": 0.7099, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 3.210237659963437, |
|
"grad_norm": 1.7010256052017212, |
|
"learning_rate": 1.0760511882998171e-05, |
|
"loss": 0.6378, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 3.213893967093236, |
|
"grad_norm": 3.4093239307403564, |
|
"learning_rate": 1.0738574040219379e-05, |
|
"loss": 0.9871, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 3.2175502742230346, |
|
"grad_norm": 3.0757012367248535, |
|
"learning_rate": 1.0716636197440585e-05, |
|
"loss": 0.8088, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 3.2212065813528334, |
|
"grad_norm": 5.524442672729492, |
|
"learning_rate": 1.0694698354661793e-05, |
|
"loss": 0.6628, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 3.2248628884826327, |
|
"grad_norm": 5.470324993133545, |
|
"learning_rate": 1.0672760511882997e-05, |
|
"loss": 0.8868, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 3.2285191956124315, |
|
"grad_norm": 4.4467363357543945, |
|
"learning_rate": 1.0650822669104205e-05, |
|
"loss": 0.8019, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 3.2321755027422303, |
|
"grad_norm": 4.382303714752197, |
|
"learning_rate": 1.0628884826325411e-05, |
|
"loss": 0.7377, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 3.235831809872029, |
|
"grad_norm": 5.965306282043457, |
|
"learning_rate": 1.0606946983546619e-05, |
|
"loss": 0.7711, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 3.2394881170018284, |
|
"grad_norm": 3.7286956310272217, |
|
"learning_rate": 1.0585009140767825e-05, |
|
"loss": 0.6884, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 3.243144424131627, |
|
"grad_norm": 4.183840274810791, |
|
"learning_rate": 1.0563071297989031e-05, |
|
"loss": 0.6954, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 3.246800731261426, |
|
"grad_norm": 2.53548526763916, |
|
"learning_rate": 1.0541133455210237e-05, |
|
"loss": 0.674, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 3.250457038391225, |
|
"grad_norm": 4.073317527770996, |
|
"learning_rate": 1.0519195612431444e-05, |
|
"loss": 0.7438, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 3.2541133455210236, |
|
"grad_norm": 0.9088375568389893, |
|
"learning_rate": 1.0497257769652651e-05, |
|
"loss": 0.4774, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 3.257769652650823, |
|
"grad_norm": 3.897162914276123, |
|
"learning_rate": 1.0475319926873858e-05, |
|
"loss": 0.9991, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 3.2614259597806217, |
|
"grad_norm": 4.331843376159668, |
|
"learning_rate": 1.0453382084095064e-05, |
|
"loss": 0.7728, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 3.2650822669104205, |
|
"grad_norm": 4.146157264709473, |
|
"learning_rate": 1.043144424131627e-05, |
|
"loss": 0.8487, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 3.2687385740402193, |
|
"grad_norm": 3.263507127761841, |
|
"learning_rate": 1.0409506398537478e-05, |
|
"loss": 0.7174, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 3.272394881170018, |
|
"grad_norm": 2.1005804538726807, |
|
"learning_rate": 1.0387568555758684e-05, |
|
"loss": 0.6679, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 3.2760511882998173, |
|
"grad_norm": 5.195742607116699, |
|
"learning_rate": 1.0365630712979892e-05, |
|
"loss": 0.7573, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 3.279707495429616, |
|
"grad_norm": 6.104463577270508, |
|
"learning_rate": 1.0343692870201096e-05, |
|
"loss": 0.9765, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 3.283363802559415, |
|
"grad_norm": 4.647432327270508, |
|
"learning_rate": 1.0321755027422304e-05, |
|
"loss": 0.8738, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 3.2870201096892138, |
|
"grad_norm": 2.8530044555664062, |
|
"learning_rate": 1.029981718464351e-05, |
|
"loss": 0.9031, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 3.2906764168190126, |
|
"grad_norm": 3.9043076038360596, |
|
"learning_rate": 1.0277879341864718e-05, |
|
"loss": 0.8052, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.294332723948812, |
|
"grad_norm": 4.055187225341797, |
|
"learning_rate": 1.0255941499085924e-05, |
|
"loss": 1.1009, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 3.2979890310786106, |
|
"grad_norm": 5.00345516204834, |
|
"learning_rate": 1.0234003656307129e-05, |
|
"loss": 0.6651, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 3.3016453382084094, |
|
"grad_norm": 6.529092788696289, |
|
"learning_rate": 1.0212065813528336e-05, |
|
"loss": 0.8191, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 3.3053016453382082, |
|
"grad_norm": 6.646930694580078, |
|
"learning_rate": 1.0190127970749543e-05, |
|
"loss": 0.8653, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 3.3089579524680075, |
|
"grad_norm": 3.7335169315338135, |
|
"learning_rate": 1.016819012797075e-05, |
|
"loss": 0.5912, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 3.3126142595978063, |
|
"grad_norm": 4.354644298553467, |
|
"learning_rate": 1.0146252285191956e-05, |
|
"loss": 0.8478, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 3.316270566727605, |
|
"grad_norm": 5.461722373962402, |
|
"learning_rate": 1.0124314442413163e-05, |
|
"loss": 0.8978, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 3.319926873857404, |
|
"grad_norm": 5.001184463500977, |
|
"learning_rate": 1.0102376599634369e-05, |
|
"loss": 0.7269, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 3.3235831809872027, |
|
"grad_norm": 6.416454792022705, |
|
"learning_rate": 1.0080438756855577e-05, |
|
"loss": 0.5774, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 3.327239488117002, |
|
"grad_norm": 1.3187748193740845, |
|
"learning_rate": 1.0058500914076783e-05, |
|
"loss": 0.7587, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 3.330895795246801, |
|
"grad_norm": 4.8642120361328125, |
|
"learning_rate": 1.003656307129799e-05, |
|
"loss": 0.5372, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 3.3345521023765996, |
|
"grad_norm": 7.198103904724121, |
|
"learning_rate": 1.0014625228519195e-05, |
|
"loss": 0.7514, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 3.3382084095063984, |
|
"grad_norm": 3.342548131942749, |
|
"learning_rate": 9.992687385740401e-06, |
|
"loss": 0.6805, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 3.3418647166361977, |
|
"grad_norm": 7.126440048217773, |
|
"learning_rate": 9.970749542961609e-06, |
|
"loss": 0.7093, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 3.3455210237659965, |
|
"grad_norm": 3.5442097187042236, |
|
"learning_rate": 9.948811700182815e-06, |
|
"loss": 1.0982, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 3.3491773308957953, |
|
"grad_norm": 6.7846550941467285, |
|
"learning_rate": 9.926873857404023e-06, |
|
"loss": 1.0415, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 3.352833638025594, |
|
"grad_norm": 4.274459362030029, |
|
"learning_rate": 9.904936014625227e-06, |
|
"loss": 0.8924, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 3.356489945155393, |
|
"grad_norm": 1.8313312530517578, |
|
"learning_rate": 9.882998171846435e-06, |
|
"loss": 0.5321, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 3.360146252285192, |
|
"grad_norm": 3.1850969791412354, |
|
"learning_rate": 9.861060329067641e-06, |
|
"loss": 0.5233, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 3.363802559414991, |
|
"grad_norm": 4.866973400115967, |
|
"learning_rate": 9.83912248628885e-06, |
|
"loss": 1.079, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 3.3674588665447898, |
|
"grad_norm": 6.718703269958496, |
|
"learning_rate": 9.817184643510055e-06, |
|
"loss": 0.8018, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 3.3711151736745886, |
|
"grad_norm": 2.3948628902435303, |
|
"learning_rate": 9.795246800731262e-06, |
|
"loss": 0.7269, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 3.3747714808043874, |
|
"grad_norm": 5.219935417175293, |
|
"learning_rate": 9.773308957952468e-06, |
|
"loss": 1.0058, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 3.3784277879341866, |
|
"grad_norm": 2.0924437046051025, |
|
"learning_rate": 9.751371115173675e-06, |
|
"loss": 0.6634, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 3.3820840950639854, |
|
"grad_norm": 3.7175605297088623, |
|
"learning_rate": 9.729433272394882e-06, |
|
"loss": 0.7466, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 3.3857404021937842, |
|
"grad_norm": 2.551532745361328, |
|
"learning_rate": 9.707495429616088e-06, |
|
"loss": 0.5294, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 3.389396709323583, |
|
"grad_norm": 4.496357440948486, |
|
"learning_rate": 9.685557586837294e-06, |
|
"loss": 0.6347, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 3.393053016453382, |
|
"grad_norm": 3.644022226333618, |
|
"learning_rate": 9.6636197440585e-06, |
|
"loss": 0.6684, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 3.396709323583181, |
|
"grad_norm": 7.155831336975098, |
|
"learning_rate": 9.641681901279708e-06, |
|
"loss": 0.6775, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 3.40036563071298, |
|
"grad_norm": 2.396113872528076, |
|
"learning_rate": 9.619744058500914e-06, |
|
"loss": 0.9559, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 3.4040219378427787, |
|
"grad_norm": 4.719156742095947, |
|
"learning_rate": 9.597806215722122e-06, |
|
"loss": 0.7062, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 3.4076782449725775, |
|
"grad_norm": 6.176454544067383, |
|
"learning_rate": 9.575868372943328e-06, |
|
"loss": 0.6693, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 3.411334552102377, |
|
"grad_norm": 5.314862251281738, |
|
"learning_rate": 9.553930530164534e-06, |
|
"loss": 0.7944, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 3.4149908592321756, |
|
"grad_norm": 3.4913902282714844, |
|
"learning_rate": 9.53199268738574e-06, |
|
"loss": 0.648, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 3.4186471663619744, |
|
"grad_norm": 5.6252217292785645, |
|
"learning_rate": 9.510054844606948e-06, |
|
"loss": 1.126, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 3.422303473491773, |
|
"grad_norm": 2.5324652194976807, |
|
"learning_rate": 9.488117001828154e-06, |
|
"loss": 0.6604, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 3.425959780621572, |
|
"grad_norm": 1.6346749067306519, |
|
"learning_rate": 9.46617915904936e-06, |
|
"loss": 0.7154, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 3.4296160877513713, |
|
"grad_norm": 2.9343535900115967, |
|
"learning_rate": 9.444241316270567e-06, |
|
"loss": 0.8223, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 3.43327239488117, |
|
"grad_norm": 5.402102947235107, |
|
"learning_rate": 9.422303473491773e-06, |
|
"loss": 1.0261, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 3.436928702010969, |
|
"grad_norm": 4.360336780548096, |
|
"learning_rate": 9.40036563071298e-06, |
|
"loss": 0.8469, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 3.4405850091407677, |
|
"grad_norm": 2.100147008895874, |
|
"learning_rate": 9.378427787934187e-06, |
|
"loss": 0.8319, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 3.444241316270567, |
|
"grad_norm": 5.960880279541016, |
|
"learning_rate": 9.356489945155395e-06, |
|
"loss": 0.8822, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 3.4478976234003658, |
|
"grad_norm": 1.511212706565857, |
|
"learning_rate": 9.334552102376599e-06, |
|
"loss": 0.6203, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 3.4515539305301646, |
|
"grad_norm": 6.034298896789551, |
|
"learning_rate": 9.312614259597807e-06, |
|
"loss": 1.0723, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 3.4552102376599634, |
|
"grad_norm": 3.4445579051971436, |
|
"learning_rate": 9.290676416819013e-06, |
|
"loss": 0.8172, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 3.458866544789762, |
|
"grad_norm": 3.7017529010772705, |
|
"learning_rate": 9.26873857404022e-06, |
|
"loss": 0.8642, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 3.4625228519195614, |
|
"grad_norm": 1.1782617568969727, |
|
"learning_rate": 9.246800731261427e-06, |
|
"loss": 0.738, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 3.4661791590493602, |
|
"grad_norm": 3.201063394546509, |
|
"learning_rate": 9.224862888482633e-06, |
|
"loss": 0.658, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 3.469835466179159, |
|
"grad_norm": 6.244758605957031, |
|
"learning_rate": 9.20292504570384e-06, |
|
"loss": 0.8515, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 3.473491773308958, |
|
"grad_norm": 1.5054762363433838, |
|
"learning_rate": 9.180987202925045e-06, |
|
"loss": 0.6815, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.4771480804387567, |
|
"grad_norm": 4.566993236541748, |
|
"learning_rate": 9.159049360146253e-06, |
|
"loss": 0.9876, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 3.480804387568556, |
|
"grad_norm": 2.5225489139556885, |
|
"learning_rate": 9.13711151736746e-06, |
|
"loss": 0.6651, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 3.4844606946983547, |
|
"grad_norm": 2.050199031829834, |
|
"learning_rate": 9.115173674588665e-06, |
|
"loss": 0.5577, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 3.4881170018281535, |
|
"grad_norm": 4.673213958740234, |
|
"learning_rate": 9.093235831809872e-06, |
|
"loss": 0.4605, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 3.4917733089579523, |
|
"grad_norm": 3.7386956214904785, |
|
"learning_rate": 9.07129798903108e-06, |
|
"loss": 0.7403, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 3.495429616087751, |
|
"grad_norm": 3.0746006965637207, |
|
"learning_rate": 9.049360146252286e-06, |
|
"loss": 0.7544, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 3.4990859232175504, |
|
"grad_norm": 2.793351650238037, |
|
"learning_rate": 9.027422303473493e-06, |
|
"loss": 0.6867, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 3.502742230347349, |
|
"grad_norm": 2.7322490215301514, |
|
"learning_rate": 9.005484460694698e-06, |
|
"loss": 0.5481, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 3.506398537477148, |
|
"grad_norm": 5.938803195953369, |
|
"learning_rate": 8.983546617915906e-06, |
|
"loss": 0.7047, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 3.510054844606947, |
|
"grad_norm": 4.601770877838135, |
|
"learning_rate": 8.961608775137112e-06, |
|
"loss": 0.9434, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 3.5137111517367456, |
|
"grad_norm": 4.575321674346924, |
|
"learning_rate": 8.939670932358318e-06, |
|
"loss": 0.9526, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 3.517367458866545, |
|
"grad_norm": 2.3321361541748047, |
|
"learning_rate": 8.917733089579526e-06, |
|
"loss": 0.8838, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 3.5210237659963437, |
|
"grad_norm": 4.160899639129639, |
|
"learning_rate": 8.89579524680073e-06, |
|
"loss": 0.7722, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 3.5246800731261425, |
|
"grad_norm": 4.240328311920166, |
|
"learning_rate": 8.873857404021938e-06, |
|
"loss": 0.8319, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 3.5283363802559418, |
|
"grad_norm": 5.453382968902588, |
|
"learning_rate": 8.851919561243144e-06, |
|
"loss": 0.7496, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 3.53199268738574, |
|
"grad_norm": 4.4032087326049805, |
|
"learning_rate": 8.829981718464352e-06, |
|
"loss": 0.7413, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 3.5356489945155394, |
|
"grad_norm": 1.5674322843551636, |
|
"learning_rate": 8.808043875685558e-06, |
|
"loss": 0.7772, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 3.539305301645338, |
|
"grad_norm": 1.919179916381836, |
|
"learning_rate": 8.786106032906764e-06, |
|
"loss": 0.6127, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 3.542961608775137, |
|
"grad_norm": 5.616965293884277, |
|
"learning_rate": 8.76416819012797e-06, |
|
"loss": 1.054, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 3.5466179159049362, |
|
"grad_norm": 4.339515209197998, |
|
"learning_rate": 8.742230347349178e-06, |
|
"loss": 0.8764, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 3.550274223034735, |
|
"grad_norm": 2.599030017852783, |
|
"learning_rate": 8.720292504570384e-06, |
|
"loss": 0.6655, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 3.553930530164534, |
|
"grad_norm": 7.379239082336426, |
|
"learning_rate": 8.69835466179159e-06, |
|
"loss": 0.8186, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 3.5575868372943327, |
|
"grad_norm": 5.922464847564697, |
|
"learning_rate": 8.676416819012797e-06, |
|
"loss": 0.7687, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 3.5612431444241315, |
|
"grad_norm": 1.1867303848266602, |
|
"learning_rate": 8.654478976234003e-06, |
|
"loss": 0.7539, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 3.5648994515539307, |
|
"grad_norm": 3.390425205230713, |
|
"learning_rate": 8.63254113345521e-06, |
|
"loss": 0.6565, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 3.5685557586837295, |
|
"grad_norm": 3.1860547065734863, |
|
"learning_rate": 8.610603290676417e-06, |
|
"loss": 0.6849, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 3.5722120658135283, |
|
"grad_norm": 2.4596757888793945, |
|
"learning_rate": 8.588665447897625e-06, |
|
"loss": 0.7395, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 3.575868372943327, |
|
"grad_norm": 2.9441282749176025, |
|
"learning_rate": 8.566727605118829e-06, |
|
"loss": 0.5293, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 3.579524680073126, |
|
"grad_norm": 1.4628350734710693, |
|
"learning_rate": 8.544789762340037e-06, |
|
"loss": 0.8293, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 3.583180987202925, |
|
"grad_norm": 7.661937236785889, |
|
"learning_rate": 8.522851919561243e-06, |
|
"loss": 0.7429, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 3.586837294332724, |
|
"grad_norm": 2.91107177734375, |
|
"learning_rate": 8.500914076782451e-06, |
|
"loss": 0.6905, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 3.590493601462523, |
|
"grad_norm": 1.8382437229156494, |
|
"learning_rate": 8.478976234003657e-06, |
|
"loss": 0.7484, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 3.5941499085923216, |
|
"grad_norm": 5.709616661071777, |
|
"learning_rate": 8.457038391224863e-06, |
|
"loss": 0.9248, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 3.5978062157221204, |
|
"grad_norm": 4.454899311065674, |
|
"learning_rate": 8.43510054844607e-06, |
|
"loss": 0.6578, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 3.6014625228519197, |
|
"grad_norm": 6.460973739624023, |
|
"learning_rate": 8.413162705667276e-06, |
|
"loss": 0.8633, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 3.6051188299817185, |
|
"grad_norm": 2.352285146713257, |
|
"learning_rate": 8.391224862888483e-06, |
|
"loss": 0.6608, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 3.6087751371115173, |
|
"grad_norm": 2.8091228008270264, |
|
"learning_rate": 8.36928702010969e-06, |
|
"loss": 0.642, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 3.612431444241316, |
|
"grad_norm": 2.4271621704101562, |
|
"learning_rate": 8.347349177330896e-06, |
|
"loss": 0.5951, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 3.616087751371115, |
|
"grad_norm": 5.804758548736572, |
|
"learning_rate": 8.325411334552102e-06, |
|
"loss": 0.6621, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 3.619744058500914, |
|
"grad_norm": 3.8473427295684814, |
|
"learning_rate": 8.30347349177331e-06, |
|
"loss": 0.6347, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 3.623400365630713, |
|
"grad_norm": 3.387230396270752, |
|
"learning_rate": 8.281535648994516e-06, |
|
"loss": 0.7107, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 3.627056672760512, |
|
"grad_norm": 7.850528240203857, |
|
"learning_rate": 8.259597806215724e-06, |
|
"loss": 0.7608, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 3.630712979890311, |
|
"grad_norm": 4.779109954833984, |
|
"learning_rate": 8.237659963436928e-06, |
|
"loss": 0.9004, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 3.6343692870201094, |
|
"grad_norm": 7.75559139251709, |
|
"learning_rate": 8.215722120658136e-06, |
|
"loss": 0.9884, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 3.6380255941499087, |
|
"grad_norm": 3.2816567420959473, |
|
"learning_rate": 8.193784277879342e-06, |
|
"loss": 0.9046, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 3.6416819012797075, |
|
"grad_norm": 3.8553521633148193, |
|
"learning_rate": 8.171846435100548e-06, |
|
"loss": 0.6122, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 3.6453382084095063, |
|
"grad_norm": 4.713034152984619, |
|
"learning_rate": 8.149908592321756e-06, |
|
"loss": 0.7977, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 3.6489945155393055, |
|
"grad_norm": 8.331437110900879, |
|
"learning_rate": 8.12797074954296e-06, |
|
"loss": 0.7995, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 3.6526508226691043, |
|
"grad_norm": 2.3194291591644287, |
|
"learning_rate": 8.106032906764168e-06, |
|
"loss": 0.9511, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 3.656307129798903, |
|
"grad_norm": 5.6562676429748535, |
|
"learning_rate": 8.084095063985374e-06, |
|
"loss": 0.7415, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.659963436928702, |
|
"grad_norm": 3.207094192504883, |
|
"learning_rate": 8.062157221206582e-06, |
|
"loss": 0.7371, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 3.6636197440585008, |
|
"grad_norm": 5.320219993591309, |
|
"learning_rate": 8.040219378427788e-06, |
|
"loss": 0.8301, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 3.6672760511883, |
|
"grad_norm": 3.936784505844116, |
|
"learning_rate": 8.018281535648995e-06, |
|
"loss": 0.759, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 3.670932358318099, |
|
"grad_norm": 1.9420430660247803, |
|
"learning_rate": 7.9963436928702e-06, |
|
"loss": 0.9414, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 3.6745886654478976, |
|
"grad_norm": 5.9929728507995605, |
|
"learning_rate": 7.974405850091408e-06, |
|
"loss": 0.6761, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 3.6782449725776964, |
|
"grad_norm": 5.185636520385742, |
|
"learning_rate": 7.952468007312615e-06, |
|
"loss": 0.775, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 3.6819012797074953, |
|
"grad_norm": 2.565422534942627, |
|
"learning_rate": 7.930530164533822e-06, |
|
"loss": 0.7599, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 3.6855575868372945, |
|
"grad_norm": 6.941178321838379, |
|
"learning_rate": 7.908592321755027e-06, |
|
"loss": 0.6673, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 3.6892138939670933, |
|
"grad_norm": 3.0745110511779785, |
|
"learning_rate": 7.886654478976233e-06, |
|
"loss": 0.685, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 3.692870201096892, |
|
"grad_norm": 4.359233379364014, |
|
"learning_rate": 7.864716636197441e-06, |
|
"loss": 0.7306, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.696526508226691, |
|
"grad_norm": 2.1655170917510986, |
|
"learning_rate": 7.842778793418647e-06, |
|
"loss": 0.6061, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 3.7001828153564897, |
|
"grad_norm": 2.5100502967834473, |
|
"learning_rate": 7.820840950639855e-06, |
|
"loss": 0.5755, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 3.703839122486289, |
|
"grad_norm": 2.577319383621216, |
|
"learning_rate": 7.801096892138939e-06, |
|
"loss": 0.8511, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 3.707495429616088, |
|
"grad_norm": 4.023679733276367, |
|
"learning_rate": 7.779159049360147e-06, |
|
"loss": 0.9649, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 3.7111517367458866, |
|
"grad_norm": 3.2172110080718994, |
|
"learning_rate": 7.757221206581353e-06, |
|
"loss": 0.9499, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 3.7148080438756854, |
|
"grad_norm": 4.36275053024292, |
|
"learning_rate": 7.73528336380256e-06, |
|
"loss": 0.7763, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 3.7184643510054842, |
|
"grad_norm": 4.072483062744141, |
|
"learning_rate": 7.713345521023765e-06, |
|
"loss": 0.7541, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 3.7221206581352835, |
|
"grad_norm": 4.370612144470215, |
|
"learning_rate": 7.691407678244973e-06, |
|
"loss": 1.0629, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 3.7257769652650823, |
|
"grad_norm": 3.0197012424468994, |
|
"learning_rate": 7.669469835466179e-06, |
|
"loss": 0.6874, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 3.729433272394881, |
|
"grad_norm": 2.190140962600708, |
|
"learning_rate": 7.647531992687387e-06, |
|
"loss": 0.784, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.7330895795246803, |
|
"grad_norm": 1.6328208446502686, |
|
"learning_rate": 7.625594149908592e-06, |
|
"loss": 0.7953, |
|
"step": 10210 |
|
}, |
    { "epoch": 3.7367458866544787, "grad_norm": 4.16575288772583, "learning_rate": 7.6036563071298e-06, "loss": 0.8314, "step": 10220 },
    { "epoch": 3.740402193784278, "grad_norm": 6.011321067810059, "learning_rate": 7.581718464351006e-06, "loss": 0.6144, "step": 10230 },
    { "epoch": 3.7440585009140768, "grad_norm": 4.7472710609436035, "learning_rate": 7.559780621572211e-06, "loss": 0.7889, "step": 10240 },
    { "epoch": 3.7477148080438756, "grad_norm": 2.6220803260803223, "learning_rate": 7.537842778793419e-06, "loss": 0.7036, "step": 10250 },
    { "epoch": 3.751371115173675, "grad_norm": 2.190154552459717, "learning_rate": 7.5159049360146245e-06, "loss": 0.9437, "step": 10260 },
    { "epoch": 3.7550274223034736, "grad_norm": 4.362695693969727, "learning_rate": 7.493967093235832e-06, "loss": 0.8136, "step": 10270 },
    { "epoch": 3.7586837294332724, "grad_norm": 3.7511837482452393, "learning_rate": 7.472029250457039e-06, "loss": 0.7365, "step": 10280 },
    { "epoch": 3.7623400365630713, "grad_norm": 2.2203571796417236, "learning_rate": 7.450091407678245e-06, "loss": 0.6625, "step": 10290 },
    { "epoch": 3.76599634369287, "grad_norm": 4.447721004486084, "learning_rate": 7.4281535648994516e-06, "loss": 0.8594, "step": 10300 },
    { "epoch": 3.7696526508226693, "grad_norm": 5.554366111755371, "learning_rate": 7.406215722120658e-06, "loss": 0.891, "step": 10310 },
    { "epoch": 3.773308957952468, "grad_norm": 5.551204681396484, "learning_rate": 7.384277879341865e-06, "loss": 0.6996, "step": 10320 },
    { "epoch": 3.776965265082267, "grad_norm": 2.1783394813537598, "learning_rate": 7.362340036563072e-06, "loss": 0.7032, "step": 10330 },
    { "epoch": 3.7806215722120657, "grad_norm": 2.8184330463409424, "learning_rate": 7.340402193784278e-06, "loss": 0.7221, "step": 10340 },
    { "epoch": 3.7842778793418645, "grad_norm": 3.869269609451294, "learning_rate": 7.318464351005485e-06, "loss": 0.8456, "step": 10350 },
    { "epoch": 3.787934186471664, "grad_norm": 1.1639561653137207, "learning_rate": 7.296526508226691e-06, "loss": 0.9325, "step": 10360 },
    { "epoch": 3.7915904936014626, "grad_norm": 2.3072006702423096, "learning_rate": 7.274588665447898e-06, "loss": 0.6822, "step": 10370 },
    { "epoch": 3.7952468007312614, "grad_norm": 4.453368186950684, "learning_rate": 7.252650822669105e-06, "loss": 0.6115, "step": 10380 },
    { "epoch": 3.7989031078610602, "grad_norm": 2.4103519916534424, "learning_rate": 7.230712979890311e-06, "loss": 0.7449, "step": 10390 },
    { "epoch": 3.802559414990859, "grad_norm": 5.65090274810791, "learning_rate": 7.208775137111518e-06, "loss": 0.9821, "step": 10400 },
    { "epoch": 3.8062157221206583, "grad_norm": 4.507080078125, "learning_rate": 7.186837294332723e-06, "loss": 0.7213, "step": 10410 },
    { "epoch": 3.809872029250457, "grad_norm": 2.4969277381896973, "learning_rate": 7.16489945155393e-06, "loss": 1.2508, "step": 10420 },
    { "epoch": 3.813528336380256, "grad_norm": 1.090476393699646, "learning_rate": 7.142961608775137e-06, "loss": 0.6157, "step": 10430 },
    { "epoch": 3.8171846435100547, "grad_norm": 2.3121488094329834, "learning_rate": 7.1210237659963435e-06, "loss": 0.816, "step": 10440 },
    { "epoch": 3.8208409506398535, "grad_norm": 3.1048355102539062, "learning_rate": 7.0990859232175505e-06, "loss": 0.5809, "step": 10450 },
    { "epoch": 3.8244972577696528, "grad_norm": 4.020531177520752, "learning_rate": 7.077148080438757e-06, "loss": 0.9782, "step": 10460 },
    { "epoch": 3.8281535648994516, "grad_norm": 3.6427266597747803, "learning_rate": 7.055210237659964e-06, "loss": 0.6038, "step": 10470 },
    { "epoch": 3.8318098720292504, "grad_norm": 4.342096328735352, "learning_rate": 7.033272394881171e-06, "loss": 1.2473, "step": 10480 },
    { "epoch": 3.835466179159049, "grad_norm": 3.162109136581421, "learning_rate": 7.011334552102377e-06, "loss": 0.6195, "step": 10490 },
    { "epoch": 3.839122486288848, "grad_norm": 2.9012115001678467, "learning_rate": 6.989396709323584e-06, "loss": 0.8822, "step": 10500 },
    { "epoch": 3.8427787934186473, "grad_norm": 6.881933212280273, "learning_rate": 6.96745886654479e-06, "loss": 0.8352, "step": 10510 },
    { "epoch": 3.846435100548446, "grad_norm": 6.350467681884766, "learning_rate": 6.945521023765997e-06, "loss": 0.9048, "step": 10520 },
    { "epoch": 3.850091407678245, "grad_norm": 2.833682060241699, "learning_rate": 6.923583180987203e-06, "loss": 0.8362, "step": 10530 },
    { "epoch": 3.853747714808044, "grad_norm": 5.460103511810303, "learning_rate": 6.901645338208409e-06, "loss": 0.5568, "step": 10540 },
    { "epoch": 3.857404021937843, "grad_norm": 2.551905870437622, "learning_rate": 6.879707495429616e-06, "loss": 0.8165, "step": 10550 },
    { "epoch": 3.8610603290676417, "grad_norm": 4.430031776428223, "learning_rate": 6.857769652650823e-06, "loss": 0.9246, "step": 10560 },
    { "epoch": 3.8647166361974405, "grad_norm": 2.5683767795562744, "learning_rate": 6.835831809872029e-06, "loss": 0.7689, "step": 10570 },
    { "epoch": 3.8683729433272394, "grad_norm": 2.5122482776641846, "learning_rate": 6.813893967093236e-06, "loss": 0.8126, "step": 10580 },
    { "epoch": 3.8720292504570386, "grad_norm": 3.8249447345733643, "learning_rate": 6.791956124314442e-06, "loss": 0.6062, "step": 10590 },
    { "epoch": 3.8756855575868374, "grad_norm": 3.5439441204071045, "learning_rate": 6.770018281535649e-06, "loss": 0.621, "step": 10600 },
    { "epoch": 3.8793418647166362, "grad_norm": 4.30275297164917, "learning_rate": 6.748080438756856e-06, "loss": 0.8285, "step": 10610 },
    { "epoch": 3.882998171846435, "grad_norm": 4.716472148895264, "learning_rate": 6.7261425959780625e-06, "loss": 0.5947, "step": 10620 },
    { "epoch": 3.886654478976234, "grad_norm": 5.69554328918457, "learning_rate": 6.7042047531992695e-06, "loss": 0.6366, "step": 10630 },
    { "epoch": 3.890310786106033, "grad_norm": 6.0481133460998535, "learning_rate": 6.682266910420476e-06, "loss": 0.6759, "step": 10640 },
    { "epoch": 3.893967093235832, "grad_norm": 5.054582118988037, "learning_rate": 6.660329067641682e-06, "loss": 0.9449, "step": 10650 },
    { "epoch": 3.8976234003656307, "grad_norm": 4.874343395233154, "learning_rate": 6.638391224862889e-06, "loss": 0.542, "step": 10660 },
    { "epoch": 3.9012797074954295, "grad_norm": 1.556717872619629, "learning_rate": 6.616453382084095e-06, "loss": 0.9563, "step": 10670 },
    { "epoch": 3.9049360146252283, "grad_norm": 1.8250552415847778, "learning_rate": 6.594515539305302e-06, "loss": 0.6699, "step": 10680 },
    { "epoch": 3.9085923217550276, "grad_norm": 1.2335312366485596, "learning_rate": 6.572577696526508e-06, "loss": 0.8833, "step": 10690 },
    { "epoch": 3.9122486288848264, "grad_norm": 4.553168296813965, "learning_rate": 6.550639853747715e-06, "loss": 0.7619, "step": 10700 },
    { "epoch": 3.915904936014625, "grad_norm": 5.518167495727539, "learning_rate": 6.528702010968922e-06, "loss": 0.9136, "step": 10710 },
    { "epoch": 3.919561243144424, "grad_norm": 4.577470302581787, "learning_rate": 6.506764168190128e-06, "loss": 0.8478, "step": 10720 },
    { "epoch": 3.923217550274223, "grad_norm": 5.852701663970947, "learning_rate": 6.484826325411335e-06, "loss": 0.9866, "step": 10730 },
    { "epoch": 3.926873857404022, "grad_norm": 2.7787961959838867, "learning_rate": 6.462888482632541e-06, "loss": 0.788, "step": 10740 },
    { "epoch": 3.930530164533821, "grad_norm": 5.320887565612793, "learning_rate": 6.440950639853748e-06, "loss": 0.8878, "step": 10750 },
    { "epoch": 3.9341864716636197, "grad_norm": 5.620364665985107, "learning_rate": 6.419012797074955e-06, "loss": 0.7458, "step": 10760 },
    { "epoch": 3.9378427787934185, "grad_norm": 4.398257732391357, "learning_rate": 6.3970749542961605e-06, "loss": 0.7291, "step": 10770 },
    { "epoch": 3.9414990859232173, "grad_norm": 1.9630357027053833, "learning_rate": 6.3751371115173675e-06, "loss": 0.8595, "step": 10780 },
    { "epoch": 3.9451553930530165, "grad_norm": 3.069357395172119, "learning_rate": 6.353199268738574e-06, "loss": 0.5498, "step": 10790 },
    { "epoch": 3.9488117001828154, "grad_norm": 8.382603645324707, "learning_rate": 6.331261425959781e-06, "loss": 1.0477, "step": 10800 },
    { "epoch": 3.952468007312614, "grad_norm": 2.2028815746307373, "learning_rate": 6.309323583180988e-06, "loss": 0.7652, "step": 10810 },
    { "epoch": 3.9561243144424134, "grad_norm": 5.587583541870117, "learning_rate": 6.287385740402194e-06, "loss": 0.777, "step": 10820 },
    { "epoch": 3.9597806215722122, "grad_norm": 4.032431602478027, "learning_rate": 6.265447897623401e-06, "loss": 0.7834, "step": 10830 },
    { "epoch": 3.963436928702011, "grad_norm": 3.680415630340576, "learning_rate": 6.243510054844607e-06, "loss": 0.5833, "step": 10840 },
    { "epoch": 3.96709323583181, "grad_norm": 2.4800500869750977, "learning_rate": 6.221572212065814e-06, "loss": 0.858, "step": 10850 },
    { "epoch": 3.9707495429616086, "grad_norm": 4.882104873657227, "learning_rate": 6.199634369287021e-06, "loss": 0.8244, "step": 10860 },
    { "epoch": 3.974405850091408, "grad_norm": 5.2901411056518555, "learning_rate": 6.177696526508227e-06, "loss": 0.8658, "step": 10870 },
    { "epoch": 3.9780621572212067, "grad_norm": 7.267496109008789, "learning_rate": 6.155758683729433e-06, "loss": 0.7934, "step": 10880 },
    { "epoch": 3.9817184643510055, "grad_norm": 5.89931058883667, "learning_rate": 6.133820840950639e-06, "loss": 0.7339, "step": 10890 },
    { "epoch": 3.9853747714808043, "grad_norm": 5.361083507537842, "learning_rate": 6.111882998171846e-06, "loss": 0.8397, "step": 10900 },
    { "epoch": 3.989031078610603, "grad_norm": 3.948314666748047, "learning_rate": 6.089945155393053e-06, "loss": 0.7595, "step": 10910 },
    { "epoch": 3.9926873857404024, "grad_norm": 1.0359902381896973, "learning_rate": 6.068007312614259e-06, "loss": 0.6643, "step": 10920 },
    { "epoch": 3.996343692870201, "grad_norm": 5.438472270965576, "learning_rate": 6.046069469835466e-06, "loss": 0.8025, "step": 10930 },
    { "epoch": 4.0, "grad_norm": 5.690487384796143, "learning_rate": 6.0241316270566725e-06, "loss": 0.8951, "step": 10940 },
    { "epoch": 4.003656307129799, "grad_norm": 6.72605562210083, "learning_rate": 6.0021937842778795e-06, "loss": 0.7852, "step": 10950 },
    { "epoch": 4.007312614259598, "grad_norm": 6.367304801940918, "learning_rate": 5.9802559414990865e-06, "loss": 0.9468, "step": 10960 },
    { "epoch": 4.010968921389397, "grad_norm": 4.209175109863281, "learning_rate": 5.958318098720293e-06, "loss": 0.7559, "step": 10970 },
    { "epoch": 4.014625228519195, "grad_norm": 2.612675428390503, "learning_rate": 5.9363802559415e-06, "loss": 0.6464, "step": 10980 },
    { "epoch": 4.0182815356489945, "grad_norm": 3.516434907913208, "learning_rate": 5.914442413162706e-06, "loss": 0.761, "step": 10990 },
    { "epoch": 4.021937842778794, "grad_norm": 3.522313117980957, "learning_rate": 5.892504570383912e-06, "loss": 0.9033, "step": 11000 },
    { "epoch": 4.025594149908592, "grad_norm": 3.2648613452911377, "learning_rate": 5.870566727605119e-06, "loss": 0.5933, "step": 11010 },
    { "epoch": 4.029250457038391, "grad_norm": 4.611745357513428, "learning_rate": 5.848628884826325e-06, "loss": 1.1358, "step": 11020 },
    { "epoch": 4.03290676416819, "grad_norm": 2.9652693271636963, "learning_rate": 5.826691042047532e-06, "loss": 0.7771, "step": 11030 },
    { "epoch": 4.036563071297989, "grad_norm": 4.490486145019531, "learning_rate": 5.804753199268738e-06, "loss": 0.8091, "step": 11040 },
    { "epoch": 4.040219378427788, "grad_norm": 4.961881637573242, "learning_rate": 5.782815356489945e-06, "loss": 0.7322, "step": 11050 },
    { "epoch": 4.043875685557587, "grad_norm": 4.714334011077881, "learning_rate": 5.760877513711152e-06, "loss": 0.6583, "step": 11060 },
    { "epoch": 4.047531992687386, "grad_norm": 4.1526570320129395, "learning_rate": 5.738939670932358e-06, "loss": 0.9548, "step": 11070 },
    { "epoch": 4.051188299817184, "grad_norm": 6.9063239097595215, "learning_rate": 5.717001828153565e-06, "loss": 1.1938, "step": 11080 },
    { "epoch": 4.0548446069469835, "grad_norm": 4.847267150878906, "learning_rate": 5.6950639853747714e-06, "loss": 0.7049, "step": 11090 },
    { "epoch": 4.058500914076783, "grad_norm": 4.072165489196777, "learning_rate": 5.6731261425959784e-06, "loss": 0.5814, "step": 11100 },
    { "epoch": 4.062157221206581, "grad_norm": 7.266864776611328, "learning_rate": 5.651188299817185e-06, "loss": 0.9592, "step": 11110 },
    { "epoch": 4.06581352833638, "grad_norm": 4.926406383514404, "learning_rate": 5.629250457038391e-06, "loss": 0.5798, "step": 11120 },
    { "epoch": 4.06946983546618, "grad_norm": 5.232889175415039, "learning_rate": 5.607312614259598e-06, "loss": 0.7774, "step": 11130 },
    { "epoch": 4.073126142595978, "grad_norm": 2.904597759246826, "learning_rate": 5.585374771480805e-06, "loss": 0.9523, "step": 11140 },
    { "epoch": 4.076782449725777, "grad_norm": 2.809514045715332, "learning_rate": 5.563436928702011e-06, "loss": 0.9262, "step": 11150 },
    { "epoch": 4.0804387568555756, "grad_norm": 3.8771932125091553, "learning_rate": 5.541499085923218e-06, "loss": 0.6266, "step": 11160 },
    { "epoch": 4.084095063985375, "grad_norm": 1.681246280670166, "learning_rate": 5.519561243144424e-06, "loss": 0.8984, "step": 11170 },
    { "epoch": 4.087751371115174, "grad_norm": 5.3567795753479, "learning_rate": 5.497623400365631e-06, "loss": 0.8853, "step": 11180 },
    { "epoch": 4.091407678244972, "grad_norm": 4.239979267120361, "learning_rate": 5.475685557586838e-06, "loss": 0.8099, "step": 11190 },
    { "epoch": 4.095063985374772, "grad_norm": 1.681639313697815, "learning_rate": 5.453747714808044e-06, "loss": 0.582, "step": 11200 },
    { "epoch": 4.09872029250457, "grad_norm": 5.81494140625, "learning_rate": 5.431809872029251e-06, "loss": 0.7281, "step": 11210 },
    { "epoch": 4.102376599634369, "grad_norm": 4.564912796020508, "learning_rate": 5.409872029250457e-06, "loss": 0.7389, "step": 11220 },
    { "epoch": 4.1060329067641685, "grad_norm": 3.4300804138183594, "learning_rate": 5.387934186471664e-06, "loss": 0.6513, "step": 11230 },
    { "epoch": 4.109689213893967, "grad_norm": 3.141324281692505, "learning_rate": 5.36599634369287e-06, "loss": 0.9758, "step": 11240 },
    { "epoch": 4.113345521023766, "grad_norm": 1.9948968887329102, "learning_rate": 5.3440585009140765e-06, "loss": 0.8143, "step": 11250 },
    { "epoch": 4.1170018281535645, "grad_norm": 0.9731669425964355, "learning_rate": 5.3221206581352835e-06, "loss": 0.6493, "step": 11260 },
    { "epoch": 4.120658135283364, "grad_norm": 3.1560752391815186, "learning_rate": 5.30018281535649e-06, "loss": 0.7211, "step": 11270 },
    { "epoch": 4.124314442413163, "grad_norm": 1.2107890844345093, "learning_rate": 5.278244972577697e-06, "loss": 0.7278, "step": 11280 },
    { "epoch": 4.127970749542961, "grad_norm": 1.0509763956069946, "learning_rate": 5.256307129798904e-06, "loss": 0.5858, "step": 11290 },
    { "epoch": 4.131627056672761, "grad_norm": 3.70460844039917, "learning_rate": 5.23436928702011e-06, "loss": 0.8024, "step": 11300 },
    { "epoch": 4.135283363802559, "grad_norm": 4.243873596191406, "learning_rate": 5.212431444241317e-06, "loss": 1.0335, "step": 11310 },
    { "epoch": 4.138939670932358, "grad_norm": 4.228180885314941, "learning_rate": 5.190493601462523e-06, "loss": 0.865, "step": 11320 },
    { "epoch": 4.1425959780621575, "grad_norm": 8.000550270080566, "learning_rate": 5.16855575868373e-06, "loss": 0.9858, "step": 11330 },
    { "epoch": 4.146252285191956, "grad_norm": 7.326601505279541, "learning_rate": 5.146617915904937e-06, "loss": 1.1155, "step": 11340 },
    { "epoch": 4.149908592321755, "grad_norm": 6.109528064727783, "learning_rate": 5.124680073126143e-06, "loss": 0.5274, "step": 11350 },
    { "epoch": 4.153564899451554, "grad_norm": 3.239499568939209, "learning_rate": 5.102742230347349e-06, "loss": 0.9185, "step": 11360 },
    { "epoch": 4.157221206581353, "grad_norm": 5.765626430511475, "learning_rate": 5.080804387568555e-06, "loss": 0.6147, "step": 11370 },
    { "epoch": 4.160877513711152, "grad_norm": 3.189391613006592, "learning_rate": 5.058866544789762e-06, "loss": 0.7233, "step": 11380 },
    { "epoch": 4.16453382084095, "grad_norm": 5.938801288604736, "learning_rate": 5.036928702010969e-06, "loss": 0.7033, "step": 11390 },
    { "epoch": 4.16819012797075, "grad_norm": 1.7474747896194458, "learning_rate": 5.014990859232175e-06, "loss": 0.644, "step": 11400 },
    { "epoch": 4.171846435100549, "grad_norm": 2.9664547443389893, "learning_rate": 4.993053016453382e-06, "loss": 0.7644, "step": 11410 },
    { "epoch": 4.175502742230347, "grad_norm": 3.7296855449676514, "learning_rate": 4.9711151736745885e-06, "loss": 0.8427, "step": 11420 },
    { "epoch": 4.1791590493601465, "grad_norm": 5.18561315536499, "learning_rate": 4.9491773308957955e-06, "loss": 0.6164, "step": 11430 },
    { "epoch": 4.182815356489945, "grad_norm": 4.443209648132324, "learning_rate": 4.9272394881170025e-06, "loss": 0.6032, "step": 11440 },
    { "epoch": 4.186471663619744, "grad_norm": 5.131235599517822, "learning_rate": 4.905301645338209e-06, "loss": 0.9402, "step": 11450 },
    { "epoch": 4.190127970749543, "grad_norm": 7.778912544250488, "learning_rate": 4.883363802559416e-06, "loss": 0.7508, "step": 11460 },
    { "epoch": 4.193784277879342, "grad_norm": 3.81158709526062, "learning_rate": 4.861425959780622e-06, "loss": 0.6463, "step": 11470 },
    { "epoch": 4.197440585009141, "grad_norm": 3.7394750118255615, "learning_rate": 4.839488117001828e-06, "loss": 0.8441, "step": 11480 },
    { "epoch": 4.201096892138939, "grad_norm": 5.460958003997803, "learning_rate": 4.817550274223035e-06, "loss": 0.758, "step": 11490 },
    { "epoch": 4.204753199268739, "grad_norm": 3.6687943935394287, "learning_rate": 4.795612431444241e-06, "loss": 0.6764, "step": 11500 },
    { "epoch": 4.208409506398538, "grad_norm": 5.314717769622803, "learning_rate": 4.773674588665448e-06, "loss": 0.599, "step": 11510 },
    { "epoch": 4.212065813528336, "grad_norm": 1.7225974798202515, "learning_rate": 4.751736745886654e-06, "loss": 0.7536, "step": 11520 },
    { "epoch": 4.2157221206581355, "grad_norm": 4.815572261810303, "learning_rate": 4.729798903107861e-06, "loss": 0.7072, "step": 11530 },
    { "epoch": 4.219378427787934, "grad_norm": 6.468070983886719, "learning_rate": 4.707861060329068e-06, "loss": 1.0721, "step": 11540 },
    { "epoch": 4.223034734917733, "grad_norm": 2.3022828102111816, "learning_rate": 4.685923217550274e-06, "loss": 0.5984, "step": 11550 },
    { "epoch": 4.226691042047532, "grad_norm": 6.993771553039551, "learning_rate": 4.663985374771481e-06, "loss": 1.0772, "step": 11560 },
    { "epoch": 4.230347349177331, "grad_norm": 3.061063766479492, "learning_rate": 4.642047531992687e-06, "loss": 0.5588, "step": 11570 },
    { "epoch": 4.23400365630713, "grad_norm": 1.7412669658660889, "learning_rate": 4.620109689213894e-06, "loss": 0.7227, "step": 11580 },
    { "epoch": 4.237659963436928, "grad_norm": 8.846871376037598, "learning_rate": 4.598171846435101e-06, "loss": 0.6635, "step": 11590 },
    { "epoch": 4.2413162705667276, "grad_norm": 17.051027297973633, "learning_rate": 4.576234003656307e-06, "loss": 0.9688, "step": 11600 },
    { "epoch": 4.244972577696527, "grad_norm": 8.541803359985352, "learning_rate": 4.554296160877514e-06, "loss": 0.8742, "step": 11610 },
    { "epoch": 4.248628884826325, "grad_norm": 6.193167686462402, "learning_rate": 4.53235831809872e-06, "loss": 0.6511, "step": 11620 },
    { "epoch": 4.252285191956124, "grad_norm": 7.520668983459473, "learning_rate": 4.510420475319927e-06, "loss": 0.6708, "step": 11630 },
    { "epoch": 4.255941499085923, "grad_norm": 4.377003192901611, "learning_rate": 4.488482632541134e-06, "loss": 0.5833, "step": 11640 },
    { "epoch": 4.259597806215722, "grad_norm": 8.407455444335938, "learning_rate": 4.46654478976234e-06, "loss": 0.6796, "step": 11650 },
    { "epoch": 4.263254113345521, "grad_norm": 6.994277477264404, "learning_rate": 4.444606946983547e-06, "loss": 0.6158, "step": 11660 },
    { "epoch": 4.26691042047532, "grad_norm": 4.836822032928467, "learning_rate": 4.422669104204753e-06, "loss": 0.5752, "step": 11670 },
    { "epoch": 4.270566727605119, "grad_norm": 8.016481399536133, "learning_rate": 4.40073126142596e-06, "loss": 0.6766, "step": 11680 },
    { "epoch": 4.274223034734918, "grad_norm": 7.545466423034668, "learning_rate": 4.378793418647167e-06, "loss": 0.756, "step": 11690 },
    { "epoch": 4.2778793418647165, "grad_norm": 6.334908485412598, "learning_rate": 4.356855575868373e-06, "loss": 0.6505, "step": 11700 },
    { "epoch": 4.281535648994516, "grad_norm": 8.154512405395508, "learning_rate": 4.334917733089579e-06, "loss": 0.7931, "step": 11710 },
    { "epoch": 4.285191956124314, "grad_norm": 6.061620712280273, "learning_rate": 4.312979890310786e-06, "loss": 0.7183, "step": 11720 },
    { "epoch": 4.288848263254113, "grad_norm": 8.32985782623291, "learning_rate": 4.291042047531992e-06, "loss": 0.6878, "step": 11730 },
    { "epoch": 4.292504570383913, "grad_norm": 5.723931312561035, "learning_rate": 4.269104204753199e-06, "loss": 0.7703, "step": 11740 },
    { "epoch": 4.296160877513711, "grad_norm": 8.518719673156738, "learning_rate": 4.2471663619744055e-06, "loss": 0.6322, "step": 11750 },
    { "epoch": 4.29981718464351, "grad_norm": 6.429286956787109, "learning_rate": 4.2252285191956125e-06, "loss": 0.6678, "step": 11760 },
    { "epoch": 4.303473491773309, "grad_norm": 6.832225799560547, "learning_rate": 4.2032906764168195e-06, "loss": 0.7779, "step": 11770 },
    { "epoch": 4.307129798903108, "grad_norm": 5.4020233154296875, "learning_rate": 4.181352833638026e-06, "loss": 0.6867, "step": 11780 },
    { "epoch": 4.310786106032907, "grad_norm": 5.374074459075928, "learning_rate": 4.159414990859233e-06, "loss": 0.6868, "step": 11790 },
    { "epoch": 4.3144424131627055, "grad_norm": 5.138687610626221, "learning_rate": 4.137477148080439e-06, "loss": 0.8428, "step": 11800 },
    { "epoch": 4.318098720292505, "grad_norm": 10.137980461120605, "learning_rate": 4.115539305301646e-06, "loss": 0.7087, "step": 11810 },
    { "epoch": 4.321755027422303, "grad_norm": 4.559932231903076, "learning_rate": 4.093601462522853e-06, "loss": 0.6856, "step": 11820 },
    { "epoch": 4.325411334552102, "grad_norm": 6.470888137817383, "learning_rate": 4.071663619744058e-06, "loss": 0.7017, "step": 11830 },
    { "epoch": 4.329067641681902, "grad_norm": 7.216504096984863, "learning_rate": 4.049725776965265e-06, "loss": 0.6752, "step": 11840 },
    { "epoch": 4.3327239488117, "grad_norm": 6.0090460777282715, "learning_rate": 4.027787934186471e-06, "loss": 0.8154, "step": 11850 },
    { "epoch": 4.336380255941499, "grad_norm": 9.844496726989746, "learning_rate": 4.005850091407678e-06, "loss": 0.7323, "step": 11860 },
    { "epoch": 4.340036563071298, "grad_norm": 10.084904670715332, "learning_rate": 3.983912248628885e-06, "loss": 0.8326, "step": 11870 },
    { "epoch": 4.343692870201097, "grad_norm": 8.137714385986328, "learning_rate": 3.961974405850091e-06, "loss": 0.7666, "step": 11880 },
    { "epoch": 4.347349177330896, "grad_norm": 5.626021385192871, "learning_rate": 3.940036563071298e-06, "loss": 0.7711, "step": 11890 },
    { "epoch": 4.3510054844606945, "grad_norm": 7.723333358764648, "learning_rate": 3.9180987202925044e-06, "loss": 0.6027, "step": 11900 },
    { "epoch": 4.354661791590494, "grad_norm": 7.431672096252441, "learning_rate": 3.8961608775137114e-06, "loss": 0.666, "step": 11910 },
    { "epoch": 4.358318098720293, "grad_norm": 6.387314319610596, "learning_rate": 3.874223034734918e-06, "loss": 0.7947, "step": 11920 },
    { "epoch": 4.361974405850091, "grad_norm": 9.410737991333008, "learning_rate": 3.8522851919561246e-06, "loss": 0.7556, "step": 11930 },
    { "epoch": 4.365630712979891, "grad_norm": 5.66964864730835, "learning_rate": 3.8303473491773315e-06, "loss": 0.7359, "step": 11940 },
    { "epoch": 4.369287020109689, "grad_norm": 10.612873077392578, "learning_rate": 3.8084095063985373e-06, "loss": 0.7017, "step": 11950 },
    { "epoch": 4.372943327239488, "grad_norm": 5.808506488800049, "learning_rate": 3.786471663619744e-06, "loss": 0.7297, "step": 11960 },
    { "epoch": 4.376599634369287, "grad_norm": 9.308990478515625, "learning_rate": 3.7645338208409504e-06, "loss": 0.8003, "step": 11970 },
    { "epoch": 4.380255941499086, "grad_norm": 7.967999458312988, "learning_rate": 3.742595978062157e-06, "loss": 0.6148, "step": 11980 },
    { "epoch": 4.383912248628885, "grad_norm": 5.871792316436768, "learning_rate": 3.720658135283364e-06, "loss": 0.6653, "step": 11990 },
    { "epoch": 4.387568555758683, "grad_norm": 7.684876918792725, "learning_rate": 3.6987202925045705e-06, "loss": 0.6063, "step": 12000 },
    { "epoch": 4.391224862888483, "grad_norm": 8.816610336303711, "learning_rate": 3.676782449725777e-06, "loss": 0.6981, "step": 12010 },
    { "epoch": 4.394881170018282, "grad_norm": 6.932671546936035, "learning_rate": 3.6548446069469836e-06, "loss": 0.763, "step": 12020 },
    { "epoch": 4.39853747714808, "grad_norm": 7.768485069274902, "learning_rate": 3.63290676416819e-06, "loss": 0.6659, "step": 12030 },
    { "epoch": 4.4021937842778796, "grad_norm": 6.058159828186035, "learning_rate": 3.6109689213893968e-06, "loss": 0.7142, "step": 12040 },
    { "epoch": 4.405850091407678, "grad_norm": 7.062812805175781, "learning_rate": 3.5890310786106033e-06, "loss": 0.8288, "step": 12050 },
    { "epoch": 4.409506398537477, "grad_norm": 10.744300842285156, "learning_rate": 3.56709323583181e-06, "loss": 0.8523, "step": 12060 },
    { "epoch": 4.413162705667276, "grad_norm": 5.199676036834717, "learning_rate": 3.5451553930530165e-06, "loss": 0.6737, "step": 12070 },
    { "epoch": 4.416819012797075, "grad_norm": 7.22199821472168, "learning_rate": 3.5232175502742234e-06, "loss": 0.6945, "step": 12080 },
    { "epoch": 4.420475319926874, "grad_norm": 7.236554145812988, "learning_rate": 3.50127970749543e-06, "loss": 0.7632, "step": 12090 },
    { "epoch": 4.424131627056672, "grad_norm": 5.387056350708008, "learning_rate": 3.479341864716636e-06, "loss": 0.795, "step": 12100 },
    { "epoch": 4.427787934186472, "grad_norm": 5.174760341644287, "learning_rate": 3.4574040219378427e-06, "loss": 0.6356, "step": 12110 },
    { "epoch": 4.431444241316271, "grad_norm": 8.171443939208984, "learning_rate": 3.4354661791590493e-06, "loss": 0.916, "step": 12120 },
    { "epoch": 4.435100548446069, "grad_norm": 6.220861911773682, "learning_rate": 3.4157221206581357e-06, "loss": 0.7167, "step": 12130 },
    { "epoch": 4.4387568555758685, "grad_norm": 4.111860275268555, "learning_rate": 3.393784277879342e-06, "loss": 0.7382, "step": 12140 },
    { "epoch": 4.442413162705667, "grad_norm": 9.302396774291992, "learning_rate": 3.3718464351005484e-06, "loss": 0.7702, "step": 12150 },
    { "epoch": 4.446069469835466, "grad_norm": 6.859189987182617, "learning_rate": 3.349908592321755e-06, "loss": 0.7118, "step": 12160 },
    { "epoch": 4.449725776965265, "grad_norm": 8.368714332580566, "learning_rate": 3.327970749542962e-06, "loss": 0.6512, "step": 12170 },
    { "epoch": 4.453382084095064, "grad_norm": 4.548081398010254, "learning_rate": 3.3060329067641685e-06, "loss": 0.5731, "step": 12180 },
    { "epoch": 4.457038391224863, "grad_norm": 6.483217239379883, "learning_rate": 3.2840950639853746e-06, "loss": 0.689, "step": 12190 },
    { "epoch": 4.460694698354661, "grad_norm": 6.644962310791016, "learning_rate": 3.262157221206581e-06, "loss": 0.544, "step": 12200 },
    { "epoch": 4.464351005484461, "grad_norm": 5.917163848876953, "learning_rate": 3.2402193784277877e-06, "loss": 0.6778, "step": 12210 },
    { "epoch": 4.46800731261426, "grad_norm": 8.300089836120605, "learning_rate": 3.2182815356489947e-06, "loss": 0.6243, "step": 12220 },
    { "epoch": 4.471663619744058, "grad_norm": 6.0708184242248535, "learning_rate": 3.1963436928702013e-06, "loss": 0.7093, "step": 12230 },
    { "epoch": 4.4753199268738575, "grad_norm": 7.4208526611328125, "learning_rate": 3.174405850091408e-06, "loss": 0.7837, "step": 12240 },
    { "epoch": 4.478976234003657, "grad_norm": 6.546789169311523, "learning_rate": 3.152468007312614e-06, "loss": 0.6736, "step": 12250 },
    { "epoch": 4.482632541133455, "grad_norm": 4.865387916564941, "learning_rate": 3.130530164533821e-06, "loss": 0.6906, "step": 12260 },
    { "epoch": 4.486288848263254, "grad_norm": 8.03560733795166, "learning_rate": 3.1085923217550276e-06, "loss": 0.8666, "step": 12270 },
    { "epoch": 4.489945155393053, "grad_norm": 7.61192512512207, "learning_rate": 3.086654478976234e-06, "loss": 0.754, "step": 12280 },
    { "epoch": 4.493601462522852, "grad_norm": 5.770723342895508, "learning_rate": 3.0647166361974407e-06, "loss": 0.7219, "step": 12290 },
    { "epoch": 4.497257769652651, "grad_norm": 10.299765586853027, "learning_rate": 3.0427787934186473e-06, "loss": 0.9244, "step": 12300 },
    { "epoch": 4.50091407678245, "grad_norm": 7.810846328735352, "learning_rate": 3.020840950639854e-06, "loss": 0.5936, "step": 12310 },
    { "epoch": 4.504570383912249, "grad_norm": 6.715174674987793, "learning_rate": 2.9989031078610604e-06, "loss": 0.7276, "step": 12320 },
    { "epoch": 4.508226691042047, "grad_norm": 8.37287712097168, "learning_rate": 2.976965265082267e-06, "loss": 0.6952, "step": 12330 },
    { "epoch": 4.5118829981718465, "grad_norm": 4.971324443817139, "learning_rate": 2.9550274223034735e-06, "loss": 0.7399, "step": 12340 },
    { "epoch": 4.515539305301646, "grad_norm": 4.20208740234375, "learning_rate": 2.93308957952468e-06, "loss": 0.6338, "step": 12350 },
    { "epoch": 4.519195612431444, "grad_norm": 7.0777177810668945, "learning_rate": 2.911151736745887e-06, "loss": 0.709, "step": 12360 },
    { "epoch": 4.522851919561243, "grad_norm": 4.147567272186279, "learning_rate": 2.889213893967093e-06, "loss": 0.8053, "step": 12370 },
    { "epoch": 4.526508226691042, "grad_norm": 6.275250434875488, "learning_rate": 2.8672760511882998e-06, "loss": 0.6594, "step": 12380 },
    { "epoch": 4.530164533820841, "grad_norm": 8.304201126098633, "learning_rate": 2.8453382084095063e-06, "loss": 0.686, "step": 12390 },
    { "epoch": 4.53382084095064, "grad_norm": 2.2459535598754883, "learning_rate": 2.823400365630713e-06, "loss": 0.6314, "step": 12400 },
    { "epoch": 4.537477148080439, "grad_norm": 7.190771102905273, "learning_rate": 2.80146252285192e-06, "loss": 0.7207, "step": 12410 },
    { "epoch": 4.541133455210238, "grad_norm": 5.161981105804443, "learning_rate": 2.7795246800731265e-06, "loss": 0.6005, "step": 12420 },
    { "epoch": 4.544789762340036, "grad_norm": 10.310462951660156, "learning_rate": 2.7575868372943326e-06, "loss": 0.7278, "step": 12430 },
    { "epoch": 4.548446069469835, "grad_norm": 5.244387626647949, "learning_rate": 2.735648994515539e-06, "loss": 0.7996, "step": 12440 },
    { "epoch": 4.552102376599635, "grad_norm": 7.997178554534912, "learning_rate": 2.7137111517367457e-06, "loss": 0.792, "step": 12450 },
    { "epoch": 4.555758683729433, "grad_norm": 7.470856666564941, "learning_rate": 2.6917733089579527e-06, "loss": 0.7353, "step": 12460 },
    { "epoch": 4.559414990859232, "grad_norm": 8.2652006149292, "learning_rate": 2.6698354661791593e-06, "loss": 0.7909, "step": 12470 },
    { "epoch": 4.5630712979890315, "grad_norm": 10.023780822753906, "learning_rate": 2.647897623400366e-06, "loss": 0.7524, "step": 12480 },
    { "epoch": 4.56672760511883, "grad_norm": 5.3603949546813965, "learning_rate": 2.625959780621572e-06, "loss": 0.6137, "step": 12490 },
    { "epoch": 4.570383912248629, "grad_norm": 10.119514465332031, "learning_rate": 2.6040219378427785e-06, "loss": 0.7993, "step": 12500 },
    { "epoch": 4.5740402193784275, "grad_norm": 7.202580451965332, "learning_rate": 2.5820840950639855e-06, "loss": 0.6839, "step": 12510 },
    { "epoch": 4.577696526508227, "grad_norm": 6.155348777770996, "learning_rate": 2.560146252285192e-06, "loss": 0.6466, "step": 12520 },
    { "epoch": 4.581352833638025, "grad_norm": 7.591714859008789, "learning_rate": 2.5382084095063987e-06, "loss": 0.6854, "step": 12530 },
    { "epoch": 4.585009140767824, "grad_norm": 8.901870727539062, "learning_rate": 2.5162705667276052e-06, "loss": 0.725, "step": 12540 },
    { "epoch": 4.588665447897624, "grad_norm": 4.959688663482666, "learning_rate": 2.494332723948812e-06, "loss": 0.648, "step": 12550 },
    { "epoch": 4.592321755027422, "grad_norm": 3.7066445350646973, "learning_rate": 2.4723948811700184e-06, "loss": 0.6049, "step": 12560 },
    { "epoch": 4.595978062157221, "grad_norm": 7.572804927825928, "learning_rate": 2.450457038391225e-06, "loss": 0.6417, "step": 12570 },
    { "epoch": 4.5996343692870205, "grad_norm": 6.546285152435303, "learning_rate": 2.4285191956124315e-06, "loss": 0.6253, "step": 12580 },
    { "epoch": 4.603290676416819, "grad_norm": 7.330362319946289, "learning_rate": 2.406581352833638e-06, "loss": 0.6836, "step": 12590 },
    { "epoch": 4.606946983546618, "grad_norm": 7.050893306732178, "learning_rate": 2.384643510054845e-06, "loss": 0.6751, "step": 12600 },
    { "epoch": 4.6106032906764165, "grad_norm": 8.03470516204834, "learning_rate": 2.362705667276051e-06, "loss": 0.7185, "step": 12610 },
    { "epoch": 4.614259597806216, "grad_norm": 10.927925109863281, "learning_rate": 2.3407678244972577e-06, "loss": 0.8281, "step": 12620 },
    { "epoch": 4.617915904936015, "grad_norm": 12.535290718078613, "learning_rate": 2.3188299817184643e-06, "loss": 0.8109, "step": 12630 },
    { "epoch": 4.621572212065813, "grad_norm": 10.733165740966797, "learning_rate": 2.296892138939671e-06, "loss": 0.7886, "step": 12640 },
    { "epoch": 4.625228519195613, "grad_norm": 6.554678916931152, "learning_rate": 2.274954296160878e-06, "loss": 0.7984, "step": 12650 },
    { "epoch": 4.628884826325411, "grad_norm": 6.383825302124023, "learning_rate": 2.2530164533820844e-06, "loss": 0.7802, "step": 12660 },
    { "epoch": 4.63254113345521, "grad_norm": 6.0972795486450195, "learning_rate": 2.2310786106032906e-06, "loss": 0.5982, "step": 12670 },
    { "epoch": 4.6361974405850095, "grad_norm": 5.708790302276611, "learning_rate": 2.209140767824497e-06, "loss": 0.8417, "step": 12680 },
    { "epoch": 4.639853747714808, "grad_norm": 5.925148963928223, "learning_rate": 2.1872029250457037e-06, "loss": 0.7847, "step": 12690 },
    { "epoch": 4.643510054844607, "grad_norm": 8.306936264038086, "learning_rate": 2.1652650822669107e-06, "loss": 0.6869, "step": 12700 },
    { "epoch": 4.6471663619744055, "grad_norm": 5.944139003753662, "learning_rate": 2.1433272394881172e-06, "loss": 0.7387, "step": 12710 },
    { "epoch": 4.650822669104205, "grad_norm": 11.451881408691406, "learning_rate": 2.121389396709324e-06, "loss": 0.7313, "step": 12720 },
    { "epoch": 4.654478976234004, "grad_norm": 7.1728715896606445, "learning_rate": 2.09945155393053e-06, "loss": 0.7783, "step": 12730 },
    { "epoch": 4.658135283363802, "grad_norm": 10.634977340698242, "learning_rate": 2.0775137111517365e-06, "loss": 0.7819, "step": 12740 },
    { "epoch": 4.661791590493602, "grad_norm": 5.473633766174316, "learning_rate": 2.0555758683729435e-06, "loss": 0.914, "step": 12750 },
    { "epoch": 4.6654478976234, "grad_norm": 7.64341926574707, "learning_rate": 2.03363802559415e-06, "loss": 0.6453, "step": 12760 },
    { "epoch": 4.669104204753199, "grad_norm": 7.986457347869873, "learning_rate": 2.0117001828153566e-06, "loss": 0.6979, "step": 12770 },
    { "epoch": 4.6727605118829985, "grad_norm": 7.322612762451172, "learning_rate": 1.989762340036563e-06, "loss": 0.8874, "step": 12780 },
    { "epoch": 4.676416819012797, "grad_norm": 7.666032314300537, "learning_rate": 1.9678244972577698e-06, "loss": 0.8391, "step": 12790 },
    { "epoch": 4.680073126142596, "grad_norm": 8.544524192810059, "learning_rate": 1.9458866544789763e-06, "loss": 0.7378, "step": 12800 },
    { "epoch": 4.683729433272395, "grad_norm": 9.552132606506348, "learning_rate": 1.923948811700183e-06, "loss": 0.6094, "step": 12810 },
    { "epoch": 4.687385740402194, "grad_norm": 8.779314994812012, "learning_rate": 1.9020109689213895e-06, "loss": 0.7032, "step": 12820 },
    { "epoch": 4.691042047531993, "grad_norm": 4.859720230102539, "learning_rate": 1.8800731261425962e-06, "loss": 0.795, "step": 12830 },
    { "epoch": 4.694698354661791, "grad_norm": 6.823448181152344, "learning_rate": 1.8581352833638026e-06, "loss": 0.6433, "step": 12840 },
    { "epoch": 4.698354661791591, "grad_norm": 6.933642387390137, "learning_rate": 1.8361974405850092e-06, "loss": 0.763, "step": 12850 },
    { "epoch": 4.702010968921389, "grad_norm": 7.405396938323975, "learning_rate": 1.8142595978062157e-06, "loss": 0.8466, "step": 12860 },
    { "epoch": 4.705667276051188, "grad_norm": 8.228802680969238, "learning_rate": 1.7923217550274223e-06, "loss": 0.8017, "step": 12870 },
    { "epoch": 4.709323583180987, "grad_norm": 5.067279815673828, "learning_rate": 1.770383912248629e-06, "loss": 0.6612, "step": 12880 },
    { "epoch": 4.712979890310786, "grad_norm": 7.058690547943115, "learning_rate": 1.7484460694698354e-06, "loss": 0.7171, "step": 12890 },
    { "epoch": 4.716636197440585, "grad_norm": 7.31235933303833, "learning_rate": 1.7265082266910422e-06, "loss": 0.6917, "step": 12900 },
    { "epoch": 4.720292504570384, "grad_norm": 7.289247989654541, "learning_rate": 1.7045703839122487e-06, "loss": 0.6917, "step": 12910 },
    { "epoch": 4.723948811700183, "grad_norm": 10.546690940856934, "learning_rate": 1.682632541133455e-06, "loss": 0.718, "step": 12920 },
    { "epoch": 4.727605118829982, "grad_norm": 6.604415416717529, "learning_rate": 1.6606946983546619e-06, "loss": 0.7037, "step": 12930 },
    { "epoch": 4.73126142595978, "grad_norm": 5.056285381317139, "learning_rate": 1.6387568555758684e-06, "loss": 0.6712, "step": 12940 },
    { "epoch": 4.7349177330895795, "grad_norm": 6.835060119628906, "learning_rate": 1.616819012797075e-06, "loss": 0.8142, "step": 12950 },
    { "epoch": 4.738574040219379, "grad_norm": 7.166338920593262, "learning_rate": 1.5948811700182816e-06, "loss": 0.6812, "step": 12960 },
    { "epoch": 4.742230347349177, "grad_norm": 8.841276168823242, "learning_rate": 1.5729433272394881e-06, "loss": 0.695, "step": 12970 },
    { "epoch": 4.745886654478976, "grad_norm": 6.730128288269043, "learning_rate": 1.5510054844606947e-06, "loss": 0.6517, "step": 12980 },
    { "epoch": 4.749542961608775, "grad_norm": 6.670187473297119, "learning_rate": 1.5290676416819013e-06, "loss": 0.8538, "step": 12990 },
    { "epoch": 4.753199268738574, "grad_norm": 5.65201997756958, "learning_rate": 1.507129798903108e-06, "loss": 0.5967, "step": 13000 }
  ],
  "logging_steps": 10,
  "max_steps": 13675,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}