|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9983385637806904, |
|
"eval_steps": 500, |
|
"global_step": 338, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0029536643898836995, |
|
"grad_norm": 1.9736734572153813, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 1.0332, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005907328779767399, |
|
"grad_norm": 1.89466674300729, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 1.0858, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.008860993169651098, |
|
"grad_norm": 2.0586700988484337, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 1.1422, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.011814657559534798, |
|
"grad_norm": 1.8769825182011597, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 1.1113, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.014768321949418498, |
|
"grad_norm": 1.683377634890378, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 1.0089, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.017721986339302196, |
|
"grad_norm": 1.6814695051713617, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.9674, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.020675650729185896, |
|
"grad_norm": 1.9017119730121659, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 0.9005, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.023629315119069596, |
|
"grad_norm": 1.603735219060158, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.9265, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.026582979508953296, |
|
"grad_norm": 1.8288678360919526, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.8121, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.029536643898836996, |
|
"grad_norm": 1.5570221294659097, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.7292, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.032490308288720696, |
|
"grad_norm": 2.5250052299606964, |
|
"learning_rate": 2e-05, |
|
"loss": 0.8079, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03544397267860439, |
|
"grad_norm": 1.7266275987207245, |
|
"learning_rate": 1.9999538500851633e-05, |
|
"loss": 0.6159, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.038397637068488095, |
|
"grad_norm": 1.687733702161376, |
|
"learning_rate": 1.9998154046002822e-05, |
|
"loss": 0.5926, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.04135130145837179, |
|
"grad_norm": 1.4681611793409828, |
|
"learning_rate": 1.9995846763238514e-05, |
|
"loss": 0.5179, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.044304965848255495, |
|
"grad_norm": 1.661603577794359, |
|
"learning_rate": 1.9992616865520515e-05, |
|
"loss": 0.6002, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04725863023813919, |
|
"grad_norm": 1.5757267480206656, |
|
"learning_rate": 1.9988464650967834e-05, |
|
"loss": 0.548, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.05021229462802289, |
|
"grad_norm": 1.443516459043832, |
|
"learning_rate": 1.9983390502829168e-05, |
|
"loss": 0.4763, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.05316595901790659, |
|
"grad_norm": 1.9356317236988005, |
|
"learning_rate": 1.9977394889447526e-05, |
|
"loss": 0.4841, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05611962340779029, |
|
"grad_norm": 1.5992738201782284, |
|
"learning_rate": 1.9970478364217e-05, |
|
"loss": 0.4163, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05907328779767399, |
|
"grad_norm": 2.933784967885916, |
|
"learning_rate": 1.9962641565531694e-05, |
|
"loss": 0.4842, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06202695218755769, |
|
"grad_norm": 1.2537224524788972, |
|
"learning_rate": 1.9953885216726788e-05, |
|
"loss": 0.3521, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.06498061657744139, |
|
"grad_norm": 1.6032713497627293, |
|
"learning_rate": 1.994421012601179e-05, |
|
"loss": 0.3752, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06793428096732508, |
|
"grad_norm": 1.9257499786171195, |
|
"learning_rate": 1.9933617186395917e-05, |
|
"loss": 0.427, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.07088794535720878, |
|
"grad_norm": 1.585551383595158, |
|
"learning_rate": 1.99221073756057e-05, |
|
"loss": 0.3391, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.07384160974709249, |
|
"grad_norm": 1.4395031762291213, |
|
"learning_rate": 1.990968175599471e-05, |
|
"loss": 0.3681, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07679527413697619, |
|
"grad_norm": 1.8379758969132987, |
|
"learning_rate": 1.9896341474445526e-05, |
|
"loss": 0.3644, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07974893852685988, |
|
"grad_norm": 2.536047764684934, |
|
"learning_rate": 1.9882087762263857e-05, |
|
"loss": 0.3729, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.08270260291674358, |
|
"grad_norm": 1.7401811118309274, |
|
"learning_rate": 1.9866921935064907e-05, |
|
"loss": 0.3652, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.08565626730662729, |
|
"grad_norm": 1.6279552899997376, |
|
"learning_rate": 1.985084539265195e-05, |
|
"loss": 0.3381, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.08860993169651099, |
|
"grad_norm": 1.2722212246169622, |
|
"learning_rate": 1.983385961888711e-05, |
|
"loss": 0.3051, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09156359608639468, |
|
"grad_norm": 1.4978048532104433, |
|
"learning_rate": 1.9815966181554412e-05, |
|
"loss": 0.3502, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.09451726047627838, |
|
"grad_norm": 1.7582762904526588, |
|
"learning_rate": 1.9797166732215078e-05, |
|
"loss": 0.4365, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.09747092486616209, |
|
"grad_norm": 1.1718400265178528, |
|
"learning_rate": 1.977746300605507e-05, |
|
"loss": 0.3054, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.10042458925604578, |
|
"grad_norm": 1.8293028625760361, |
|
"learning_rate": 1.975685682172497e-05, |
|
"loss": 0.2785, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.10337825364592948, |
|
"grad_norm": 1.0440735267994106, |
|
"learning_rate": 1.973535008117207e-05, |
|
"loss": 0.3104, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10633191803581318, |
|
"grad_norm": 1.6728007851360074, |
|
"learning_rate": 1.9712944769464864e-05, |
|
"loss": 0.2799, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.10928558242569689, |
|
"grad_norm": 1.405904514520795, |
|
"learning_rate": 1.9689642954609808e-05, |
|
"loss": 0.2881, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.11223924681558058, |
|
"grad_norm": 1.7016870737912315, |
|
"learning_rate": 1.9665446787360444e-05, |
|
"loss": 0.3062, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.11519291120546428, |
|
"grad_norm": 1.1987401007424765, |
|
"learning_rate": 1.9640358501018885e-05, |
|
"loss": 0.2892, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.11814657559534798, |
|
"grad_norm": 1.5149380484224841, |
|
"learning_rate": 1.9614380411229693e-05, |
|
"loss": 0.308, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.12110023998523167, |
|
"grad_norm": 1.1620416606100095, |
|
"learning_rate": 1.9587514915766124e-05, |
|
"loss": 0.2722, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.12405390437511538, |
|
"grad_norm": 1.534285242480307, |
|
"learning_rate": 1.9559764494308838e-05, |
|
"loss": 0.2446, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.12700756876499908, |
|
"grad_norm": 1.117214762293682, |
|
"learning_rate": 1.9531131708217005e-05, |
|
"loss": 0.2565, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.12996123315488278, |
|
"grad_norm": 1.8690417660104388, |
|
"learning_rate": 1.950161920029191e-05, |
|
"loss": 0.2991, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.13291489754476649, |
|
"grad_norm": 1.2823815577983428, |
|
"learning_rate": 1.9471229694533003e-05, |
|
"loss": 0.3053, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.13586856193465016, |
|
"grad_norm": 1.0457394301123093, |
|
"learning_rate": 1.943996599588649e-05, |
|
"loss": 0.2692, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.13882222632453387, |
|
"grad_norm": 1.6582540556983663, |
|
"learning_rate": 1.940783098998643e-05, |
|
"loss": 0.3117, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.14177589071441757, |
|
"grad_norm": 2.8122374846048093, |
|
"learning_rate": 1.93748276428884e-05, |
|
"loss": 0.2413, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.14472955510430127, |
|
"grad_norm": 1.06858926462545, |
|
"learning_rate": 1.9340959000795707e-05, |
|
"loss": 0.251, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.14768321949418498, |
|
"grad_norm": 1.784288703961607, |
|
"learning_rate": 1.9306228189778255e-05, |
|
"loss": 0.3198, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15063688388406868, |
|
"grad_norm": 1.0358042162583945, |
|
"learning_rate": 1.927063841548398e-05, |
|
"loss": 0.2155, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.15359054827395238, |
|
"grad_norm": 1.1257663919037064, |
|
"learning_rate": 1.9234192962842996e-05, |
|
"loss": 0.265, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.15654421266383609, |
|
"grad_norm": 1.8812981570071463, |
|
"learning_rate": 1.9196895195764363e-05, |
|
"loss": 0.2678, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.15949787705371976, |
|
"grad_norm": 1.0219311054009077, |
|
"learning_rate": 1.9158748556825637e-05, |
|
"loss": 0.2619, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.16245154144360346, |
|
"grad_norm": 0.8757775200019958, |
|
"learning_rate": 1.9119756566955092e-05, |
|
"loss": 0.2153, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16540520583348717, |
|
"grad_norm": 1.2257972336982437, |
|
"learning_rate": 1.907992282510675e-05, |
|
"loss": 0.2621, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.16835887022337087, |
|
"grad_norm": 1.1235673530163361, |
|
"learning_rate": 1.90392510079282e-05, |
|
"loss": 0.2597, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.17131253461325457, |
|
"grad_norm": 1.1981603118820592, |
|
"learning_rate": 1.8997744869421248e-05, |
|
"loss": 0.2267, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.17426619900313828, |
|
"grad_norm": 1.2580175658104467, |
|
"learning_rate": 1.8955408240595396e-05, |
|
"loss": 0.2157, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.17721986339302198, |
|
"grad_norm": 1.4994807395260425, |
|
"learning_rate": 1.891224502911428e-05, |
|
"loss": 0.3247, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18017352778290566, |
|
"grad_norm": 1.084157032960403, |
|
"learning_rate": 1.886825921893497e-05, |
|
"loss": 0.2657, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.18312719217278936, |
|
"grad_norm": 1.4470552854446672, |
|
"learning_rate": 1.8823454869940243e-05, |
|
"loss": 0.3487, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.18608085656267306, |
|
"grad_norm": 2.0068879631177334, |
|
"learning_rate": 1.8777836117563894e-05, |
|
"loss": 0.2727, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.18903452095255677, |
|
"grad_norm": 1.1466460571800723, |
|
"learning_rate": 1.873140717240899e-05, |
|
"loss": 0.2913, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.19198818534244047, |
|
"grad_norm": 1.8682852110070804, |
|
"learning_rate": 1.8684172319859258e-05, |
|
"loss": 0.2383, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.19494184973232417, |
|
"grad_norm": 1.2843394680645717, |
|
"learning_rate": 1.863613591968355e-05, |
|
"loss": 0.2472, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.19789551412220788, |
|
"grad_norm": 1.017809753190489, |
|
"learning_rate": 1.858730240563342e-05, |
|
"loss": 0.2485, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.20084917851209155, |
|
"grad_norm": 2.2205775400272505, |
|
"learning_rate": 1.8537676285033886e-05, |
|
"loss": 0.2493, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.20380284290197526, |
|
"grad_norm": 1.235929269519101, |
|
"learning_rate": 1.848726213836744e-05, |
|
"loss": 0.2565, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.20675650729185896, |
|
"grad_norm": 1.8604954040590647, |
|
"learning_rate": 1.8436064618851225e-05, |
|
"loss": 0.3199, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.20971017168174266, |
|
"grad_norm": 2.0980254275510446, |
|
"learning_rate": 1.838408845200758e-05, |
|
"loss": 0.3298, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.21266383607162637, |
|
"grad_norm": 1.1425880293706356, |
|
"learning_rate": 1.8331338435227838e-05, |
|
"loss": 0.2163, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.21561750046151007, |
|
"grad_norm": 1.4113107349066654, |
|
"learning_rate": 1.8277819437329577e-05, |
|
"loss": 0.2598, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.21857116485139377, |
|
"grad_norm": 1.9224862314097175, |
|
"learning_rate": 1.8223536398107177e-05, |
|
"loss": 0.3132, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.22152482924127745, |
|
"grad_norm": 1.06320772052325, |
|
"learning_rate": 1.8168494327875918e-05, |
|
"loss": 0.2937, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.22447849363116115, |
|
"grad_norm": 1.2174257014009797, |
|
"learning_rate": 1.8112698307009506e-05, |
|
"loss": 0.238, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.22743215802104486, |
|
"grad_norm": 1.5162497230780494, |
|
"learning_rate": 1.8056153485471167e-05, |
|
"loss": 0.1991, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.23038582241092856, |
|
"grad_norm": 1.18750715957012, |
|
"learning_rate": 1.799886508233829e-05, |
|
"loss": 0.2978, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.23333948680081226, |
|
"grad_norm": 2.973457653034149, |
|
"learning_rate": 1.7940838385320732e-05, |
|
"loss": 0.2601, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.23629315119069597, |
|
"grad_norm": 1.4297439267822007, |
|
"learning_rate": 1.788207875027274e-05, |
|
"loss": 0.3164, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23924681558057967, |
|
"grad_norm": 1.6564928680332287, |
|
"learning_rate": 1.7822591600698632e-05, |
|
"loss": 0.237, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.24220047997046334, |
|
"grad_norm": 1.2481653938902246, |
|
"learning_rate": 1.776238242725217e-05, |
|
"loss": 0.2825, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.24515414436034705, |
|
"grad_norm": 1.4622528270629795, |
|
"learning_rate": 1.7701456787229805e-05, |
|
"loss": 0.2967, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.24810780875023075, |
|
"grad_norm": 1.1364730949104225, |
|
"learning_rate": 1.7639820304057745e-05, |
|
"loss": 0.2693, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.25106147314011446, |
|
"grad_norm": 1.0844100740250735, |
|
"learning_rate": 1.7577478666772886e-05, |
|
"loss": 0.2929, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.25401513752999816, |
|
"grad_norm": 1.0265123142436297, |
|
"learning_rate": 1.751443762949772e-05, |
|
"loss": 0.2386, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.25696880191988186, |
|
"grad_norm": 1.8332595816906205, |
|
"learning_rate": 1.7450703010909263e-05, |
|
"loss": 0.2268, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.25992246630976557, |
|
"grad_norm": 1.2274642417045811, |
|
"learning_rate": 1.738628069370195e-05, |
|
"loss": 0.1997, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.26287613069964927, |
|
"grad_norm": 1.8440511370248884, |
|
"learning_rate": 1.732117662404469e-05, |
|
"loss": 0.3254, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.26582979508953297, |
|
"grad_norm": 1.7570589127181166, |
|
"learning_rate": 1.7255396811032014e-05, |
|
"loss": 0.2358, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2687834594794167, |
|
"grad_norm": 1.5694781679384842, |
|
"learning_rate": 1.718894732612947e-05, |
|
"loss": 0.253, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.2717371238693003, |
|
"grad_norm": 1.0385463235337, |
|
"learning_rate": 1.712183430261319e-05, |
|
"loss": 0.2717, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.274690788259184, |
|
"grad_norm": 1.4412499710422237, |
|
"learning_rate": 1.7054063935003813e-05, |
|
"loss": 0.2302, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.27764445264906773, |
|
"grad_norm": 3.529966786893214, |
|
"learning_rate": 1.698564247849473e-05, |
|
"loss": 0.2867, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.28059811703895143, |
|
"grad_norm": 1.2672492658805385, |
|
"learning_rate": 1.691657624837472e-05, |
|
"loss": 0.2045, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.28355178142883514, |
|
"grad_norm": 0.9176767851503596, |
|
"learning_rate": 1.684687161944506e-05, |
|
"loss": 0.2509, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.28650544581871884, |
|
"grad_norm": 1.161604448718551, |
|
"learning_rate": 1.677653502543113e-05, |
|
"loss": 0.2474, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.28945911020860254, |
|
"grad_norm": 1.1883037859108712, |
|
"learning_rate": 1.6705572958388576e-05, |
|
"loss": 0.2825, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.29241277459848625, |
|
"grad_norm": 1.3916922978511344, |
|
"learning_rate": 1.6633991968104095e-05, |
|
"loss": 0.2935, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.29536643898836995, |
|
"grad_norm": 1.1724380392159808, |
|
"learning_rate": 1.6561798661490904e-05, |
|
"loss": 0.2165, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29832010337825365, |
|
"grad_norm": 1.3803456662831326, |
|
"learning_rate": 1.6488999701978905e-05, |
|
"loss": 0.272, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.30127376776813736, |
|
"grad_norm": 1.2576499318824381, |
|
"learning_rate": 1.6415601808899658e-05, |
|
"loss": 0.2234, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.30422743215802106, |
|
"grad_norm": 1.1279853492268177, |
|
"learning_rate": 1.63416117568662e-05, |
|
"loss": 0.2501, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.30718109654790476, |
|
"grad_norm": 2.1142008653949533, |
|
"learning_rate": 1.6267036375147728e-05, |
|
"loss": 0.231, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.31013476093778847, |
|
"grad_norm": 1.5354148204855673, |
|
"learning_rate": 1.619188254703927e-05, |
|
"loss": 0.2572, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.31308842532767217, |
|
"grad_norm": 1.8111259417638643, |
|
"learning_rate": 1.6116157209226356e-05, |
|
"loss": 0.2485, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.3160420897175558, |
|
"grad_norm": 1.6621307355421575, |
|
"learning_rate": 1.6039867351144778e-05, |
|
"loss": 0.2292, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.3189957541074395, |
|
"grad_norm": 3.3263259030038843, |
|
"learning_rate": 1.5963020014335437e-05, |
|
"loss": 0.2319, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.3219494184973232, |
|
"grad_norm": 2.052088135053435, |
|
"learning_rate": 1.588562229179443e-05, |
|
"loss": 0.2785, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.32490308288720693, |
|
"grad_norm": 1.7457872664375444, |
|
"learning_rate": 1.5807681327318372e-05, |
|
"loss": 0.2375, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.32785674727709063, |
|
"grad_norm": 2.1656130103223292, |
|
"learning_rate": 1.5729204314845002e-05, |
|
"loss": 0.3204, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.33081041166697434, |
|
"grad_norm": 1.4347887292208887, |
|
"learning_rate": 1.56501984977892e-05, |
|
"loss": 0.1912, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.33376407605685804, |
|
"grad_norm": 1.7457376826745783, |
|
"learning_rate": 1.557067116837444e-05, |
|
"loss": 0.1869, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.33671774044674174, |
|
"grad_norm": 1.279310426755318, |
|
"learning_rate": 1.5490629666959668e-05, |
|
"loss": 0.2515, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.33967140483662545, |
|
"grad_norm": 1.2398989051076739, |
|
"learning_rate": 1.541008138136183e-05, |
|
"loss": 0.276, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.34262506922650915, |
|
"grad_norm": 0.8931524769308568, |
|
"learning_rate": 1.5329033746173975e-05, |
|
"loss": 0.1926, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.34557873361639285, |
|
"grad_norm": 0.9382290625981602, |
|
"learning_rate": 1.5247494242079024e-05, |
|
"loss": 0.1994, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.34853239800627656, |
|
"grad_norm": 1.6152374848968118, |
|
"learning_rate": 1.5165470395159314e-05, |
|
"loss": 0.2737, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.35148606239616026, |
|
"grad_norm": 0.7437908075523608, |
|
"learning_rate": 1.5082969776201948e-05, |
|
"loss": 0.2452, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.35443972678604396, |
|
"grad_norm": 1.433187333629193, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.2928, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3573933911759276, |
|
"grad_norm": 1.1151731420543107, |
|
"learning_rate": 1.4916568724649688e-05, |
|
"loss": 0.2469, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3603470555658113, |
|
"grad_norm": 1.628208821537734, |
|
"learning_rate": 1.483268365084351e-05, |
|
"loss": 0.2672, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.363300719955695, |
|
"grad_norm": 1.655636248263217, |
|
"learning_rate": 1.4748352521159492e-05, |
|
"loss": 0.2558, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.3662543843455787, |
|
"grad_norm": 1.1190196781911896, |
|
"learning_rate": 1.466358311934654e-05, |
|
"loss": 0.2424, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.3692080487354624, |
|
"grad_norm": 1.5683734426508247, |
|
"learning_rate": 1.4578383269606004e-05, |
|
"loss": 0.2078, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3721617131253461, |
|
"grad_norm": 2.1959742679571392, |
|
"learning_rate": 1.4492760835869504e-05, |
|
"loss": 0.1833, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.37511537751522983, |
|
"grad_norm": 1.8629992032442229, |
|
"learning_rate": 1.4406723721073088e-05, |
|
"loss": 0.2172, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.37806904190511353, |
|
"grad_norm": 1.3715247051576143, |
|
"learning_rate": 1.4320279866427798e-05, |
|
"loss": 0.2275, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.38102270629499724, |
|
"grad_norm": 1.9490061377077916, |
|
"learning_rate": 1.4233437250686695e-05, |
|
"loss": 0.2788, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.38397637068488094, |
|
"grad_norm": 2.5933766685145283, |
|
"learning_rate": 1.4146203889408418e-05, |
|
"loss": 0.2166, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.38693003507476464, |
|
"grad_norm": 1.2370288412958634, |
|
"learning_rate": 1.4058587834217356e-05, |
|
"loss": 0.2072, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.38988369946464835, |
|
"grad_norm": 1.1081749176693048, |
|
"learning_rate": 1.3970597172060482e-05, |
|
"loss": 0.1989, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.39283736385453205, |
|
"grad_norm": 1.4828838112332934, |
|
"learning_rate": 1.3882240024460928e-05, |
|
"loss": 0.3022, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.39579102824441575, |
|
"grad_norm": 1.1733959356793078, |
|
"learning_rate": 1.3793524546768358e-05, |
|
"loss": 0.2889, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3987446926342994, |
|
"grad_norm": 0.8750941794002657, |
|
"learning_rate": 1.3704458927406261e-05, |
|
"loss": 0.2043, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.4016983570241831, |
|
"grad_norm": 0.9229984145111342, |
|
"learning_rate": 1.3615051387116131e-05, |
|
"loss": 0.1931, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.4046520214140668, |
|
"grad_norm": 0.9607360356833068, |
|
"learning_rate": 1.3525310178198707e-05, |
|
"loss": 0.2502, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.4076056858039505, |
|
"grad_norm": 1.267256607998377, |
|
"learning_rate": 1.3435243583752294e-05, |
|
"loss": 0.2551, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.4105593501938342, |
|
"grad_norm": 1.1535151890824327, |
|
"learning_rate": 1.3344859916908206e-05, |
|
"loss": 0.2519, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.4135130145837179, |
|
"grad_norm": 1.2308120883855471, |
|
"learning_rate": 1.325416752006351e-05, |
|
"loss": 0.2028, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.4164666789736016, |
|
"grad_norm": 1.1261523707817411, |
|
"learning_rate": 1.3163174764110985e-05, |
|
"loss": 0.2891, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.4194203433634853, |
|
"grad_norm": 1.0484777228642377, |
|
"learning_rate": 1.3071890047666498e-05, |
|
"loss": 0.2847, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.42237400775336903, |
|
"grad_norm": 1.3574789457653618, |
|
"learning_rate": 1.2980321796293838e-05, |
|
"loss": 0.2313, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.42532767214325273, |
|
"grad_norm": 1.6359214874264179, |
|
"learning_rate": 1.288847846172701e-05, |
|
"loss": 0.2613, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.42828133653313644, |
|
"grad_norm": 1.4849630914722172, |
|
"learning_rate": 1.2796368521090143e-05, |
|
"loss": 0.2497, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.43123500092302014, |
|
"grad_norm": 0.9672458928556958, |
|
"learning_rate": 1.2704000476115079e-05, |
|
"loss": 0.2125, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.43418866531290384, |
|
"grad_norm": 1.3631344827108736, |
|
"learning_rate": 1.2611382852356632e-05, |
|
"loss": 0.2102, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.43714232970278755, |
|
"grad_norm": 1.081373449791384, |
|
"learning_rate": 1.2518524198405699e-05, |
|
"loss": 0.1994, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.4400959940926712, |
|
"grad_norm": 3.031111169053163, |
|
"learning_rate": 1.2425433085100224e-05, |
|
"loss": 0.251, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.4430496584825549, |
|
"grad_norm": 1.109367118461991, |
|
"learning_rate": 1.233211810473411e-05, |
|
"loss": 0.2145, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4460033228724386, |
|
"grad_norm": 1.8019223476694273, |
|
"learning_rate": 1.2238587870264152e-05, |
|
"loss": 0.2416, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.4489569872623223, |
|
"grad_norm": 0.9740173075655284, |
|
"learning_rate": 1.2144851014515055e-05, |
|
"loss": 0.2483, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.451910651652206, |
|
"grad_norm": 2.103510651583566, |
|
"learning_rate": 1.2050916189382646e-05, |
|
"loss": 0.2503, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4548643160420897, |
|
"grad_norm": 0.9776576320454566, |
|
"learning_rate": 1.1956792065035281e-05, |
|
"loss": 0.2097, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4578179804319734, |
|
"grad_norm": 0.9661788560533139, |
|
"learning_rate": 1.1862487329113606e-05, |
|
"loss": 0.2307, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4607716448218571, |
|
"grad_norm": 1.089240042431727, |
|
"learning_rate": 1.1768010685928686e-05, |
|
"loss": 0.2716, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4637253092117408, |
|
"grad_norm": 1.8188209063972023, |
|
"learning_rate": 1.1673370855658592e-05, |
|
"loss": 0.2543, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4666789736016245, |
|
"grad_norm": 0.9268264783045698, |
|
"learning_rate": 1.1578576573543541e-05, |
|
"loss": 0.1993, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.46963263799150823, |
|
"grad_norm": 2.8113037410793633, |
|
"learning_rate": 1.1483636589079627e-05, |
|
"loss": 0.2359, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.47258630238139193, |
|
"grad_norm": 1.1434071278207998, |
|
"learning_rate": 1.1388559665211241e-05, |
|
"loss": 0.2429, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.47553996677127564, |
|
"grad_norm": 1.7302412316542266, |
|
"learning_rate": 1.1293354577522264e-05, |
|
"loss": 0.2826, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.47849363116115934, |
|
"grad_norm": 2.0316255286578735, |
|
"learning_rate": 1.1198030113426074e-05, |
|
"loss": 0.2273, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.481447295551043, |
|
"grad_norm": 0.8326584843590035, |
|
"learning_rate": 1.1102595071354471e-05, |
|
"loss": 0.2394, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4844009599409267, |
|
"grad_norm": 1.317424629649938, |
|
"learning_rate": 1.1007058259945584e-05, |
|
"loss": 0.2339, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4873546243308104, |
|
"grad_norm": 1.021243726388158, |
|
"learning_rate": 1.0911428497230834e-05, |
|
"loss": 0.2483, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4903082887206941, |
|
"grad_norm": 0.9876546362231726, |
|
"learning_rate": 1.0815714609821027e-05, |
|
"loss": 0.3242, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4932619531105778, |
|
"grad_norm": 0.8098325451574229, |
|
"learning_rate": 1.0719925432091671e-05, |
|
"loss": 0.2444, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4962156175004615, |
|
"grad_norm": 2.17413162626723, |
|
"learning_rate": 1.0624069805367558e-05, |
|
"loss": 0.3416, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4991692818903452, |
|
"grad_norm": 1.8720289242626653, |
|
"learning_rate": 1.0528156577106703e-05, |
|
"loss": 0.2682, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.5021229462802289, |
|
"grad_norm": 0.8874485016784218, |
|
"learning_rate": 1.043219460008374e-05, |
|
"loss": 0.2733, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5050766106701126, |
|
"grad_norm": 1.0480044654604301, |
|
"learning_rate": 1.0336192731572805e-05, |
|
"loss": 0.2298, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.5080302750599963, |
|
"grad_norm": 1.181818854385163, |
|
"learning_rate": 1.0240159832530007e-05, |
|
"loss": 0.2572, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.51098393944988, |
|
"grad_norm": 0.98648646319354, |
|
"learning_rate": 1.0144104766775574e-05, |
|
"loss": 0.2495, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.5139376038397637, |
|
"grad_norm": 1.0277098370436357, |
|
"learning_rate": 1.004803640017571e-05, |
|
"loss": 0.2619, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.5168912682296474, |
|
"grad_norm": 1.1062050987306524, |
|
"learning_rate": 9.951963599824294e-06, |
|
"loss": 0.2238, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.5198449326195311, |
|
"grad_norm": 1.2488436891812305, |
|
"learning_rate": 9.855895233224431e-06, |
|
"loss": 0.2372, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.5227985970094148, |
|
"grad_norm": 1.3481996131119136, |
|
"learning_rate": 9.759840167469995e-06, |
|
"loss": 0.2746, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.5257522613992985, |
|
"grad_norm": 0.8826183612671726, |
|
"learning_rate": 9.663807268427197e-06, |
|
"loss": 0.2335, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.5287059257891822, |
|
"grad_norm": 0.9244242887212063, |
|
"learning_rate": 9.56780539991626e-06, |
|
"loss": 0.2178, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.5316595901790659, |
|
"grad_norm": 1.4163564901321666, |
|
"learning_rate": 9.471843422893299e-06, |
|
"loss": 0.2747, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5346132545689496, |
|
"grad_norm": 1.009293146826116, |
|
"learning_rate": 9.375930194632447e-06, |
|
"loss": 0.2688, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.5375669189588334, |
|
"grad_norm": 1.1818455803606223, |
|
"learning_rate": 9.28007456790833e-06, |
|
"loss": 0.2621, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.540520583348717, |
|
"grad_norm": 1.0611769717320774, |
|
"learning_rate": 9.184285390178978e-06, |
|
"loss": 0.2437, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.5434742477386006, |
|
"grad_norm": 1.2390997213154737, |
|
"learning_rate": 9.08857150276917e-06, |
|
"loss": 0.2952, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.5464279121284844, |
|
"grad_norm": 1.835044765816236, |
|
"learning_rate": 8.992941740054418e-06, |
|
"loss": 0.3033, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.549381576518368, |
|
"grad_norm": 0.8704926207718849, |
|
"learning_rate": 8.897404928645529e-06, |
|
"loss": 0.2365, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.5523352409082518, |
|
"grad_norm": 1.4546396972602982, |
|
"learning_rate": 8.80196988657393e-06, |
|
"loss": 0.2468, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.5552889052981355, |
|
"grad_norm": 1.1497614503627624, |
|
"learning_rate": 8.706645422477739e-06, |
|
"loss": 0.2231, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.5582425696880192, |
|
"grad_norm": 1.898082986482141, |
|
"learning_rate": 8.611440334788762e-06, |
|
"loss": 0.2955, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.5611962340779029, |
|
"grad_norm": 1.732309153229397, |
|
"learning_rate": 8.516363410920376e-06, |
|
"loss": 0.2623, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5641498984677866, |
|
"grad_norm": 1.3066685408925918, |
|
"learning_rate": 8.42142342645646e-06, |
|
"loss": 0.2101, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5671035628576703, |
|
"grad_norm": 1.162748592458113, |
|
"learning_rate": 8.326629144341408e-06, |
|
"loss": 0.2041, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.570057227247554, |
|
"grad_norm": 1.29586252973902, |
|
"learning_rate": 8.231989314071318e-06, |
|
"loss": 0.2386, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5730108916374377, |
|
"grad_norm": 1.5953660392118898, |
|
"learning_rate": 8.137512670886397e-06, |
|
"loss": 0.3249, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5759645560273214, |
|
"grad_norm": 1.1905039053627462, |
|
"learning_rate": 8.043207934964722e-06, |
|
"loss": 0.2688, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5789182204172051, |
|
"grad_norm": 1.1837724923413109, |
|
"learning_rate": 7.949083810617358e-06, |
|
"loss": 0.2218, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5818718848070888, |
|
"grad_norm": 1.0337142264120576, |
|
"learning_rate": 7.855148985484946e-06, |
|
"loss": 0.2313, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5848255491969725, |
|
"grad_norm": 1.023527322376044, |
|
"learning_rate": 7.761412129735853e-06, |
|
"loss": 0.2546, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5877792135868561, |
|
"grad_norm": 1.9398516606289615, |
|
"learning_rate": 7.667881895265895e-06, |
|
"loss": 0.2541, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5907328779767399, |
|
"grad_norm": 1.2286452090124362, |
|
"learning_rate": 7.574566914899779e-06, |
|
"loss": 0.2435, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5936865423666235, |
|
"grad_norm": 1.9724170175167155, |
|
"learning_rate": 7.481475801594302e-06, |
|
"loss": 0.2256, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5966402067565073, |
|
"grad_norm": 0.9322838897748312, |
|
"learning_rate": 7.388617147643371e-06, |
|
"loss": 0.2217, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.599593871146391, |
|
"grad_norm": 1.4987943471159813, |
|
"learning_rate": 7.295999523884921e-06, |
|
"loss": 0.2415, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.6025475355362747, |
|
"grad_norm": 2.3935521553878902, |
|
"learning_rate": 7.203631478909857e-06, |
|
"loss": 0.2644, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.6055011999261584, |
|
"grad_norm": 1.0320215273603501, |
|
"learning_rate": 7.111521538272997e-06, |
|
"loss": 0.2676, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.6084548643160421, |
|
"grad_norm": 0.7108542636852405, |
|
"learning_rate": 7.019678203706164e-06, |
|
"loss": 0.2172, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.6114085287059258, |
|
"grad_norm": 0.9743452858582518, |
|
"learning_rate": 6.928109952333506e-06, |
|
"loss": 0.1604, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.6143621930958095, |
|
"grad_norm": 1.031069019097842, |
|
"learning_rate": 6.83682523588902e-06, |
|
"loss": 0.2489, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.6173158574856932, |
|
"grad_norm": 1.1126940698355048, |
|
"learning_rate": 6.745832479936492e-06, |
|
"loss": 0.2271, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.6202695218755769, |
|
"grad_norm": 1.0020460027427114, |
|
"learning_rate": 6.655140083091794e-06, |
|
"loss": 0.209, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.6232231862654606, |
|
"grad_norm": 0.8970125803197203, |
|
"learning_rate": 6.564756416247712e-06, |
|
"loss": 0.2583, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.6261768506553443, |
|
"grad_norm": 1.0568957445259135, |
|
"learning_rate": 6.474689821801295e-06, |
|
"loss": 0.2324, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.629130515045228, |
|
"grad_norm": 1.051239767211671, |
|
"learning_rate": 6.384948612883872e-06, |
|
"loss": 0.2223, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.6320841794351116, |
|
"grad_norm": 1.3654867580389307, |
|
"learning_rate": 6.2955410725937405e-06, |
|
"loss": 0.3261, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.6350378438249954, |
|
"grad_norm": 1.5940508984964765, |
|
"learning_rate": 6.206475453231644e-06, |
|
"loss": 0.3056, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.637991508214879, |
|
"grad_norm": 1.6484237300537123, |
|
"learning_rate": 6.117759975539075e-06, |
|
"loss": 0.2511, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.6409451726047628, |
|
"grad_norm": 0.9783636169511857, |
|
"learning_rate": 6.029402827939519e-06, |
|
"loss": 0.1926, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.6438988369946465, |
|
"grad_norm": 1.2177544643933664, |
|
"learning_rate": 5.941412165782645e-06, |
|
"loss": 0.2448, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.6468525013845302, |
|
"grad_norm": 1.3485731634469964, |
|
"learning_rate": 5.853796110591583e-06, |
|
"loss": 0.3118, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.6498061657744139, |
|
"grad_norm": 1.0074910225979905, |
|
"learning_rate": 5.766562749313309e-06, |
|
"loss": 0.2329, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6527598301642976, |
|
"grad_norm": 1.9631275122537244, |
|
"learning_rate": 5.6797201335722064e-06, |
|
"loss": 0.2364, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.6557134945541813, |
|
"grad_norm": 1.062222369251804, |
|
"learning_rate": 5.593276278926912e-06, |
|
"loss": 0.2315, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.658667158944065, |
|
"grad_norm": 1.07696139078638, |
|
"learning_rate": 5.507239164130501e-06, |
|
"loss": 0.1934, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.6616208233339487, |
|
"grad_norm": 1.0422028056726698, |
|
"learning_rate": 5.421616730394e-06, |
|
"loss": 0.2931, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.6645744877238324, |
|
"grad_norm": 1.1093276167484338, |
|
"learning_rate": 5.336416880653461e-06, |
|
"loss": 0.2677, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6675281521137161, |
|
"grad_norm": 1.0629629635370519, |
|
"learning_rate": 5.251647478840511e-06, |
|
"loss": 0.207, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.6704818165035997, |
|
"grad_norm": 0.779645708058216, |
|
"learning_rate": 5.167316349156495e-06, |
|
"loss": 0.2009, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.6734354808934835, |
|
"grad_norm": 0.8926314737189482, |
|
"learning_rate": 5.083431275350312e-06, |
|
"loss": 0.19, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.6763891452833671, |
|
"grad_norm": 1.376351113952889, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.2771, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.6793428096732509, |
|
"grad_norm": 0.7572945693080168, |
|
"learning_rate": 4.917030223798057e-06, |
|
"loss": 0.191, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6822964740631345, |
|
"grad_norm": 1.2668574548219917, |
|
"learning_rate": 4.834529604840686e-06, |
|
"loss": 0.2342, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6852501384530183, |
|
"grad_norm": 0.9504831876512666, |
|
"learning_rate": 4.7525057579209775e-06, |
|
"loss": 0.2033, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.688203802842902, |
|
"grad_norm": 1.2471827348726847, |
|
"learning_rate": 4.670966253826027e-06, |
|
"loss": 0.2503, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6911574672327857, |
|
"grad_norm": 1.2316083853653141, |
|
"learning_rate": 4.589918618638173e-06, |
|
"loss": 0.2345, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6941111316226694, |
|
"grad_norm": 1.233920943028571, |
|
"learning_rate": 4.5093703330403385e-06, |
|
"loss": 0.2549, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6970647960125531, |
|
"grad_norm": 1.2774449733259186, |
|
"learning_rate": 4.429328831625565e-06, |
|
"loss": 0.2747, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.7000184604024368, |
|
"grad_norm": 1.151879743444661, |
|
"learning_rate": 4.349801502210801e-06, |
|
"loss": 0.211, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.7029721247923205, |
|
"grad_norm": 1.0883996040038033, |
|
"learning_rate": 4.270795685155001e-06, |
|
"loss": 0.225, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.7059257891822042, |
|
"grad_norm": 0.8330598619298225, |
|
"learning_rate": 4.192318672681631e-06, |
|
"loss": 0.2428, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.7088794535720879, |
|
"grad_norm": 1.743882855971486, |
|
"learning_rate": 4.1143777082055715e-06, |
|
"loss": 0.3097, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7118331179619716, |
|
"grad_norm": 0.8382109254728533, |
|
"learning_rate": 4.036979985664566e-06, |
|
"loss": 0.1831, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.7147867823518552, |
|
"grad_norm": 1.3270682727089078, |
|
"learning_rate": 3.960132648855226e-06, |
|
"loss": 0.2422, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.717740446741739, |
|
"grad_norm": 1.240272547956328, |
|
"learning_rate": 3.883842790773647e-06, |
|
"loss": 0.2562, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.7206941111316226, |
|
"grad_norm": 1.2319353987670736, |
|
"learning_rate": 3.8081174529607346e-06, |
|
"loss": 0.281, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.7236477755215064, |
|
"grad_norm": 1.1603454653088285, |
|
"learning_rate": 3.732963624852275e-06, |
|
"loss": 0.2749, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.72660143991139, |
|
"grad_norm": 1.0523508127263002, |
|
"learning_rate": 3.6583882431338047e-06, |
|
"loss": 0.201, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.7295551043012738, |
|
"grad_norm": 1.5484575857761667, |
|
"learning_rate": 3.584398191100341e-06, |
|
"loss": 0.1748, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.7325087686911574, |
|
"grad_norm": 0.927816740497892, |
|
"learning_rate": 3.511000298021098e-06, |
|
"loss": 0.2038, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.7354624330810412, |
|
"grad_norm": 1.281465915405747, |
|
"learning_rate": 3.4382013385090985e-06, |
|
"loss": 0.1967, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.7384160974709248, |
|
"grad_norm": 0.8383135259558974, |
|
"learning_rate": 3.3660080318959043e-06, |
|
"loss": 0.3009, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7413697618608086, |
|
"grad_norm": 0.9034285090369809, |
|
"learning_rate": 3.2944270416114256e-06, |
|
"loss": 0.2712, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.7443234262506923, |
|
"grad_norm": 1.7330918162831006, |
|
"learning_rate": 3.223464974568874e-06, |
|
"loss": 0.3026, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.747277090640576, |
|
"grad_norm": 2.278797836193121, |
|
"learning_rate": 3.153128380554941e-06, |
|
"loss": 0.2026, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.7502307550304597, |
|
"grad_norm": 1.6530429628004708, |
|
"learning_rate": 3.0834237516252817e-06, |
|
"loss": 0.3184, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.7531844194203433, |
|
"grad_norm": 1.014176332937703, |
|
"learning_rate": 3.0143575215052732e-06, |
|
"loss": 0.2137, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7561380838102271, |
|
"grad_norm": 1.3834895222118382, |
|
"learning_rate": 2.94593606499619e-06, |
|
"loss": 0.2532, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.7590917482001107, |
|
"grad_norm": 0.9517980209975256, |
|
"learning_rate": 2.878165697386812e-06, |
|
"loss": 0.223, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.7620454125899945, |
|
"grad_norm": 1.4826707785316666, |
|
"learning_rate": 2.8110526738705345e-06, |
|
"loss": 0.2331, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.7649990769798781, |
|
"grad_norm": 0.9535584029950059, |
|
"learning_rate": 2.7446031889679893e-06, |
|
"loss": 0.2252, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.7679527413697619, |
|
"grad_norm": 1.1476536352431068, |
|
"learning_rate": 2.678823375955314e-06, |
|
"loss": 0.2673, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7709064057596455, |
|
"grad_norm": 1.443599601569734, |
|
"learning_rate": 2.6137193062980506e-06, |
|
"loss": 0.2148, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.7738600701495293, |
|
"grad_norm": 1.0031835343451487, |
|
"learning_rate": 2.5492969890907383e-06, |
|
"loss": 0.2404, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.7768137345394129, |
|
"grad_norm": 1.5720112441700467, |
|
"learning_rate": 2.485562370502279e-06, |
|
"loss": 0.2102, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.7797673989292967, |
|
"grad_norm": 1.515634223478538, |
|
"learning_rate": 2.4225213332271203e-06, |
|
"loss": 0.2801, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.7827210633191803, |
|
"grad_norm": 1.6679133246539453, |
|
"learning_rate": 2.3601796959422585e-06, |
|
"loss": 0.2728, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.7856747277090641, |
|
"grad_norm": 1.140587687664246, |
|
"learning_rate": 2.2985432127701945e-06, |
|
"loss": 0.2343, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.7886283920989478, |
|
"grad_norm": 1.6372470878039884, |
|
"learning_rate": 2.2376175727478346e-06, |
|
"loss": 0.3012, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.7915820564888315, |
|
"grad_norm": 1.0418170981548798, |
|
"learning_rate": 2.1774083993013715e-06, |
|
"loss": 0.2903, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7945357208787152, |
|
"grad_norm": 0.9841407460639144, |
|
"learning_rate": 2.1179212497272582e-06, |
|
"loss": 0.2021, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7974893852685988, |
|
"grad_norm": 3.3040280272424076, |
|
"learning_rate": 2.0591616146792705e-06, |
|
"loss": 0.2567, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.8004430496584826, |
|
"grad_norm": 1.0889363962398735, |
|
"learning_rate": 2.0011349176617133e-06, |
|
"loss": 0.2387, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.8033967140483662, |
|
"grad_norm": 1.1052353260085468, |
|
"learning_rate": 1.9438465145288377e-06, |
|
"loss": 0.2549, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.80635037843825, |
|
"grad_norm": 1.0811411366280128, |
|
"learning_rate": 1.8873016929904942e-06, |
|
"loss": 0.2245, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.8093040428281336, |
|
"grad_norm": 1.5617796782808597, |
|
"learning_rate": 1.8315056721240831e-06, |
|
"loss": 0.2689, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.8122577072180174, |
|
"grad_norm": 0.9419563951986655, |
|
"learning_rate": 1.7764636018928249e-06, |
|
"loss": 0.2274, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.815211371607901, |
|
"grad_norm": 1.0888276242305772, |
|
"learning_rate": 1.722180562670428e-06, |
|
"loss": 0.2137, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.8181650359977848, |
|
"grad_norm": 1.3638497191830405, |
|
"learning_rate": 1.6686615647721638e-06, |
|
"loss": 0.2936, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.8211187003876684, |
|
"grad_norm": 1.1621534586524414, |
|
"learning_rate": 1.6159115479924259e-06, |
|
"loss": 0.2835, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.8240723647775522, |
|
"grad_norm": 1.5160217377536473, |
|
"learning_rate": 1.5639353811487744e-06, |
|
"loss": 0.2013, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.8270260291674358, |
|
"grad_norm": 1.0562241070452318, |
|
"learning_rate": 1.5127378616325606e-06, |
|
"loss": 0.2387, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8299796935573196, |
|
"grad_norm": 0.9587608909012787, |
|
"learning_rate": 1.462323714966114e-06, |
|
"loss": 0.2439, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.8329333579472032, |
|
"grad_norm": 1.6757197295398196, |
|
"learning_rate": 1.4126975943665844e-06, |
|
"loss": 0.2315, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.8358870223370869, |
|
"grad_norm": 1.3676443709621444, |
|
"learning_rate": 1.3638640803164516e-06, |
|
"loss": 0.2285, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.8388406867269707, |
|
"grad_norm": 1.1179995558525562, |
|
"learning_rate": 1.3158276801407432e-06, |
|
"loss": 0.2649, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.8417943511168543, |
|
"grad_norm": 1.042657568622807, |
|
"learning_rate": 1.2685928275910142e-06, |
|
"loss": 0.2123, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.8447480155067381, |
|
"grad_norm": 1.5556578514626542, |
|
"learning_rate": 1.222163882436107e-06, |
|
"loss": 0.2612, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.8477016798966217, |
|
"grad_norm": 1.430426782366678, |
|
"learning_rate": 1.1765451300597574e-06, |
|
"loss": 0.2679, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.8506553442865055, |
|
"grad_norm": 1.346297948811944, |
|
"learning_rate": 1.1317407810650372e-06, |
|
"loss": 0.3629, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.8536090086763891, |
|
"grad_norm": 0.903041997125672, |
|
"learning_rate": 1.0877549708857228e-06, |
|
"loss": 0.232, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.8565626730662729, |
|
"grad_norm": 1.0773880128337658, |
|
"learning_rate": 1.0445917594046073e-06, |
|
"loss": 0.1946, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.8595163374561565, |
|
"grad_norm": 1.1827269325419392, |
|
"learning_rate": 1.0022551305787564e-06, |
|
"loss": 0.274, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.8624700018460403, |
|
"grad_norm": 1.2938978043172173, |
|
"learning_rate": 9.607489920717983e-07, |
|
"loss": 0.2568, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.8654236662359239, |
|
"grad_norm": 0.8777587732368793, |
|
"learning_rate": 9.200771748932513e-07, |
|
"loss": 0.195, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.8683773306258077, |
|
"grad_norm": 1.0574272517036634, |
|
"learning_rate": 8.802434330449128e-07, |
|
"loss": 0.2284, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.8713309950156913, |
|
"grad_norm": 0.9462220870406266, |
|
"learning_rate": 8.412514431743657e-07, |
|
"loss": 0.2023, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8742846594055751, |
|
"grad_norm": 1.3664321425766135, |
|
"learning_rate": 8.031048042356393e-07, |
|
"loss": 0.2713, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.8772383237954587, |
|
"grad_norm": 2.196437108751979, |
|
"learning_rate": 7.65807037157007e-07, |
|
"loss": 0.2231, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.8801919881853424, |
|
"grad_norm": 1.0022132609661685, |
|
"learning_rate": 7.293615845160196e-07, |
|
"loss": 0.2272, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.8831456525752261, |
|
"grad_norm": 0.8691720804489206, |
|
"learning_rate": 6.937718102217461e-07, |
|
"loss": 0.2079, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.8860993169651098, |
|
"grad_norm": 1.2735957834600269, |
|
"learning_rate": 6.590409992042957e-07, |
|
"loss": 0.3447, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8890529813549936, |
|
"grad_norm": 0.9596393088771338, |
|
"learning_rate": 6.251723571116031e-07, |
|
"loss": 0.2269, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.8920066457448772, |
|
"grad_norm": 0.9568122348658286, |
|
"learning_rate": 5.921690100135713e-07, |
|
"loss": 0.2327, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.894960310134761, |
|
"grad_norm": 0.9463458760012983, |
|
"learning_rate": 5.600340041135133e-07, |
|
"loss": 0.2629, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.8979139745246446, |
|
"grad_norm": 1.1532749148406225, |
|
"learning_rate": 5.287703054670012e-07, |
|
"loss": 0.2508, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.9008676389145284, |
|
"grad_norm": 1.0281277964315167, |
|
"learning_rate": 4.983807997080925e-07, |
|
"loss": 0.2212, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.903821303304412, |
|
"grad_norm": 0.8750621395665438, |
|
"learning_rate": 4.6886829178299676e-07, |
|
"loss": 0.2405, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.9067749676942958, |
|
"grad_norm": 1.1195160374002207, |
|
"learning_rate": 4.402355056911656e-07, |
|
"loss": 0.2042, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.9097286320841794, |
|
"grad_norm": 1.2073055574446565, |
|
"learning_rate": 4.124850842338779e-07, |
|
"loss": 0.3165, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.9126822964740632, |
|
"grad_norm": 0.9971547949248887, |
|
"learning_rate": 3.8561958877030957e-07, |
|
"loss": 0.2248, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.9156359608639468, |
|
"grad_norm": 1.2258045934933872, |
|
"learning_rate": 3.5964149898111587e-07, |
|
"loss": 0.261, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9185896252538306, |
|
"grad_norm": 1.2822010085661448, |
|
"learning_rate": 3.345532126395579e-07, |
|
"loss": 0.2145, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.9215432896437142, |
|
"grad_norm": 1.197892649344162, |
|
"learning_rate": 3.1035704539019384e-07, |
|
"loss": 0.2274, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.9244969540335979, |
|
"grad_norm": 1.3780412739693457, |
|
"learning_rate": 2.870552305351382e-07, |
|
"loss": 0.2529, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.9274506184234816, |
|
"grad_norm": 1.2245421854784801, |
|
"learning_rate": 2.646499188279328e-07, |
|
"loss": 0.2578, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.9304042828133653, |
|
"grad_norm": 0.8725967365032492, |
|
"learning_rate": 2.4314317827503375e-07, |
|
"loss": 0.1964, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.933357947203249, |
|
"grad_norm": 1.0171829897640234, |
|
"learning_rate": 2.2253699394493066e-07, |
|
"loss": 0.192, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.9363116115931327, |
|
"grad_norm": 1.5120721916483608, |
|
"learning_rate": 2.028332677849254e-07, |
|
"loss": 0.2038, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.9392652759830165, |
|
"grad_norm": 0.9364220238859826, |
|
"learning_rate": 1.840338184455881e-07, |
|
"loss": 0.302, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.9422189403729001, |
|
"grad_norm": 1.048113515822863, |
|
"learning_rate": 1.6614038111289034e-07, |
|
"loss": 0.2429, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.9451726047627839, |
|
"grad_norm": 1.650208720659303, |
|
"learning_rate": 1.49154607348051e-07, |
|
"loss": 0.273, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9481262691526675, |
|
"grad_norm": 0.8862126425841627, |
|
"learning_rate": 1.330780649350938e-07, |
|
"loss": 0.229, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.9510799335425513, |
|
"grad_norm": 1.3008396761106353, |
|
"learning_rate": 1.1791223773614635e-07, |
|
"loss": 0.2084, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.9540335979324349, |
|
"grad_norm": 1.2411283844792398, |
|
"learning_rate": 1.0365852555447642e-07, |
|
"loss": 0.2308, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.9569872623223187, |
|
"grad_norm": 2.1540616271945563, |
|
"learning_rate": 9.031824400528854e-08, |
|
"loss": 0.2664, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.9599409267122023, |
|
"grad_norm": 0.9623032655438389, |
|
"learning_rate": 7.789262439430012e-08, |
|
"loss": 0.253, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.962894591102086, |
|
"grad_norm": 1.5329453334978849, |
|
"learning_rate": 6.638281360408339e-08, |
|
"loss": 0.2565, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.9658482554919697, |
|
"grad_norm": 1.8116742792949059, |
|
"learning_rate": 5.578987398821345e-08, |
|
"loss": 0.1815, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.9688019198818534, |
|
"grad_norm": 1.1469022052933218, |
|
"learning_rate": 4.6114783273213395e-08, |
|
"loss": 0.1761, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.9717555842717371, |
|
"grad_norm": 1.3429884598164736, |
|
"learning_rate": 3.735843446830867e-08, |
|
"loss": 0.2152, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.9747092486616208, |
|
"grad_norm": 1.06929888763052, |
|
"learning_rate": 2.9521635783001932e-08, |
|
"loss": 0.3005, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9776629130515045, |
|
"grad_norm": 0.94720389771271, |
|
"learning_rate": 2.2605110552477162e-08, |
|
"loss": 0.2281, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.9806165774413882, |
|
"grad_norm": 1.4632786705472418, |
|
"learning_rate": 1.6609497170834154e-08, |
|
"loss": 0.2023, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.983570241831272, |
|
"grad_norm": 0.920915062499091, |
|
"learning_rate": 1.1535349032167908e-08, |
|
"loss": 0.2609, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.9865239062211556, |
|
"grad_norm": 0.9150221985245088, |
|
"learning_rate": 7.3831344794872415e-09, |
|
"loss": 0.2174, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.9894775706110394, |
|
"grad_norm": 1.4231218123910203, |
|
"learning_rate": 4.153236761488266e-09, |
|
"loss": 0.2448, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.992431235000923, |
|
"grad_norm": 1.4419487005726424, |
|
"learning_rate": 1.8459539971804608e-09, |
|
"loss": 0.247, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.9953848993908068, |
|
"grad_norm": 1.5776000940081785, |
|
"learning_rate": 4.614991483686826e-10, |
|
"loss": 0.2729, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.9983385637806904, |
|
"grad_norm": 1.218373936040361, |
|
"learning_rate": 0.0, |
|
"loss": 0.2897, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.9983385637806904, |
|
"step": 338, |
|
"total_flos": 405222433030144.0, |
|
"train_loss": 0.2837119762449575, |
|
"train_runtime": 5278.9341, |
|
"train_samples_per_second": 8.209, |
|
"train_steps_per_second": 0.064 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 338, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 405222433030144.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|