| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.977457168620378, |
| "eval_steps": 500, |
| "global_step": 345, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.014427412082957619, |
| "grad_norm": 6.30643255752035, |
| "learning_rate": 2.285714285714286e-06, |
| "loss": 0.8669, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.028854824165915238, |
| "grad_norm": 6.348193987490592, |
| "learning_rate": 4.571428571428572e-06, |
| "loss": 0.87, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04328223624887286, |
| "grad_norm": 5.780015353700753, |
| "learning_rate": 6.857142857142858e-06, |
| "loss": 0.8477, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.057709648331830475, |
| "grad_norm": 4.234197549907419, |
| "learning_rate": 9.142857142857144e-06, |
| "loss": 0.8118, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0721370604147881, |
| "grad_norm": 2.3155214212097306, |
| "learning_rate": 1.1428571428571429e-05, |
| "loss": 0.7747, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.08656447249774572, |
| "grad_norm": 5.1118273278397846, |
| "learning_rate": 1.3714285714285716e-05, |
| "loss": 0.8053, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.10099188458070334, |
| "grad_norm": 7.1277227926615, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.7794, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.11541929666366095, |
| "grad_norm": 8.187688384697006, |
| "learning_rate": 1.8285714285714288e-05, |
| "loss": 0.7978, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.12984670874661858, |
| "grad_norm": 5.093195709417533, |
| "learning_rate": 2.057142857142857e-05, |
| "loss": 0.7518, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.1442741208295762, |
| "grad_norm": 3.1265792874681977, |
| "learning_rate": 2.2857142857142858e-05, |
| "loss": 0.7116, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1587015329125338, |
| "grad_norm": 2.5188366289278323, |
| "learning_rate": 2.5142857142857143e-05, |
| "loss": 0.6783, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.17312894499549145, |
| "grad_norm": 1.4940573526303949, |
| "learning_rate": 2.742857142857143e-05, |
| "loss": 0.6532, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.18755635707844906, |
| "grad_norm": 1.6352594758559187, |
| "learning_rate": 2.9714285714285717e-05, |
| "loss": 0.6347, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.20198376916140667, |
| "grad_norm": 1.2187588508966425, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.6195, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.2164111812443643, |
| "grad_norm": 1.2755607008946352, |
| "learning_rate": 3.4285714285714284e-05, |
| "loss": 0.6142, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.2308385933273219, |
| "grad_norm": 1.0456746979084692, |
| "learning_rate": 3.6571428571428576e-05, |
| "loss": 0.6038, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.24526600541027954, |
| "grad_norm": 1.4162214811220066, |
| "learning_rate": 3.885714285714286e-05, |
| "loss": 0.5997, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.25969341749323716, |
| "grad_norm": 1.123092592959995, |
| "learning_rate": 4.114285714285714e-05, |
| "loss": 0.5855, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.27412082957619477, |
| "grad_norm": 1.247093949716292, |
| "learning_rate": 4.342857142857143e-05, |
| "loss": 0.5783, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.2885482416591524, |
| "grad_norm": 0.9162444696210892, |
| "learning_rate": 4.5714285714285716e-05, |
| "loss": 0.5762, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.30297565374211, |
| "grad_norm": 1.7011597008607717, |
| "learning_rate": 4.8e-05, |
| "loss": 0.5788, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.3174030658250676, |
| "grad_norm": 1.0313827493696333, |
| "learning_rate": 5.0285714285714286e-05, |
| "loss": 0.5711, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3318304779080252, |
| "grad_norm": 2.0638257126228083, |
| "learning_rate": 5.257142857142858e-05, |
| "loss": 0.589, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.3462578899909829, |
| "grad_norm": 1.2864831655829803, |
| "learning_rate": 5.485714285714286e-05, |
| "loss": 0.5638, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.3606853020739405, |
| "grad_norm": 1.9339557478184641, |
| "learning_rate": 5.714285714285715e-05, |
| "loss": 0.58, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.3751127141568981, |
| "grad_norm": 1.5799611967220424, |
| "learning_rate": 5.9428571428571434e-05, |
| "loss": 0.5647, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.38954012623985573, |
| "grad_norm": 1.251826447578104, |
| "learning_rate": 6.171428571428573e-05, |
| "loss": 0.5586, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.40396753832281335, |
| "grad_norm": 1.5898160634262704, |
| "learning_rate": 6.400000000000001e-05, |
| "loss": 0.5526, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.41839495040577096, |
| "grad_norm": 1.1139638002205856, |
| "learning_rate": 6.62857142857143e-05, |
| "loss": 0.5503, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.4328223624887286, |
| "grad_norm": 1.2940676544230694, |
| "learning_rate": 6.857142857142857e-05, |
| "loss": 0.556, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.4472497745716862, |
| "grad_norm": 1.4125777791117147, |
| "learning_rate": 7.085714285714287e-05, |
| "loss": 0.5429, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.4616771866546438, |
| "grad_norm": 0.6917885537888634, |
| "learning_rate": 7.314285714285715e-05, |
| "loss": 0.537, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.47610459873760147, |
| "grad_norm": 1.0491224041512421, |
| "learning_rate": 7.542857142857144e-05, |
| "loss": 0.5431, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.4905320108205591, |
| "grad_norm": 1.1841266996810977, |
| "learning_rate": 7.771428571428572e-05, |
| "loss": 0.5408, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.5049594229035167, |
| "grad_norm": 1.3485916713931527, |
| "learning_rate": 8e-05, |
| "loss": 0.5369, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5193868349864743, |
| "grad_norm": 1.381633742212091, |
| "learning_rate": 7.999794598960815e-05, |
| "loss": 0.5447, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5338142470694319, |
| "grad_norm": 1.113965104171002, |
| "learning_rate": 7.999178416938051e-05, |
| "loss": 0.5343, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.5482416591523895, |
| "grad_norm": 3.2942969304440877, |
| "learning_rate": 7.998151517213926e-05, |
| "loss": 0.5223, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.5626690712353472, |
| "grad_norm": 1.8632536229114898, |
| "learning_rate": 7.996714005251569e-05, |
| "loss": 0.5358, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.5770964833183048, |
| "grad_norm": 1.3196787168296893, |
| "learning_rate": 7.994866028684212e-05, |
| "loss": 0.5372, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5915238954012624, |
| "grad_norm": 1.1177800163283473, |
| "learning_rate": 7.992607777300004e-05, |
| "loss": 0.5274, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.60595130748422, |
| "grad_norm": 1.016337599180268, |
| "learning_rate": 7.989939483022537e-05, |
| "loss": 0.5209, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.6203787195671776, |
| "grad_norm": 1.395873276215236, |
| "learning_rate": 7.98686141988702e-05, |
| "loss": 0.5209, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.6348061316501352, |
| "grad_norm": 0.5976851739788052, |
| "learning_rate": 7.983373904012138e-05, |
| "loss": 0.5189, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.6492335437330928, |
| "grad_norm": 0.8805510948652211, |
| "learning_rate": 7.97947729356758e-05, |
| "loss": 0.5158, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.6636609558160504, |
| "grad_norm": 0.9279297933278495, |
| "learning_rate": 7.975171988737267e-05, |
| "loss": 0.5237, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.6780883678990082, |
| "grad_norm": 1.2997587022594848, |
| "learning_rate": 7.970458431678239e-05, |
| "loss": 0.5426, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.6925157799819658, |
| "grad_norm": 0.7359557953904674, |
| "learning_rate": 7.965337106475256e-05, |
| "loss": 0.5146, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.7069431920649234, |
| "grad_norm": 0.950140709774444, |
| "learning_rate": 7.959808539091077e-05, |
| "loss": 0.5207, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.721370604147881, |
| "grad_norm": 0.9262413860600672, |
| "learning_rate": 7.953873297312447e-05, |
| "loss": 0.5114, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7357980162308386, |
| "grad_norm": 0.659161414732944, |
| "learning_rate": 7.947531990691778e-05, |
| "loss": 0.5065, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.7502254283137962, |
| "grad_norm": 0.6193539698881468, |
| "learning_rate": 7.940785270484556e-05, |
| "loss": 0.5082, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.7646528403967539, |
| "grad_norm": 0.6484786655929663, |
| "learning_rate": 7.933633829582451e-05, |
| "loss": 0.5073, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.7790802524797115, |
| "grad_norm": 0.5246514937970682, |
| "learning_rate": 7.926078402442161e-05, |
| "loss": 0.5034, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.7935076645626691, |
| "grad_norm": 0.7555014101024001, |
| "learning_rate": 7.918119765009979e-05, |
| "loss": 0.5011, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.8079350766456267, |
| "grad_norm": 0.7908819876305151, |
| "learning_rate": 7.909758734642103e-05, |
| "loss": 0.5034, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.8223624887285843, |
| "grad_norm": 0.8314592679002677, |
| "learning_rate": 7.900996170020697e-05, |
| "loss": 0.4941, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.8367899008115419, |
| "grad_norm": 0.6541181086969657, |
| "learning_rate": 7.8918329710657e-05, |
| "loss": 0.4971, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.8512173128944995, |
| "grad_norm": 0.572541817797756, |
| "learning_rate": 7.882270078842407e-05, |
| "loss": 0.4945, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.8656447249774571, |
| "grad_norm": 0.7068090025887374, |
| "learning_rate": 7.872308475464818e-05, |
| "loss": 0.496, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8800721370604148, |
| "grad_norm": 0.5503187486959946, |
| "learning_rate": 7.861949183994774e-05, |
| "loss": 0.4921, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.8944995491433724, |
| "grad_norm": 0.6381310310432255, |
| "learning_rate": 7.851193268336894e-05, |
| "loss": 0.4993, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.90892696122633, |
| "grad_norm": 0.7118765648942602, |
| "learning_rate": 7.840041833129304e-05, |
| "loss": 0.488, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.9233543733092876, |
| "grad_norm": 0.8229902231799235, |
| "learning_rate": 7.828496023630193e-05, |
| "loss": 0.4886, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.9377817853922452, |
| "grad_norm": 0.8472076467453001, |
| "learning_rate": 7.816557025600196e-05, |
| "loss": 0.4954, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.9522091974752029, |
| "grad_norm": 0.9110400155251186, |
| "learning_rate": 7.804226065180615e-05, |
| "loss": 0.4869, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.9666366095581606, |
| "grad_norm": 1.0607731617003107, |
| "learning_rate": 7.791504408767492e-05, |
| "loss": 0.4867, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.9810640216411182, |
| "grad_norm": 0.8705421661545191, |
| "learning_rate": 7.778393362881549e-05, |
| "loss": 0.4873, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.9954914337240758, |
| "grad_norm": 0.598223438503932, |
| "learning_rate": 7.764894274034014e-05, |
| "loss": 0.4866, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.0099188458070334, |
| "grad_norm": 1.3726649121858878, |
| "learning_rate": 7.751008528588322e-05, |
| "loss": 0.8287, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.024346257889991, |
| "grad_norm": 1.4843499216749163, |
| "learning_rate": 7.736737552617749e-05, |
| "loss": 0.4874, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.0387736699729486, |
| "grad_norm": 0.5218559993997877, |
| "learning_rate": 7.722082811758939e-05, |
| "loss": 0.4768, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.0532010820559061, |
| "grad_norm": 1.3743390311137267, |
| "learning_rate": 7.707045811061396e-05, |
| "loss": 0.4805, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.0676284941388638, |
| "grad_norm": 0.6090236020230758, |
| "learning_rate": 7.691628094832901e-05, |
| "loss": 0.4731, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.0820559062218216, |
| "grad_norm": 0.897847351023635, |
| "learning_rate": 7.675831246480923e-05, |
| "loss": 0.4821, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.096483318304779, |
| "grad_norm": 0.7029565517391541, |
| "learning_rate": 7.659656888349997e-05, |
| "loss": 0.4724, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.1109107303877368, |
| "grad_norm": 0.6183945563644426, |
| "learning_rate": 7.643106681555106e-05, |
| "loss": 0.4763, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.1253381424706943, |
| "grad_norm": 0.5847084533277753, |
| "learning_rate": 7.626182325811089e-05, |
| "loss": 0.4664, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.139765554553652, |
| "grad_norm": 0.6749737142635733, |
| "learning_rate": 7.60888555925807e-05, |
| "loss": 0.4671, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.1541929666366095, |
| "grad_norm": 0.4481798380260001, |
| "learning_rate": 7.591218158282968e-05, |
| "loss": 0.4656, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1686203787195673, |
| "grad_norm": 0.649578910722019, |
| "learning_rate": 7.573181937337037e-05, |
| "loss": 0.4685, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.1830477908025248, |
| "grad_norm": 0.511575614745596, |
| "learning_rate": 7.554778748749543e-05, |
| "loss": 0.4608, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.1974752028854825, |
| "grad_norm": 0.5161021268023156, |
| "learning_rate": 7.536010482537514e-05, |
| "loss": 0.4613, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.21190261496844, |
| "grad_norm": 0.46897677283090455, |
| "learning_rate": 7.516879066211644e-05, |
| "loss": 0.4691, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.2263300270513977, |
| "grad_norm": 0.5762897639302133, |
| "learning_rate": 7.497386464578329e-05, |
| "loss": 0.4654, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.2407574391343552, |
| "grad_norm": 0.3969665274048659, |
| "learning_rate": 7.477534679537885e-05, |
| "loss": 0.4587, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.255184851217313, |
| "grad_norm": 0.4524782612023369, |
| "learning_rate": 7.457325749878951e-05, |
| "loss": 0.4534, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.2696122633002704, |
| "grad_norm": 0.5470294599409099, |
| "learning_rate": 7.436761751069103e-05, |
| "loss": 0.4643, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.2840396753832282, |
| "grad_norm": 0.5658245365895949, |
| "learning_rate": 7.415844795041704e-05, |
| "loss": 0.4602, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.2984670874661859, |
| "grad_norm": 0.6284954594621484, |
| "learning_rate": 7.394577029979004e-05, |
| "loss": 0.4676, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.3128944995491434, |
| "grad_norm": 0.7345913995003851, |
| "learning_rate": 7.372960640091529e-05, |
| "loss": 0.4606, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.327321911632101, |
| "grad_norm": 0.8342633496573308, |
| "learning_rate": 7.350997845393752e-05, |
| "loss": 0.4557, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.3417493237150586, |
| "grad_norm": 0.8330096859025692, |
| "learning_rate": 7.328690901476095e-05, |
| "loss": 0.4647, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.3561767357980163, |
| "grad_norm": 0.6546676985057208, |
| "learning_rate": 7.306042099273297e-05, |
| "loss": 0.4592, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.3706041478809738, |
| "grad_norm": 0.47502637705371126, |
| "learning_rate": 7.283053764829106e-05, |
| "loss": 0.4605, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.3850315599639313, |
| "grad_norm": 0.5531078683869538, |
| "learning_rate": 7.259728259057417e-05, |
| "loss": 0.4567, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.399458972046889, |
| "grad_norm": 0.515899958416822, |
| "learning_rate": 7.236067977499791e-05, |
| "loss": 0.4578, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.4138863841298468, |
| "grad_norm": 0.3492664441384964, |
| "learning_rate": 7.212075350079437e-05, |
| "loss": 0.4561, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.4283137962128043, |
| "grad_norm": 0.42413300170898927, |
| "learning_rate": 7.187752840851661e-05, |
| "loss": 0.4569, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.442741208295762, |
| "grad_norm": 0.4947663891832909, |
| "learning_rate": 7.163102947750794e-05, |
| "loss": 0.456, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.4571686203787195, |
| "grad_norm": 0.36507776313239376, |
| "learning_rate": 7.13812820233367e-05, |
| "loss": 0.4592, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.4715960324616773, |
| "grad_norm": 0.37547804843247373, |
| "learning_rate": 7.112831169519617e-05, |
| "loss": 0.459, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.4860234445446348, |
| "grad_norm": 0.36635807000670995, |
| "learning_rate": 7.087214447327049e-05, |
| "loss": 0.4561, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.5004508566275925, |
| "grad_norm": 0.315478417939894, |
| "learning_rate": 7.061280666606646e-05, |
| "loss": 0.4563, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.5148782687105502, |
| "grad_norm": 0.4096625613828037, |
| "learning_rate": 7.035032490771165e-05, |
| "loss": 0.4541, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.5293056807935077, |
| "grad_norm": 0.4422620826291203, |
| "learning_rate": 7.008472615521898e-05, |
| "loss": 0.4508, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.5437330928764652, |
| "grad_norm": 0.3213468597989991, |
| "learning_rate": 6.98160376857184e-05, |
| "loss": 0.458, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.558160504959423, |
| "grad_norm": 0.35471415827924724, |
| "learning_rate": 6.954428709365527e-05, |
| "loss": 0.4563, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.5725879170423807, |
| "grad_norm": 0.4247233136060684, |
| "learning_rate": 6.926950228795663e-05, |
| "loss": 0.4516, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.5870153291253382, |
| "grad_norm": 0.31840084731849594, |
| "learning_rate": 6.89917114891648e-05, |
| "loss": 0.4547, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.6014427412082957, |
| "grad_norm": 0.3573055805732088, |
| "learning_rate": 6.871094322653916e-05, |
| "loss": 0.4574, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.6158701532912534, |
| "grad_norm": 0.33089511640034097, |
| "learning_rate": 6.842722633512614e-05, |
| "loss": 0.4568, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.630297565374211, |
| "grad_norm": 0.32234159444311866, |
| "learning_rate": 6.814058995279793e-05, |
| "loss": 0.4506, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.6447249774571686, |
| "grad_norm": 0.2842035714714675, |
| "learning_rate": 6.785106351725992e-05, |
| "loss": 0.4451, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.6591523895401261, |
| "grad_norm": 0.24782641096472402, |
| "learning_rate": 6.755867676302747e-05, |
| "loss": 0.4524, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.6735798016230838, |
| "grad_norm": 0.29530488172037256, |
| "learning_rate": 6.726345971837217e-05, |
| "loss": 0.4523, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.6880072137060416, |
| "grad_norm": 0.29231108013584617, |
| "learning_rate": 6.69654427022379e-05, |
| "loss": 0.448, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.702434625788999, |
| "grad_norm": 0.3209263624489444, |
| "learning_rate": 6.666465632112707e-05, |
| "loss": 0.4523, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.7168620378719566, |
| "grad_norm": 0.4315596822756952, |
| "learning_rate": 6.636113146595729e-05, |
| "loss": 0.4491, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.7312894499549143, |
| "grad_norm": 0.4570225349432179, |
| "learning_rate": 6.60548993088889e-05, |
| "loss": 0.4464, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.745716862037872, |
| "grad_norm": 0.44762480786064185, |
| "learning_rate": 6.574599130012355e-05, |
| "loss": 0.4548, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.7601442741208295, |
| "grad_norm": 0.4937434929135096, |
| "learning_rate": 6.543443916467426e-05, |
| "loss": 0.4503, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.7745716862037872, |
| "grad_norm": 0.606568052119448, |
| "learning_rate": 6.512027489910718e-05, |
| "loss": 0.4486, |
| "step": 123 |
| }, |
| { |
| "epoch": 1.788999098286745, |
| "grad_norm": 0.6858758315433683, |
| "learning_rate": 6.480353076825566e-05, |
| "loss": 0.449, |
| "step": 124 |
| }, |
| { |
| "epoch": 1.8034265103697025, |
| "grad_norm": 0.5123808792652511, |
| "learning_rate": 6.448423930190653e-05, |
| "loss": 0.4464, |
| "step": 125 |
| }, |
| { |
| "epoch": 1.81785392245266, |
| "grad_norm": 0.38964320431553595, |
| "learning_rate": 6.416243329145923e-05, |
| "loss": 0.4475, |
| "step": 126 |
| }, |
| { |
| "epoch": 1.8322813345356177, |
| "grad_norm": 0.35099016991264836, |
| "learning_rate": 6.383814578655829e-05, |
| "loss": 0.4547, |
| "step": 127 |
| }, |
| { |
| "epoch": 1.8467087466185754, |
| "grad_norm": 0.3451471240491199, |
| "learning_rate": 6.351141009169893e-05, |
| "loss": 0.4502, |
| "step": 128 |
| }, |
| { |
| "epoch": 1.861136158701533, |
| "grad_norm": 0.33153601295599006, |
| "learning_rate": 6.31822597628068e-05, |
| "loss": 0.4487, |
| "step": 129 |
| }, |
| { |
| "epoch": 1.8755635707844904, |
| "grad_norm": 0.34266592441777854, |
| "learning_rate": 6.28507286037917e-05, |
| "loss": 0.4477, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.8899909828674482, |
| "grad_norm": 0.3492224166038735, |
| "learning_rate": 6.251685066307592e-05, |
| "loss": 0.4577, |
| "step": 131 |
| }, |
| { |
| "epoch": 1.9044183949504059, |
| "grad_norm": 0.2600600833378922, |
| "learning_rate": 6.218066023009743e-05, |
| "loss": 0.4491, |
| "step": 132 |
| }, |
| { |
| "epoch": 1.9188458070333634, |
| "grad_norm": 0.2930478733859803, |
| "learning_rate": 6.184219183178842e-05, |
| "loss": 0.4378, |
| "step": 133 |
| }, |
| { |
| "epoch": 1.9332732191163209, |
| "grad_norm": 0.344123397095677, |
| "learning_rate": 6.150148022902922e-05, |
| "loss": 0.4486, |
| "step": 134 |
| }, |
| { |
| "epoch": 1.9477006311992786, |
| "grad_norm": 0.32732494053257644, |
| "learning_rate": 6.11585604130785e-05, |
| "loss": 0.4451, |
| "step": 135 |
| }, |
| { |
| "epoch": 1.9621280432822363, |
| "grad_norm": 0.25454887232448653, |
| "learning_rate": 6.081346760197953e-05, |
| "loss": 0.4435, |
| "step": 136 |
| }, |
| { |
| "epoch": 1.9765554553651938, |
| "grad_norm": 0.21336525188734806, |
| "learning_rate": 6.04662372369433e-05, |
| "loss": 0.4459, |
| "step": 137 |
| }, |
| { |
| "epoch": 1.9909828674481513, |
| "grad_norm": 0.21510264038063648, |
| "learning_rate": 6.0116904978708716e-05, |
| "loss": 0.4451, |
| "step": 138 |
| }, |
| { |
| "epoch": 2.0054102795311093, |
| "grad_norm": 0.3886088967850276, |
| "learning_rate": 5.976550670388023e-05, |
| "loss": 0.7365, |
| "step": 139 |
| }, |
| { |
| "epoch": 2.019837691614067, |
| "grad_norm": 0.5461141945560231, |
| "learning_rate": 5.941207850124325e-05, |
| "loss": 0.4274, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.0342651036970243, |
| "grad_norm": 0.7233438360497401, |
| "learning_rate": 5.9056656668057806e-05, |
| "loss": 0.4257, |
| "step": 141 |
| }, |
| { |
| "epoch": 2.048692515779982, |
| "grad_norm": 0.902604447839341, |
| "learning_rate": 5.8699277706330854e-05, |
| "loss": 0.4327, |
| "step": 142 |
| }, |
| { |
| "epoch": 2.0631199278629397, |
| "grad_norm": 0.9842345625256362, |
| "learning_rate": 5.833997831906746e-05, |
| "loss": 0.4206, |
| "step": 143 |
| }, |
| { |
| "epoch": 2.0775473399458972, |
| "grad_norm": 0.7550138291557669, |
| "learning_rate": 5.7978795406501365e-05, |
| "loss": 0.4213, |
| "step": 144 |
| }, |
| { |
| "epoch": 2.0919747520288547, |
| "grad_norm": 0.5725375243656562, |
| "learning_rate": 5.761576606230538e-05, |
| "loss": 0.4232, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.1064021641118122, |
| "grad_norm": 0.5871563051625412, |
| "learning_rate": 5.725092756978177e-05, |
| "loss": 0.4268, |
| "step": 146 |
| }, |
| { |
| "epoch": 2.12082957619477, |
| "grad_norm": 0.6848078352834541, |
| "learning_rate": 5.688431739803328e-05, |
| "loss": 0.4231, |
| "step": 147 |
| }, |
| { |
| "epoch": 2.1352569882777277, |
| "grad_norm": 0.47360287031992565, |
| "learning_rate": 5.651597319811505e-05, |
| "loss": 0.4245, |
| "step": 148 |
| }, |
| { |
| "epoch": 2.149684400360685, |
| "grad_norm": 0.43267908202913546, |
| "learning_rate": 5.6145932799167795e-05, |
| "loss": 0.421, |
| "step": 149 |
| }, |
| { |
| "epoch": 2.164111812443643, |
| "grad_norm": 0.5225940009578477, |
| "learning_rate": 5.5774234204532746e-05, |
| "loss": 0.4171, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.1785392245266006, |
| "grad_norm": 0.345292795118154, |
| "learning_rate": 5.5400915587848713e-05, |
| "loss": 0.4176, |
| "step": 151 |
| }, |
| { |
| "epoch": 2.192966636609558, |
| "grad_norm": 0.37397788119190706, |
| "learning_rate": 5.502601528913161e-05, |
| "loss": 0.4185, |
| "step": 152 |
| }, |
| { |
| "epoch": 2.2073940486925157, |
| "grad_norm": 0.33142951490345385, |
| "learning_rate": 5.464957181083692e-05, |
| "loss": 0.4185, |
| "step": 153 |
| }, |
| { |
| "epoch": 2.2218214607754736, |
| "grad_norm": 0.2921058390866845, |
| "learning_rate": 5.427162381390543e-05, |
| "loss": 0.417, |
| "step": 154 |
| }, |
| { |
| "epoch": 2.236248872858431, |
| "grad_norm": 0.34198696626119557, |
| "learning_rate": 5.389221011379281e-05, |
| "loss": 0.4165, |
| "step": 155 |
| }, |
| { |
| "epoch": 2.2506762849413886, |
| "grad_norm": 0.26908479849148176, |
| "learning_rate": 5.351136967648323e-05, |
| "loss": 0.4193, |
| "step": 156 |
| }, |
| { |
| "epoch": 2.265103697024346, |
| "grad_norm": 0.31962185227765055, |
| "learning_rate": 5.3129141614487456e-05, |
| "loss": 0.4279, |
| "step": 157 |
| }, |
| { |
| "epoch": 2.279531109107304, |
| "grad_norm": 0.376211538661627, |
| "learning_rate": 5.274556518282607e-05, |
| "loss": 0.4195, |
| "step": 158 |
| }, |
| { |
| "epoch": 2.2939585211902616, |
| "grad_norm": 0.28546559354766526, |
| "learning_rate": 5.23606797749979e-05, |
| "loss": 0.4199, |
| "step": 159 |
| }, |
| { |
| "epoch": 2.308385933273219, |
| "grad_norm": 0.35404717031780875, |
| "learning_rate": 5.1974524918934336e-05, |
| "loss": 0.4194, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.3228133453561766, |
| "grad_norm": 0.32804234637360613, |
| "learning_rate": 5.15871402729397e-05, |
| "loss": 0.4215, |
| "step": 161 |
| }, |
| { |
| "epoch": 2.3372407574391345, |
| "grad_norm": 0.25853378935309307, |
| "learning_rate": 5.1198565621618444e-05, |
| "loss": 0.42, |
| "step": 162 |
| }, |
| { |
| "epoch": 2.351668169522092, |
| "grad_norm": 0.29254513463752485, |
| "learning_rate": 5.0808840871789155e-05, |
| "loss": 0.4137, |
| "step": 163 |
| }, |
| { |
| "epoch": 2.3660955816050495, |
| "grad_norm": 0.2324430211066698, |
| "learning_rate": 5.0418006048386134e-05, |
| "loss": 0.4174, |
| "step": 164 |
| }, |
| { |
| "epoch": 2.3805229936880075, |
| "grad_norm": 0.22977260261166277, |
| "learning_rate": 5.002610129034883e-05, |
| "loss": 0.418, |
| "step": 165 |
| }, |
| { |
| "epoch": 2.394950405770965, |
| "grad_norm": 0.25178175225388516, |
| "learning_rate": 4.963316684649951e-05, |
| "loss": 0.4215, |
| "step": 166 |
| }, |
| { |
| "epoch": 2.4093778178539225, |
| "grad_norm": 0.18022661655296157, |
| "learning_rate": 4.923924307140974e-05, |
| "loss": 0.414, |
| "step": 167 |
| }, |
| { |
| "epoch": 2.42380522993688, |
| "grad_norm": 0.23950853172671158, |
| "learning_rate": 4.8844370421255886e-05, |
| "loss": 0.419, |
| "step": 168 |
| }, |
| { |
| "epoch": 2.4382326420198375, |
| "grad_norm": 0.19718161732313788, |
| "learning_rate": 4.8448589449664305e-05, |
| "loss": 0.4124, |
| "step": 169 |
| }, |
| { |
| "epoch": 2.4526600541027954, |
| "grad_norm": 0.1804834440563653, |
| "learning_rate": 4.805194080354641e-05, |
| "loss": 0.4179, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.467087466185753, |
| "grad_norm": 0.20353053079969263, |
| "learning_rate": 4.765446521892426e-05, |
| "loss": 0.4104, |
| "step": 171 |
| }, |
| { |
| "epoch": 2.4815148782687104, |
| "grad_norm": 0.16177819342894753, |
| "learning_rate": 4.725620351674693e-05, |
| "loss": 0.4202, |
| "step": 172 |
| }, |
| { |
| "epoch": 2.4959422903516684, |
| "grad_norm": 0.16071769506654357, |
| "learning_rate": 4.685719659869815e-05, |
| "loss": 0.4083, |
| "step": 173 |
| }, |
| { |
| "epoch": 2.510369702434626, |
| "grad_norm": 0.1725361486750181, |
| "learning_rate": 4.645748544299574e-05, |
| "loss": 0.4153, |
| "step": 174 |
| }, |
| { |
| "epoch": 2.5247971145175834, |
| "grad_norm": 0.16753825050295582, |
| "learning_rate": 4.605711110018307e-05, |
| "loss": 0.4123, |
| "step": 175 |
| }, |
| { |
| "epoch": 2.539224526600541, |
| "grad_norm": 0.17717032081528933, |
| "learning_rate": 4.565611468891318e-05, |
| "loss": 0.4129, |
| "step": 176 |
| }, |
| { |
| "epoch": 2.5536519386834984, |
| "grad_norm": 0.1564598566236543, |
| "learning_rate": 4.525453739172586e-05, |
| "loss": 0.4117, |
| "step": 177 |
| }, |
| { |
| "epoch": 2.5680793507664563, |
| "grad_norm": 0.15287663289000603, |
| "learning_rate": 4.48524204508182e-05, |
| "loss": 0.4183, |
| "step": 178 |
| }, |
| { |
| "epoch": 2.582506762849414, |
| "grad_norm": 0.18206218031669835, |
| "learning_rate": 4.444980516380895e-05, |
| "loss": 0.4117, |
| "step": 179 |
| }, |
| { |
| "epoch": 2.5969341749323718, |
| "grad_norm": 0.16895498094131148, |
| "learning_rate": 4.4046732879497295e-05, |
| "loss": 0.4148, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.6113615870153293, |
| "grad_norm": 0.20384116046961775, |
| "learning_rate": 4.364324499361626e-05, |
| "loss": 0.4121, |
| "step": 181 |
| }, |
| { |
| "epoch": 2.625788999098287, |
| "grad_norm": 0.18201505177744084, |
| "learning_rate": 4.3239382944581384e-05, |
| "loss": 0.4154, |
| "step": 182 |
| }, |
| { |
| "epoch": 2.6402164111812443, |
| "grad_norm": 0.16531279832670212, |
| "learning_rate": 4.283518820923492e-05, |
| "loss": 0.4134, |
| "step": 183 |
| }, |
| { |
| "epoch": 2.654643823264202, |
| "grad_norm": 0.17869608399055636, |
| "learning_rate": 4.243070229858624e-05, |
| "loss": 0.4167, |
| "step": 184 |
| }, |
| { |
| "epoch": 2.6690712353471597, |
| "grad_norm": 0.15659192579938305, |
| "learning_rate": 4.202596675354851e-05, |
| "loss": 0.415, |
| "step": 185 |
| }, |
| { |
| "epoch": 2.6834986474301172, |
| "grad_norm": 0.1729110016630772, |
| "learning_rate": 4.1621023140672524e-05, |
| "loss": 0.4149, |
| "step": 186 |
| }, |
| { |
| "epoch": 2.6979260595130747, |
| "grad_norm": 0.17987624911793657, |
| "learning_rate": 4.121591304787772e-05, |
| "loss": 0.4128, |
| "step": 187 |
| }, |
| { |
| "epoch": 2.7123534715960327, |
| "grad_norm": 0.16277022431055213, |
| "learning_rate": 4.081067808018111e-05, |
| "loss": 0.4115, |
| "step": 188 |
| }, |
| { |
| "epoch": 2.72678088367899, |
| "grad_norm": 0.1614060894054725, |
| "learning_rate": 4.040535985542445e-05, |
| "loss": 0.4188, |
| "step": 189 |
| }, |
| { |
| "epoch": 2.7412082957619477, |
| "grad_norm": 0.1498519807080618, |
| "learning_rate": 4e-05, |
| "loss": 0.4172, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.755635707844905, |
| "grad_norm": 0.1604036678202687, |
| "learning_rate": 3.959464014457557e-05, |
| "loss": 0.4077, |
| "step": 191 |
| }, |
| { |
| "epoch": 2.7700631199278627, |
| "grad_norm": 0.13770932722249057, |
| "learning_rate": 3.91893219198189e-05, |
| "loss": 0.4195, |
| "step": 192 |
| }, |
| { |
| "epoch": 2.7844905320108206, |
| "grad_norm": 0.15035210016285183, |
| "learning_rate": 3.87840869521223e-05, |
| "loss": 0.4134, |
| "step": 193 |
| }, |
| { |
| "epoch": 2.798917944093778, |
| "grad_norm": 0.15201640612716522, |
| "learning_rate": 3.837897685932748e-05, |
| "loss": 0.4106, |
| "step": 194 |
| }, |
| { |
| "epoch": 2.8133453561767356, |
| "grad_norm": 0.13650157280906988, |
| "learning_rate": 3.7974033246451496e-05, |
| "loss": 0.4156, |
| "step": 195 |
| }, |
| { |
| "epoch": 2.8277727682596936, |
| "grad_norm": 0.17964938669673042, |
| "learning_rate": 3.7569297701413765e-05, |
| "loss": 0.4154, |
| "step": 196 |
| }, |
| { |
| "epoch": 2.842200180342651, |
| "grad_norm": 0.1243561060549184, |
| "learning_rate": 3.716481179076509e-05, |
| "loss": 0.4197, |
| "step": 197 |
| }, |
| { |
| "epoch": 2.8566275924256086, |
| "grad_norm": 0.17089769484487582, |
| "learning_rate": 3.676061705541864e-05, |
| "loss": 0.4152, |
| "step": 198 |
| }, |
| { |
| "epoch": 2.871055004508566, |
| "grad_norm": 0.17561155960318975, |
| "learning_rate": 3.635675500638375e-05, |
| "loss": 0.4167, |
| "step": 199 |
| }, |
| { |
| "epoch": 2.885482416591524, |
| "grad_norm": 0.16307396978150157, |
| "learning_rate": 3.595326712050272e-05, |
| "loss": 0.418, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.8999098286744815, |
| "grad_norm": 0.18681533479112983, |
| "learning_rate": 3.555019483619106e-05, |
| "loss": 0.418, |
| "step": 201 |
| }, |
| { |
| "epoch": 2.914337240757439, |
| "grad_norm": 0.1692680534023291, |
| "learning_rate": 3.5147579549181805e-05, |
| "loss": 0.4095, |
| "step": 202 |
| }, |
| { |
| "epoch": 2.928764652840397, |
| "grad_norm": 0.1647112968457325, |
| "learning_rate": 3.4745462608274143e-05, |
| "loss": 0.421, |
| "step": 203 |
| }, |
| { |
| "epoch": 2.9431920649233545, |
| "grad_norm": 0.1645824019282664, |
| "learning_rate": 3.434388531108683e-05, |
| "loss": 0.4201, |
| "step": 204 |
| }, |
| { |
| "epoch": 2.957619477006312, |
| "grad_norm": 0.16193821543079018, |
| "learning_rate": 3.394288889981695e-05, |
| "loss": 0.4144, |
| "step": 205 |
| }, |
| { |
| "epoch": 2.9720468890892695, |
| "grad_norm": 0.15576654979169963, |
| "learning_rate": 3.354251455700427e-05, |
| "loss": 0.421, |
| "step": 206 |
| }, |
| { |
| "epoch": 2.986474301172227, |
| "grad_norm": 0.11204665102016201, |
| "learning_rate": 3.314280340130187e-05, |
| "loss": 0.4169, |
| "step": 207 |
| }, |
| { |
| "epoch": 3.000901713255185, |
| "grad_norm": 0.2597744580379488, |
| "learning_rate": 3.274379648325308e-05, |
| "loss": 0.7047, |
| "step": 208 |
| }, |
| { |
| "epoch": 3.0153291253381425, |
| "grad_norm": 0.29242080182868324, |
| "learning_rate": 3.234553478107575e-05, |
| "loss": 0.3922, |
| "step": 209 |
| }, |
| { |
| "epoch": 3.0297565374211, |
| "grad_norm": 0.15554632560519327, |
| "learning_rate": 3.194805919645359e-05, |
| "loss": 0.3914, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.044183949504058, |
| "grad_norm": 0.22638176078144323, |
| "learning_rate": 3.155141055033571e-05, |
| "loss": 0.389, |
| "step": 211 |
| }, |
| { |
| "epoch": 3.0586113615870154, |
| "grad_norm": 0.22235251051875934, |
| "learning_rate": 3.115562957874413e-05, |
| "loss": 0.3894, |
| "step": 212 |
| }, |
| { |
| "epoch": 3.073038773669973, |
| "grad_norm": 0.14895254239929756, |
| "learning_rate": 3.0760756928590265e-05, |
| "loss": 0.3855, |
| "step": 213 |
| }, |
| { |
| "epoch": 3.0874661857529304, |
| "grad_norm": 0.21985837426895496, |
| "learning_rate": 3.0366833153500502e-05, |
| "loss": 0.3899, |
| "step": 214 |
| }, |
| { |
| "epoch": 3.1018935978358884, |
| "grad_norm": 0.1448532296100453, |
| "learning_rate": 2.997389870965118e-05, |
| "loss": 0.3853, |
| "step": 215 |
| }, |
| { |
| "epoch": 3.116321009918846, |
| "grad_norm": 0.18340169272282977, |
| "learning_rate": 2.958199395161388e-05, |
| "loss": 0.3885, |
| "step": 216 |
| }, |
| { |
| "epoch": 3.1307484220018034, |
| "grad_norm": 0.16252308646393857, |
| "learning_rate": 2.9191159128210865e-05, |
| "loss": 0.388, |
| "step": 217 |
| }, |
| { |
| "epoch": 3.145175834084761, |
| "grad_norm": 0.15643803474572993, |
| "learning_rate": 2.8801434378381566e-05, |
| "loss": 0.3918, |
| "step": 218 |
| }, |
| { |
| "epoch": 3.159603246167719, |
| "grad_norm": 0.16477382717354483, |
| "learning_rate": 2.841285972706032e-05, |
| "loss": 0.3848, |
| "step": 219 |
| }, |
| { |
| "epoch": 3.1740306582506763, |
| "grad_norm": 0.1428234224200868, |
| "learning_rate": 2.8025475081065684e-05, |
| "loss": 0.3916, |
| "step": 220 |
| }, |
| { |
| "epoch": 3.188458070333634, |
| "grad_norm": 0.15459593532143248, |
| "learning_rate": 2.7639320225002108e-05, |
| "loss": 0.3868, |
| "step": 221 |
| }, |
| { |
| "epoch": 3.2028854824165913, |
| "grad_norm": 0.1376918823828853, |
| "learning_rate": 2.725443481717394e-05, |
| "loss": 0.3869, |
| "step": 222 |
| }, |
| { |
| "epoch": 3.2173128944995493, |
| "grad_norm": 0.12950376396508245, |
| "learning_rate": 2.687085838551255e-05, |
| "loss": 0.391, |
| "step": 223 |
| }, |
| { |
| "epoch": 3.2317403065825068, |
| "grad_norm": 0.15236052575941866, |
| "learning_rate": 2.6488630323516785e-05, |
| "loss": 0.3854, |
| "step": 224 |
| }, |
| { |
| "epoch": 3.2461677186654643, |
| "grad_norm": 0.12413662200660247, |
| "learning_rate": 2.6107789886207195e-05, |
| "loss": 0.3932, |
| "step": 225 |
| }, |
| { |
| "epoch": 3.260595130748422, |
| "grad_norm": 0.12948714851227347, |
| "learning_rate": 2.5728376186094582e-05, |
| "loss": 0.392, |
| "step": 226 |
| }, |
| { |
| "epoch": 3.2750225428313797, |
| "grad_norm": 0.13509083763614343, |
| "learning_rate": 2.5350428189163095e-05, |
| "loss": 0.3893, |
| "step": 227 |
| }, |
| { |
| "epoch": 3.2894499549143372, |
| "grad_norm": 0.11596299194935494, |
| "learning_rate": 2.4973984710868394e-05, |
| "loss": 0.3853, |
| "step": 228 |
| }, |
| { |
| "epoch": 3.3038773669972947, |
| "grad_norm": 0.11495064647362904, |
| "learning_rate": 2.4599084412151283e-05, |
| "loss": 0.3881, |
| "step": 229 |
| }, |
| { |
| "epoch": 3.3183047790802522, |
| "grad_norm": 0.11377790156854924, |
| "learning_rate": 2.4225765795467267e-05, |
| "loss": 0.3881, |
| "step": 230 |
| }, |
| { |
| "epoch": 3.33273219116321, |
| "grad_norm": 0.11176541174980999, |
| "learning_rate": 2.3854067200832226e-05, |
| "loss": 0.3849, |
| "step": 231 |
| }, |
| { |
| "epoch": 3.3471596032461677, |
| "grad_norm": 0.10932782133038507, |
| "learning_rate": 2.348402680188496e-05, |
| "loss": 0.3913, |
| "step": 232 |
| }, |
| { |
| "epoch": 3.361587015329125, |
| "grad_norm": 0.12116739999194517, |
| "learning_rate": 2.3115682601966726e-05, |
| "loss": 0.3909, |
| "step": 233 |
| }, |
| { |
| "epoch": 3.376014427412083, |
| "grad_norm": 0.11683332854779228, |
| "learning_rate": 2.274907243021824e-05, |
| "loss": 0.384, |
| "step": 234 |
| }, |
| { |
| "epoch": 3.3904418394950406, |
| "grad_norm": 0.10329122415194655, |
| "learning_rate": 2.2384233937694626e-05, |
| "loss": 0.3891, |
| "step": 235 |
| }, |
| { |
| "epoch": 3.404869251577998, |
| "grad_norm": 0.11676332764357526, |
| "learning_rate": 2.202120459349864e-05, |
| "loss": 0.3879, |
| "step": 236 |
| }, |
| { |
| "epoch": 3.4192966636609556, |
| "grad_norm": 0.11043415196225377, |
| "learning_rate": 2.1660021680932565e-05, |
| "loss": 0.3907, |
| "step": 237 |
| }, |
| { |
| "epoch": 3.4337240757439136, |
| "grad_norm": 0.10352103209720392, |
| "learning_rate": 2.130072229366916e-05, |
| "loss": 0.3868, |
| "step": 238 |
| }, |
| { |
| "epoch": 3.448151487826871, |
| "grad_norm": 0.11106271281959253, |
| "learning_rate": 2.0943343331942208e-05, |
| "loss": 0.3872, |
| "step": 239 |
| }, |
| { |
| "epoch": 3.4625788999098286, |
| "grad_norm": 0.100859861129825, |
| "learning_rate": 2.0587921498756768e-05, |
| "loss": 0.3841, |
| "step": 240 |
| }, |
| { |
| "epoch": 3.4770063119927865, |
| "grad_norm": 0.11902184783806945, |
| "learning_rate": 2.0234493296119776e-05, |
| "loss": 0.389, |
| "step": 241 |
| }, |
| { |
| "epoch": 3.491433724075744, |
| "grad_norm": 0.09752054045307186, |
| "learning_rate": 1.9883095021291294e-05, |
| "loss": 0.3894, |
| "step": 242 |
| }, |
| { |
| "epoch": 3.5058611361587015, |
| "grad_norm": 0.1157887524298405, |
| "learning_rate": 1.9533762763056714e-05, |
| "loss": 0.3864, |
| "step": 243 |
| }, |
| { |
| "epoch": 3.520288548241659, |
| "grad_norm": 0.0962545228356216, |
| "learning_rate": 1.918653239802048e-05, |
| "loss": 0.3911, |
| "step": 244 |
| }, |
| { |
| "epoch": 3.5347159603246165, |
| "grad_norm": 0.11589978846585437, |
| "learning_rate": 1.8841439586921515e-05, |
| "loss": 0.3873, |
| "step": 245 |
| }, |
| { |
| "epoch": 3.5491433724075745, |
| "grad_norm": 0.10235501875925748, |
| "learning_rate": 1.849851977097078e-05, |
| "loss": 0.3919, |
| "step": 246 |
| }, |
| { |
| "epoch": 3.563570784490532, |
| "grad_norm": 0.10642762647275054, |
| "learning_rate": 1.8157808168211605e-05, |
| "loss": 0.3862, |
| "step": 247 |
| }, |
| { |
| "epoch": 3.5779981965734895, |
| "grad_norm": 0.10705125409234852, |
| "learning_rate": 1.7819339769902568e-05, |
| "loss": 0.3826, |
| "step": 248 |
| }, |
| { |
| "epoch": 3.5924256086564474, |
| "grad_norm": 0.11011000435589068, |
| "learning_rate": 1.7483149336924105e-05, |
| "loss": 0.3896, |
| "step": 249 |
| }, |
| { |
| "epoch": 3.606853020739405, |
| "grad_norm": 0.10299367912221409, |
| "learning_rate": 1.71492713962083e-05, |
| "loss": 0.3818, |
| "step": 250 |
| }, |
| { |
| "epoch": 3.6212804328223624, |
| "grad_norm": 0.09896534243305305, |
| "learning_rate": 1.6817740237193213e-05, |
| "loss": 0.3899, |
| "step": 251 |
| }, |
| { |
| "epoch": 3.63570784490532, |
| "grad_norm": 0.10057029872247607, |
| "learning_rate": 1.648858990830108e-05, |
| "loss": 0.3865, |
| "step": 252 |
| }, |
| { |
| "epoch": 3.6501352569882775, |
| "grad_norm": 0.10556137735012057, |
| "learning_rate": 1.6161854213441724e-05, |
| "loss": 0.3857, |
| "step": 253 |
| }, |
| { |
| "epoch": 3.6645626690712354, |
| "grad_norm": 0.09912849463045817, |
| "learning_rate": 1.5837566708540776e-05, |
| "loss": 0.3882, |
| "step": 254 |
| }, |
| { |
| "epoch": 3.678990081154193, |
| "grad_norm": 0.10873331871358806, |
| "learning_rate": 1.5515760698093485e-05, |
| "loss": 0.3913, |
| "step": 255 |
| }, |
| { |
| "epoch": 3.693417493237151, |
| "grad_norm": 0.10135375429134282, |
| "learning_rate": 1.5196469231744338e-05, |
| "loss": 0.3918, |
| "step": 256 |
| }, |
| { |
| "epoch": 3.7078449053201084, |
| "grad_norm": 0.101442765978251, |
| "learning_rate": 1.4879725100892821e-05, |
| "loss": 0.3898, |
| "step": 257 |
| }, |
| { |
| "epoch": 3.722272317403066, |
| "grad_norm": 0.09944828474807176, |
| "learning_rate": 1.456556083532577e-05, |
| "loss": 0.3888, |
| "step": 258 |
| }, |
| { |
| "epoch": 3.7366997294860234, |
| "grad_norm": 0.10063514199400612, |
| "learning_rate": 1.4254008699876468e-05, |
| "loss": 0.3875, |
| "step": 259 |
| }, |
| { |
| "epoch": 3.751127141568981, |
| "grad_norm": 0.1073202284969319, |
| "learning_rate": 1.394510069111112e-05, |
| "loss": 0.3825, |
| "step": 260 |
| }, |
| { |
| "epoch": 3.765554553651939, |
| "grad_norm": 0.11199636802016412, |
| "learning_rate": 1.3638868534042732e-05, |
| "loss": 0.3912, |
| "step": 261 |
| }, |
| { |
| "epoch": 3.7799819657348963, |
| "grad_norm": 0.09460154248342248, |
| "learning_rate": 1.3335343678872947e-05, |
| "loss": 0.3919, |
| "step": 262 |
| }, |
| { |
| "epoch": 3.794409377817854, |
| "grad_norm": 0.10030251095406782, |
| "learning_rate": 1.3034557297762108e-05, |
| "loss": 0.3897, |
| "step": 263 |
| }, |
| { |
| "epoch": 3.8088367899008118, |
| "grad_norm": 0.09707674946485532, |
| "learning_rate": 1.2736540281627833e-05, |
| "loss": 0.3882, |
| "step": 264 |
| }, |
| { |
| "epoch": 3.8232642019837693, |
| "grad_norm": 0.10066191197693501, |
| "learning_rate": 1.2441323236972536e-05, |
| "loss": 0.3838, |
| "step": 265 |
| }, |
| { |
| "epoch": 3.8376916140667268, |
| "grad_norm": 0.09882561767806158, |
| "learning_rate": 1.2148936482740106e-05, |
| "loss": 0.3876, |
| "step": 266 |
| }, |
| { |
| "epoch": 3.8521190261496843, |
| "grad_norm": 0.09393577574639751, |
| "learning_rate": 1.1859410047202076e-05, |
| "loss": 0.3949, |
| "step": 267 |
| }, |
| { |
| "epoch": 3.8665464382326418, |
| "grad_norm": 0.10491601613830169, |
| "learning_rate": 1.1572773664873877e-05, |
| "loss": 0.3945, |
| "step": 268 |
| }, |
| { |
| "epoch": 3.8809738503155997, |
| "grad_norm": 0.09433909557518863, |
| "learning_rate": 1.1289056773460848e-05, |
| "loss": 0.3907, |
| "step": 269 |
| }, |
| { |
| "epoch": 3.895401262398557, |
| "grad_norm": 0.09718276334267877, |
| "learning_rate": 1.100828851083521e-05, |
| "loss": 0.3892, |
| "step": 270 |
| }, |
| { |
| "epoch": 3.9098286744815147, |
| "grad_norm": 0.09130729699370443, |
| "learning_rate": 1.0730497712043375e-05, |
| "loss": 0.3877, |
| "step": 271 |
| }, |
| { |
| "epoch": 3.9242560865644727, |
| "grad_norm": 0.0989960086350818, |
| "learning_rate": 1.0455712906344742e-05, |
| "loss": 0.3905, |
| "step": 272 |
| }, |
| { |
| "epoch": 3.93868349864743, |
| "grad_norm": 0.08478658948822386, |
| "learning_rate": 1.0183962314281616e-05, |
| "loss": 0.3809, |
| "step": 273 |
| }, |
| { |
| "epoch": 3.9531109107303877, |
| "grad_norm": 0.08732293651393247, |
| "learning_rate": 9.91527384478102e-06, |
| "loss": 0.3909, |
| "step": 274 |
| }, |
| { |
| "epoch": 3.967538322813345, |
| "grad_norm": 0.09248422321017552, |
| "learning_rate": 9.649675092288366e-06, |
| "loss": 0.3904, |
| "step": 275 |
| }, |
| { |
| "epoch": 3.981965734896303, |
| "grad_norm": 0.08874068919252195, |
| "learning_rate": 9.387193333933542e-06, |
| "loss": 0.3901, |
| "step": 276 |
| }, |
| { |
| "epoch": 3.9963931469792606, |
| "grad_norm": 0.10386496694166722, |
| "learning_rate": 9.127855526729518e-06, |
| "loss": 0.4421, |
| "step": 277 |
| }, |
| { |
| "epoch": 4.010820559062219, |
| "grad_norm": 0.17465874605366377, |
| "learning_rate": 8.87168830480385e-06, |
| "loss": 0.5908, |
| "step": 278 |
| }, |
| { |
| "epoch": 4.025247971145176, |
| "grad_norm": 0.10653039126787628, |
| "learning_rate": 8.618717976663316e-06, |
| "loss": 0.3731, |
| "step": 279 |
| }, |
| { |
| "epoch": 4.039675383228134, |
| "grad_norm": 0.09575070816517416, |
| "learning_rate": 8.368970522492064e-06, |
| "loss": 0.368, |
| "step": 280 |
| }, |
| { |
| "epoch": 4.054102795311091, |
| "grad_norm": 0.10014800890252488, |
| "learning_rate": 8.122471591483405e-06, |
| "loss": 0.379, |
| "step": 281 |
| }, |
| { |
| "epoch": 4.068530207394049, |
| "grad_norm": 0.10719719334181581, |
| "learning_rate": 7.879246499205635e-06, |
| "loss": 0.3747, |
| "step": 282 |
| }, |
| { |
| "epoch": 4.082957619477006, |
| "grad_norm": 0.1034029506118448, |
| "learning_rate": 7.639320225002106e-06, |
| "loss": 0.3675, |
| "step": 283 |
| }, |
| { |
| "epoch": 4.097385031559964, |
| "grad_norm": 0.09719883839292859, |
| "learning_rate": 7.402717409425846e-06, |
| "loss": 0.3745, |
| "step": 284 |
| }, |
| { |
| "epoch": 4.111812443642922, |
| "grad_norm": 0.09348527541393592, |
| "learning_rate": 7.169462351708958e-06, |
| "loss": 0.3746, |
| "step": 285 |
| }, |
| { |
| "epoch": 4.1262398557258795, |
| "grad_norm": 0.09622234742870885, |
| "learning_rate": 6.939579007267041e-06, |
| "loss": 0.3669, |
| "step": 286 |
| }, |
| { |
| "epoch": 4.140667267808837, |
| "grad_norm": 0.10062068129651956, |
| "learning_rate": 6.7130909852390504e-06, |
| "loss": 0.377, |
| "step": 287 |
| }, |
| { |
| "epoch": 4.1550946798917945, |
| "grad_norm": 0.09063820087374816, |
| "learning_rate": 6.490021546062495e-06, |
| "loss": 0.3725, |
| "step": 288 |
| }, |
| { |
| "epoch": 4.169522091974752, |
| "grad_norm": 0.0978449706487065, |
| "learning_rate": 6.270393599084719e-06, |
| "loss": 0.3701, |
| "step": 289 |
| }, |
| { |
| "epoch": 4.1839495040577095, |
| "grad_norm": 0.09420394070648874, |
| "learning_rate": 6.054229700209959e-06, |
| "loss": 0.3686, |
| "step": 290 |
| }, |
| { |
| "epoch": 4.198376916140667, |
| "grad_norm": 0.09135183952593588, |
| "learning_rate": 5.841552049582979e-06, |
| "loss": 0.3668, |
| "step": 291 |
| }, |
| { |
| "epoch": 4.2128043282236245, |
| "grad_norm": 0.08941854382744684, |
| "learning_rate": 5.632382489308983e-06, |
| "loss": 0.3753, |
| "step": 292 |
| }, |
| { |
| "epoch": 4.227231740306583, |
| "grad_norm": 0.09033071999727058, |
| "learning_rate": 5.4267425012105e-06, |
| "loss": 0.371, |
| "step": 293 |
| }, |
| { |
| "epoch": 4.24165915238954, |
| "grad_norm": 0.08363022499101917, |
| "learning_rate": 5.224653204621155e-06, |
| "loss": 0.3699, |
| "step": 294 |
| }, |
| { |
| "epoch": 4.256086564472498, |
| "grad_norm": 0.0794997380043983, |
| "learning_rate": 5.026135354216717e-06, |
| "loss": 0.3703, |
| "step": 295 |
| }, |
| { |
| "epoch": 4.270513976555455, |
| "grad_norm": 0.08331150232989441, |
| "learning_rate": 4.8312093378835645e-06, |
| "loss": 0.3729, |
| "step": 296 |
| }, |
| { |
| "epoch": 4.284941388638413, |
| "grad_norm": 0.08516826877199297, |
| "learning_rate": 4.63989517462486e-06, |
| "loss": 0.3757, |
| "step": 297 |
| }, |
| { |
| "epoch": 4.29936880072137, |
| "grad_norm": 0.08386630568073708, |
| "learning_rate": 4.452212512504579e-06, |
| "loss": 0.3766, |
| "step": 298 |
| }, |
| { |
| "epoch": 4.313796212804328, |
| "grad_norm": 0.08120526732790356, |
| "learning_rate": 4.268180626629641e-06, |
| "loss": 0.3751, |
| "step": 299 |
| }, |
| { |
| "epoch": 4.328223624887286, |
| "grad_norm": 0.0797417427617793, |
| "learning_rate": 4.087818417170337e-06, |
| "loss": 0.3711, |
| "step": 300 |
| }, |
| { |
| "epoch": 4.342651036970244, |
| "grad_norm": 0.08091306914486351, |
| "learning_rate": 3.9111444074193e-06, |
| "loss": 0.3704, |
| "step": 301 |
| }, |
| { |
| "epoch": 4.357078449053201, |
| "grad_norm": 0.08277906868820106, |
| "learning_rate": 3.7381767418891303e-06, |
| "loss": 0.3736, |
| "step": 302 |
| }, |
| { |
| "epoch": 4.371505861136159, |
| "grad_norm": 0.08109844725998167, |
| "learning_rate": 3.568933184448944e-06, |
| "loss": 0.3679, |
| "step": 303 |
| }, |
| { |
| "epoch": 4.385933273219116, |
| "grad_norm": 0.076043565671384, |
| "learning_rate": 3.403431116500038e-06, |
| "loss": 0.3737, |
| "step": 304 |
| }, |
| { |
| "epoch": 4.400360685302074, |
| "grad_norm": 0.0786325856472425, |
| "learning_rate": 3.241687535190776e-06, |
| "loss": 0.3722, |
| "step": 305 |
| }, |
| { |
| "epoch": 4.414788097385031, |
| "grad_norm": 0.07882441543843179, |
| "learning_rate": 3.08371905167101e-06, |
| "loss": 0.3746, |
| "step": 306 |
| }, |
| { |
| "epoch": 4.429215509467989, |
| "grad_norm": 0.0813528283180034, |
| "learning_rate": 2.929541889386056e-06, |
| "loss": 0.3698, |
| "step": 307 |
| }, |
| { |
| "epoch": 4.443642921550947, |
| "grad_norm": 0.07778147610676125, |
| "learning_rate": 2.7791718824106186e-06, |
| "loss": 0.3747, |
| "step": 308 |
| }, |
| { |
| "epoch": 4.458070333633905, |
| "grad_norm": 0.07497215994153009, |
| "learning_rate": 2.6326244738225183e-06, |
| "loss": 0.3793, |
| "step": 309 |
| }, |
| { |
| "epoch": 4.472497745716862, |
| "grad_norm": 0.0751254260494879, |
| "learning_rate": 2.489914714116788e-06, |
| "loss": 0.3707, |
| "step": 310 |
| }, |
| { |
| "epoch": 4.48692515779982, |
| "grad_norm": 0.0748304985473765, |
| "learning_rate": 2.3510572596598678e-06, |
| "loss": 0.3728, |
| "step": 311 |
| }, |
| { |
| "epoch": 4.501352569882777, |
| "grad_norm": 0.07793338755657392, |
| "learning_rate": 2.2160663711845176e-06, |
| "loss": 0.3733, |
| "step": 312 |
| }, |
| { |
| "epoch": 4.515779981965735, |
| "grad_norm": 0.07545418335066799, |
| "learning_rate": 2.084955912325093e-06, |
| "loss": 0.3663, |
| "step": 313 |
| }, |
| { |
| "epoch": 4.530207394048692, |
| "grad_norm": 0.0784362383534773, |
| "learning_rate": 1.957739348193859e-06, |
| "loss": 0.3694, |
| "step": 314 |
| }, |
| { |
| "epoch": 4.544634806131651, |
| "grad_norm": 0.07501282928300265, |
| "learning_rate": 1.8344297439980475e-06, |
| "loss": 0.3739, |
| "step": 315 |
| }, |
| { |
| "epoch": 4.559062218214608, |
| "grad_norm": 0.07184746061800508, |
| "learning_rate": 1.715039763698081e-06, |
| "loss": 0.372, |
| "step": 316 |
| }, |
| { |
| "epoch": 4.573489630297566, |
| "grad_norm": 0.0768286859431056, |
| "learning_rate": 1.5995816687069687e-06, |
| "loss": 0.367, |
| "step": 317 |
| }, |
| { |
| "epoch": 4.587917042380523, |
| "grad_norm": 0.07399515759551432, |
| "learning_rate": 1.4880673166310612e-06, |
| "loss": 0.3734, |
| "step": 318 |
| }, |
| { |
| "epoch": 4.602344454463481, |
| "grad_norm": 0.07232964813350647, |
| "learning_rate": 1.3805081600522585e-06, |
| "loss": 0.3697, |
| "step": 319 |
| }, |
| { |
| "epoch": 4.616771866546438, |
| "grad_norm": 0.07389169724144744, |
| "learning_rate": 1.276915245351833e-06, |
| "loss": 0.3666, |
| "step": 320 |
| }, |
| { |
| "epoch": 4.631199278629396, |
| "grad_norm": 0.071013638237935, |
| "learning_rate": 1.1772992115759351e-06, |
| "loss": 0.3704, |
| "step": 321 |
| }, |
| { |
| "epoch": 4.645626690712353, |
| "grad_norm": 0.07478471657750946, |
| "learning_rate": 1.081670289343002e-06, |
| "loss": 0.372, |
| "step": 322 |
| }, |
| { |
| "epoch": 4.660054102795311, |
| "grad_norm": 0.0713434887145259, |
| "learning_rate": 9.900382997930413e-07, |
| "loss": 0.3754, |
| "step": 323 |
| }, |
| { |
| "epoch": 4.674481514878269, |
| "grad_norm": 0.0730981387918326, |
| "learning_rate": 9.024126535789812e-07, |
| "loss": 0.3684, |
| "step": 324 |
| }, |
| { |
| "epoch": 4.6889089269612265, |
| "grad_norm": 0.07158489318241744, |
| "learning_rate": 8.188023499002206e-07, |
| "loss": 0.3808, |
| "step": 325 |
| }, |
| { |
| "epoch": 4.703336339044184, |
| "grad_norm": 0.07013751377947393, |
| "learning_rate": 7.392159755783957e-07, |
| "loss": 0.3626, |
| "step": 326 |
| }, |
| { |
| "epoch": 4.7177637511271415, |
| "grad_norm": 0.07150689349177662, |
| "learning_rate": 6.636617041754978e-07, |
| "loss": 0.3723, |
| "step": 327 |
| }, |
| { |
| "epoch": 4.732191163210099, |
| "grad_norm": 0.07003675588222115, |
| "learning_rate": 5.921472951544527e-07, |
| "loss": 0.3689, |
| "step": 328 |
| }, |
| { |
| "epoch": 4.7466185752930565, |
| "grad_norm": 0.06957634634931112, |
| "learning_rate": 5.246800930822371e-07, |
| "loss": 0.3751, |
| "step": 329 |
| }, |
| { |
| "epoch": 4.761045987376015, |
| "grad_norm": 0.07044531133634477, |
| "learning_rate": 4.6126702687554483e-07, |
| "loss": 0.371, |
| "step": 330 |
| }, |
| { |
| "epoch": 4.775473399458972, |
| "grad_norm": 0.07077686025594564, |
| "learning_rate": 4.0191460908923563e-07, |
| "loss": 0.3676, |
| "step": 331 |
| }, |
| { |
| "epoch": 4.78990081154193, |
| "grad_norm": 0.07253004421887527, |
| "learning_rate": 3.4662893524745276e-07, |
| "loss": 0.3781, |
| "step": 332 |
| }, |
| { |
| "epoch": 4.804328223624887, |
| "grad_norm": 0.07510496456067554, |
| "learning_rate": 2.954156832176214e-07, |
| "loss": 0.3783, |
| "step": 333 |
| }, |
| { |
| "epoch": 4.818755635707845, |
| "grad_norm": 0.0706180896307413, |
| "learning_rate": 2.482801126273371e-07, |
| "loss": 0.371, |
| "step": 334 |
| }, |
| { |
| "epoch": 4.833183047790802, |
| "grad_norm": 0.06847497940017412, |
| "learning_rate": 2.0522706432419382e-07, |
| "loss": 0.3702, |
| "step": 335 |
| }, |
| { |
| "epoch": 4.84761045987376, |
| "grad_norm": 0.07040073406374138, |
| "learning_rate": 1.6626095987862134e-07, |
| "loss": 0.3703, |
| "step": 336 |
| }, |
| { |
| "epoch": 4.862037871956717, |
| "grad_norm": 0.06922886880619938, |
| "learning_rate": 1.3138580112979083e-07, |
| "loss": 0.3693, |
| "step": 337 |
| }, |
| { |
| "epoch": 4.876465284039675, |
| "grad_norm": 0.07071379713825318, |
| "learning_rate": 1.0060516977462797e-07, |
| "loss": 0.3683, |
| "step": 338 |
| }, |
| { |
| "epoch": 4.890892696122633, |
| "grad_norm": 0.0707082821934966, |
| "learning_rate": 7.39222269999651e-08, |
| "loss": 0.3795, |
| "step": 339 |
| }, |
| { |
| "epoch": 4.905320108205591, |
| "grad_norm": 0.06885902870580198, |
| "learning_rate": 5.133971315788966e-08, |
| "loss": 0.3671, |
| "step": 340 |
| }, |
| { |
| "epoch": 4.919747520288548, |
| "grad_norm": 0.06873108316208869, |
| "learning_rate": 3.285994748430721e-08, |
| "loss": 0.3738, |
| "step": 341 |
| }, |
| { |
| "epoch": 4.934174932371506, |
| "grad_norm": 0.06944038886061686, |
| "learning_rate": 1.8484827860754118e-08, |
| "loss": 0.3691, |
| "step": 342 |
| }, |
| { |
| "epoch": 4.948602344454463, |
| "grad_norm": 0.07137425763584286, |
| "learning_rate": 8.215830619486831e-09, |
| "loss": 0.3709, |
| "step": 343 |
| }, |
| { |
| "epoch": 4.963029756537421, |
| "grad_norm": 0.0716171836993154, |
| "learning_rate": 2.054010391856487e-09, |
| "loss": 0.3704, |
| "step": 344 |
| }, |
| { |
| "epoch": 4.977457168620378, |
| "grad_norm": 0.0713787661901706, |
| "learning_rate": 0.0, |
| "loss": 0.3736, |
| "step": 345 |
| }, |
| { |
| "epoch": 4.977457168620378, |
| "step": 345, |
| "total_flos": 9.173613467414823e+18, |
| "train_loss": 0.4462836230146712, |
| "train_runtime": 80545.5288, |
| "train_samples_per_second": 2.202, |
| "train_steps_per_second": 0.004 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 345, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.173613467414823e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |