| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9989187240944313, |
| "eval_steps": 500, |
| "global_step": 5547, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005406379527842855, |
| "grad_norm": 5.67321238470604, |
| "learning_rate": 1.801801801801802e-07, |
| "loss": 0.8785, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01081275905568571, |
| "grad_norm": 5.2575759647356906, |
| "learning_rate": 3.603603603603604e-07, |
| "loss": 0.8654, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.016219138583528563, |
| "grad_norm": 3.8360253130807958, |
| "learning_rate": 5.405405405405406e-07, |
| "loss": 0.8205, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02162551811137142, |
| "grad_norm": 1.722668988638544, |
| "learning_rate": 7.207207207207208e-07, |
| "loss": 0.778, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.027031897639214274, |
| "grad_norm": 1.3197714991034968, |
| "learning_rate": 9.00900900900901e-07, |
| "loss": 0.7286, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03243827716705713, |
| "grad_norm": 0.8474482237034886, |
| "learning_rate": 1.0810810810810812e-06, |
| "loss": 0.6968, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.03784465669489998, |
| "grad_norm": 0.5645420283585227, |
| "learning_rate": 1.2612612612612613e-06, |
| "loss": 0.6689, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04325103622274284, |
| "grad_norm": 0.43605656948964683, |
| "learning_rate": 1.4414414414414416e-06, |
| "loss": 0.6408, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.04865741575058569, |
| "grad_norm": 0.4339497028480959, |
| "learning_rate": 1.6216216216216219e-06, |
| "loss": 0.6153, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05406379527842855, |
| "grad_norm": 0.3843592033040236, |
| "learning_rate": 1.801801801801802e-06, |
| "loss": 0.6082, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0594701748062714, |
| "grad_norm": 0.37685068673558353, |
| "learning_rate": 1.9819819819819822e-06, |
| "loss": 0.6049, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06487655433411425, |
| "grad_norm": 0.4392453448959536, |
| "learning_rate": 2.1621621621621623e-06, |
| "loss": 0.5889, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07028293386195711, |
| "grad_norm": 0.4212233804351266, |
| "learning_rate": 2.3423423423423424e-06, |
| "loss": 0.5842, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07568931338979996, |
| "grad_norm": 0.38709432000579613, |
| "learning_rate": 2.5225225225225225e-06, |
| "loss": 0.592, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08109569291764282, |
| "grad_norm": 0.3988233764060424, |
| "learning_rate": 2.702702702702703e-06, |
| "loss": 0.5732, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08650207244548568, |
| "grad_norm": 0.41395637177292804, |
| "learning_rate": 2.882882882882883e-06, |
| "loss": 0.5679, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09190845197332853, |
| "grad_norm": 0.37677030114794524, |
| "learning_rate": 3.063063063063063e-06, |
| "loss": 0.5583, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09731483150117139, |
| "grad_norm": 0.38451911721974225, |
| "learning_rate": 3.2432432432432437e-06, |
| "loss": 0.5658, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.10272121102901424, |
| "grad_norm": 0.36190379869625294, |
| "learning_rate": 3.423423423423424e-06, |
| "loss": 0.5554, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1081275905568571, |
| "grad_norm": 0.3927866832932917, |
| "learning_rate": 3.603603603603604e-06, |
| "loss": 0.5534, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11353397008469994, |
| "grad_norm": 0.4109637951464883, |
| "learning_rate": 3.7837837837837844e-06, |
| "loss": 0.5527, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1189403496125428, |
| "grad_norm": 0.4189875109517182, |
| "learning_rate": 3.9639639639639645e-06, |
| "loss": 0.5521, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12434672914038565, |
| "grad_norm": 0.44103289873218365, |
| "learning_rate": 4.1441441441441446e-06, |
| "loss": 0.55, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1297531086682285, |
| "grad_norm": 0.47624121719255225, |
| "learning_rate": 4.324324324324325e-06, |
| "loss": 0.5455, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.13515948819607138, |
| "grad_norm": 0.4127382950104387, |
| "learning_rate": 4.504504504504505e-06, |
| "loss": 0.5392, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.14056586772391422, |
| "grad_norm": 0.42849081039324655, |
| "learning_rate": 4.684684684684685e-06, |
| "loss": 0.5317, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1459722472517571, |
| "grad_norm": 0.4104060308344588, |
| "learning_rate": 4.864864864864866e-06, |
| "loss": 0.5317, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.15137862677959993, |
| "grad_norm": 0.5046982359974199, |
| "learning_rate": 5.045045045045045e-06, |
| "loss": 0.5342, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.15678500630744277, |
| "grad_norm": 0.4507880118410215, |
| "learning_rate": 5.225225225225226e-06, |
| "loss": 0.5325, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.16219138583528564, |
| "grad_norm": 0.42877102726223915, |
| "learning_rate": 5.405405405405406e-06, |
| "loss": 0.5236, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16759776536312848, |
| "grad_norm": 0.5283894117116334, |
| "learning_rate": 5.585585585585585e-06, |
| "loss": 0.5316, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17300414489097135, |
| "grad_norm": 0.45448942603717846, |
| "learning_rate": 5.765765765765766e-06, |
| "loss": 0.5304, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1784105244188142, |
| "grad_norm": 0.4459611601163911, |
| "learning_rate": 5.945945945945947e-06, |
| "loss": 0.5307, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.18381690394665706, |
| "grad_norm": 0.4167802385045301, |
| "learning_rate": 6.126126126126126e-06, |
| "loss": 0.5142, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.1892232834744999, |
| "grad_norm": 0.45167071134408077, |
| "learning_rate": 6.3063063063063065e-06, |
| "loss": 0.5252, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.19462966300234277, |
| "grad_norm": 0.3815004250489287, |
| "learning_rate": 6.486486486486487e-06, |
| "loss": 0.5203, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.20003604253018561, |
| "grad_norm": 0.4189611440474181, |
| "learning_rate": 6.666666666666667e-06, |
| "loss": 0.5198, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.20544242205802848, |
| "grad_norm": 0.4356383135556994, |
| "learning_rate": 6.846846846846848e-06, |
| "loss": 0.5164, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.21084880158587133, |
| "grad_norm": 0.4146665581812368, |
| "learning_rate": 7.027027027027028e-06, |
| "loss": 0.5201, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2162551811137142, |
| "grad_norm": 0.46098403607909094, |
| "learning_rate": 7.207207207207208e-06, |
| "loss": 0.5241, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.22166156064155704, |
| "grad_norm": 0.4173832279688485, |
| "learning_rate": 7.387387387387388e-06, |
| "loss": 0.5141, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.22706794016939988, |
| "grad_norm": 0.45342411753034784, |
| "learning_rate": 7.567567567567569e-06, |
| "loss": 0.5058, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.23247431969724275, |
| "grad_norm": 0.5556218847582134, |
| "learning_rate": 7.747747747747749e-06, |
| "loss": 0.5132, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2378806992250856, |
| "grad_norm": 0.4159604294450067, |
| "learning_rate": 7.927927927927929e-06, |
| "loss": 0.5116, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.24328707875292846, |
| "grad_norm": 0.5011827344554423, |
| "learning_rate": 8.108108108108109e-06, |
| "loss": 0.5168, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2486934582807713, |
| "grad_norm": 0.4837033851909487, |
| "learning_rate": 8.288288288288289e-06, |
| "loss": 0.5078, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.25409983780861417, |
| "grad_norm": 0.43704376571990733, |
| "learning_rate": 8.46846846846847e-06, |
| "loss": 0.5033, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.259506217336457, |
| "grad_norm": 0.3998543920237395, |
| "learning_rate": 8.64864864864865e-06, |
| "loss": 0.5023, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.26491259686429985, |
| "grad_norm": 0.5026204387708488, |
| "learning_rate": 8.82882882882883e-06, |
| "loss": 0.5101, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.27031897639214275, |
| "grad_norm": 0.5354755864920291, |
| "learning_rate": 9.00900900900901e-06, |
| "loss": 0.508, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.2757253559199856, |
| "grad_norm": 0.4703091181508223, |
| "learning_rate": 9.189189189189191e-06, |
| "loss": 0.5057, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.28113173544782843, |
| "grad_norm": 0.5066877793509437, |
| "learning_rate": 9.36936936936937e-06, |
| "loss": 0.5026, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.2865381149756713, |
| "grad_norm": 0.46090960041448786, |
| "learning_rate": 9.54954954954955e-06, |
| "loss": 0.5106, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.2919444945035142, |
| "grad_norm": 0.48562395925030005, |
| "learning_rate": 9.729729729729732e-06, |
| "loss": 0.4974, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.297350874031357, |
| "grad_norm": 0.4646077201771921, |
| "learning_rate": 9.90990990990991e-06, |
| "loss": 0.4999, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.30275725355919986, |
| "grad_norm": 0.4546070354869126, |
| "learning_rate": 9.999975246862685e-06, |
| "loss": 0.5103, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3081636330870427, |
| "grad_norm": 0.4529892857679444, |
| "learning_rate": 9.999777223234682e-06, |
| "loss": 0.5015, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.31357001261488554, |
| "grad_norm": 0.42533238661448763, |
| "learning_rate": 9.999381183821387e-06, |
| "loss": 0.5079, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.31897639214272844, |
| "grad_norm": 0.4319966793689572, |
| "learning_rate": 9.998787144307906e-06, |
| "loss": 0.4946, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.3243827716705713, |
| "grad_norm": 0.5664739889982127, |
| "learning_rate": 9.997995128221131e-06, |
| "loss": 0.4963, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.3297891511984141, |
| "grad_norm": 0.4571640893613164, |
| "learning_rate": 9.9970051669288e-06, |
| "loss": 0.4937, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.33519553072625696, |
| "grad_norm": 0.46148944851299945, |
| "learning_rate": 9.995817299638244e-06, |
| "loss": 0.5002, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.34060191025409986, |
| "grad_norm": 0.4844168889608816, |
| "learning_rate": 9.994431573394861e-06, |
| "loss": 0.5029, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.3460082897819427, |
| "grad_norm": 0.4279693386473206, |
| "learning_rate": 9.99284804308023e-06, |
| "loss": 0.4952, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.35141466930978554, |
| "grad_norm": 0.5233101609153901, |
| "learning_rate": 9.991066771409941e-06, |
| "loss": 0.4915, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.3568210488376284, |
| "grad_norm": 0.4633208414221673, |
| "learning_rate": 9.989087828931121e-06, |
| "loss": 0.4981, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3622274283654713, |
| "grad_norm": 0.450997223108701, |
| "learning_rate": 9.986911294019631e-06, |
| "loss": 0.4975, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3676338078933141, |
| "grad_norm": 0.42452529740346523, |
| "learning_rate": 9.984537252876969e-06, |
| "loss": 0.4908, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.37304018742115697, |
| "grad_norm": 0.46365207035760786, |
| "learning_rate": 9.981965799526846e-06, |
| "loss": 0.5016, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.3784465669489998, |
| "grad_norm": 0.5296232726547591, |
| "learning_rate": 9.97919703581147e-06, |
| "loss": 0.4876, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.38385294647684265, |
| "grad_norm": 0.401880074927354, |
| "learning_rate": 9.976231071387513e-06, |
| "loss": 0.4903, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.38925932600468555, |
| "grad_norm": 0.42396559048043103, |
| "learning_rate": 9.973068023721761e-06, |
| "loss": 0.4898, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.3946657055325284, |
| "grad_norm": 0.46944427807049693, |
| "learning_rate": 9.969708018086472e-06, |
| "loss": 0.4881, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.40007208506037123, |
| "grad_norm": 0.4333253518146232, |
| "learning_rate": 9.966151187554403e-06, |
| "loss": 0.4895, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.40547846458821407, |
| "grad_norm": 0.37661719489991125, |
| "learning_rate": 9.962397672993552e-06, |
| "loss": 0.487, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.41088484411605697, |
| "grad_norm": 0.4603392631171023, |
| "learning_rate": 9.958447623061564e-06, |
| "loss": 0.4872, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4162912236438998, |
| "grad_norm": 0.3927558003883759, |
| "learning_rate": 9.954301194199864e-06, |
| "loss": 0.4903, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.42169760317174265, |
| "grad_norm": 0.42897879593990096, |
| "learning_rate": 9.949958550627436e-06, |
| "loss": 0.4885, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.4271039826995855, |
| "grad_norm": 0.4924374446694773, |
| "learning_rate": 9.945419864334344e-06, |
| "loss": 0.4774, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4325103622274284, |
| "grad_norm": 0.42518945879483444, |
| "learning_rate": 9.940685315074898e-06, |
| "loss": 0.4754, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.43791674175527123, |
| "grad_norm": 0.399260485682431, |
| "learning_rate": 9.935755090360554e-06, |
| "loss": 0.4765, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4433231212831141, |
| "grad_norm": 0.37083672732602235, |
| "learning_rate": 9.930629385452475e-06, |
| "loss": 0.4757, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.4487295008109569, |
| "grad_norm": 0.41759222116367195, |
| "learning_rate": 9.925308403353801e-06, |
| "loss": 0.4871, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.45413588033879976, |
| "grad_norm": 0.4969932090759188, |
| "learning_rate": 9.919792354801614e-06, |
| "loss": 0.4792, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.45954225986664266, |
| "grad_norm": 0.5029960802938596, |
| "learning_rate": 9.914081458258582e-06, |
| "loss": 0.4896, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.4649486393944855, |
| "grad_norm": 0.40244747307174517, |
| "learning_rate": 9.908175939904317e-06, |
| "loss": 0.492, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.47035501892232834, |
| "grad_norm": 0.4109529990790928, |
| "learning_rate": 9.902076033626409e-06, |
| "loss": 0.4863, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.4757613984501712, |
| "grad_norm": 0.4151789891424962, |
| "learning_rate": 9.89578198101117e-06, |
| "loss": 0.48, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.4811677779780141, |
| "grad_norm": 0.4884869421566706, |
| "learning_rate": 9.88929403133406e-06, |
| "loss": 0.4875, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.4865741575058569, |
| "grad_norm": 0.39469839728031286, |
| "learning_rate": 9.882612441549817e-06, |
| "loss": 0.4886, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.49198053703369976, |
| "grad_norm": 0.41142281651530643, |
| "learning_rate": 9.875737476282283e-06, |
| "loss": 0.4837, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4973869165615426, |
| "grad_norm": 0.4420691443729092, |
| "learning_rate": 9.868669407813919e-06, |
| "loss": 0.4877, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5027932960893855, |
| "grad_norm": 0.37836126000922937, |
| "learning_rate": 9.86140851607502e-06, |
| "loss": 0.4826, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5081996756172283, |
| "grad_norm": 0.42066137745562854, |
| "learning_rate": 9.85395508863264e-06, |
| "loss": 0.4827, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5136060551450712, |
| "grad_norm": 0.45522508321704436, |
| "learning_rate": 9.846309420679181e-06, |
| "loss": 0.4807, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.519012434672914, |
| "grad_norm": 0.424109403832704, |
| "learning_rate": 9.838471815020731e-06, |
| "loss": 0.483, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5244188142007569, |
| "grad_norm": 0.4571075574503357, |
| "learning_rate": 9.830442582065046e-06, |
| "loss": 0.4847, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5298251937285997, |
| "grad_norm": 0.39544147521974715, |
| "learning_rate": 9.822222039809265e-06, |
| "loss": 0.4894, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5352315732564425, |
| "grad_norm": 0.41512982878770877, |
| "learning_rate": 9.813810513827324e-06, |
| "loss": 0.4757, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5406379527842855, |
| "grad_norm": 0.44241530882704766, |
| "learning_rate": 9.805208337257048e-06, |
| "loss": 0.4844, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.5460443323121283, |
| "grad_norm": 0.39829234416158904, |
| "learning_rate": 9.79641585078697e-06, |
| "loss": 0.4712, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.5514507118399712, |
| "grad_norm": 0.37741532471866907, |
| "learning_rate": 9.787433402642823e-06, |
| "loss": 0.4793, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.556857091367814, |
| "grad_norm": 0.4148300916885638, |
| "learning_rate": 9.778261348573766e-06, |
| "loss": 0.4838, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.5622634708956569, |
| "grad_norm": 0.4432803310345476, |
| "learning_rate": 9.76890005183828e-06, |
| "loss": 0.4808, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.5676698504234997, |
| "grad_norm": 0.44053440283249773, |
| "learning_rate": 9.759349883189788e-06, |
| "loss": 0.4855, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.5730762299513426, |
| "grad_norm": 0.47129417304470445, |
| "learning_rate": 9.749611220861975e-06, |
| "loss": 0.4825, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.5784826094791854, |
| "grad_norm": 0.3519052622952217, |
| "learning_rate": 9.739684450553796e-06, |
| "loss": 0.4672, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.5838889890070283, |
| "grad_norm": 0.41946435282373756, |
| "learning_rate": 9.729569965414214e-06, |
| "loss": 0.4749, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.5892953685348712, |
| "grad_norm": 0.40367405116733107, |
| "learning_rate": 9.719268166026619e-06, |
| "loss": 0.4714, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.594701748062714, |
| "grad_norm": 0.389163994716956, |
| "learning_rate": 9.70877946039297e-06, |
| "loss": 0.4762, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6001081275905569, |
| "grad_norm": 0.3924144038563765, |
| "learning_rate": 9.698104263917632e-06, |
| "loss": 0.479, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6055145071183997, |
| "grad_norm": 0.38077440580004723, |
| "learning_rate": 9.687242999390923e-06, |
| "loss": 0.4743, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6109208866462426, |
| "grad_norm": 0.4144915670436874, |
| "learning_rate": 9.676196096972375e-06, |
| "loss": 0.4831, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6163272661740854, |
| "grad_norm": 0.4019523099418982, |
| "learning_rate": 9.664963994173695e-06, |
| "loss": 0.4811, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6217336457019282, |
| "grad_norm": 0.3870772083799463, |
| "learning_rate": 9.653547135841432e-06, |
| "loss": 0.482, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6271400252297711, |
| "grad_norm": 0.3774486403943126, |
| "learning_rate": 9.641945974139368e-06, |
| "loss": 0.4808, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.632546404757614, |
| "grad_norm": 0.3669418201630717, |
| "learning_rate": 9.630160968530601e-06, |
| "loss": 0.4742, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.6379527842854569, |
| "grad_norm": 0.3767330377559856, |
| "learning_rate": 9.618192585759358e-06, |
| "loss": 0.4793, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.6433591638132997, |
| "grad_norm": 0.4109728050110914, |
| "learning_rate": 9.606041299832499e-06, |
| "loss": 0.476, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.6487655433411426, |
| "grad_norm": 0.42214280261521075, |
| "learning_rate": 9.593707592000751e-06, |
| "loss": 0.4719, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.6541719228689854, |
| "grad_norm": 0.40015675805718526, |
| "learning_rate": 9.581191950739651e-06, |
| "loss": 0.4802, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.6595783023968282, |
| "grad_norm": 0.3652325798758447, |
| "learning_rate": 9.568494871730184e-06, |
| "loss": 0.4751, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.6649846819246711, |
| "grad_norm": 0.4758040665812572, |
| "learning_rate": 9.555616857839171e-06, |
| "loss": 0.476, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.6703910614525139, |
| "grad_norm": 0.4088256926011169, |
| "learning_rate": 9.542558419099348e-06, |
| "loss": 0.4671, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.6757974409803568, |
| "grad_norm": 0.3777516778350075, |
| "learning_rate": 9.529320072689157e-06, |
| "loss": 0.4663, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.6812038205081997, |
| "grad_norm": 0.40279858714603456, |
| "learning_rate": 9.515902342912268e-06, |
| "loss": 0.4696, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.6866102000360426, |
| "grad_norm": 0.4553420901856075, |
| "learning_rate": 9.50230576117682e-06, |
| "loss": 0.4742, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.6920165795638854, |
| "grad_norm": 0.4339586123054069, |
| "learning_rate": 9.488530865974365e-06, |
| "loss": 0.4701, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.6974229590917282, |
| "grad_norm": 0.4249972919470697, |
| "learning_rate": 9.47457820285855e-06, |
| "loss": 0.4701, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7028293386195711, |
| "grad_norm": 0.5108244833979698, |
| "learning_rate": 9.460448324423508e-06, |
| "loss": 0.4767, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7082357181474139, |
| "grad_norm": 0.41029950466124815, |
| "learning_rate": 9.446141790281961e-06, |
| "loss": 0.4757, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7136420976752568, |
| "grad_norm": 0.395665406767247, |
| "learning_rate": 9.431659167043079e-06, |
| "loss": 0.4657, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7190484772030996, |
| "grad_norm": 0.3916187354896928, |
| "learning_rate": 9.417001028290019e-06, |
| "loss": 0.47, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7244548567309426, |
| "grad_norm": 0.3841663885450239, |
| "learning_rate": 9.402167954557218e-06, |
| "loss": 0.4622, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7298612362587854, |
| "grad_norm": 0.33000158409293234, |
| "learning_rate": 9.387160533307398e-06, |
| "loss": 0.4735, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7352676157866282, |
| "grad_norm": 0.35110054752545317, |
| "learning_rate": 9.371979358908302e-06, |
| "loss": 0.4647, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.7406739953144711, |
| "grad_norm": 0.4060026085740451, |
| "learning_rate": 9.356625032609157e-06, |
| "loss": 0.4716, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.7460803748423139, |
| "grad_norm": 0.4014001214789219, |
| "learning_rate": 9.341098162516848e-06, |
| "loss": 0.4753, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.7514867543701568, |
| "grad_norm": 0.4466537387424745, |
| "learning_rate": 9.325399363571853e-06, |
| "loss": 0.4637, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.7568931338979996, |
| "grad_norm": 0.3789496760613153, |
| "learning_rate": 9.309529257523873e-06, |
| "loss": 0.4833, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.7622995134258425, |
| "grad_norm": 0.3871711262176569, |
| "learning_rate": 9.293488472907213e-06, |
| "loss": 0.4741, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.7677058929536853, |
| "grad_norm": 0.33522935773230744, |
| "learning_rate": 9.277277645015895e-06, |
| "loss": 0.4645, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.7731122724815283, |
| "grad_norm": 0.36926574454217775, |
| "learning_rate": 9.260897415878484e-06, |
| "loss": 0.4737, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.7785186520093711, |
| "grad_norm": 0.38628683202935965, |
| "learning_rate": 9.244348434232676e-06, |
| "loss": 0.4807, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.7839250315372139, |
| "grad_norm": 0.3723802508008121, |
| "learning_rate": 9.227631355499588e-06, |
| "loss": 0.4711, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.7893314110650568, |
| "grad_norm": 0.43275316141725356, |
| "learning_rate": 9.210746841757816e-06, |
| "loss": 0.4606, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.7947377905928996, |
| "grad_norm": 0.36470233384616396, |
| "learning_rate": 9.193695561717207e-06, |
| "loss": 0.4789, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8001441701207425, |
| "grad_norm": 0.39548085338311784, |
| "learning_rate": 9.176478190692369e-06, |
| "loss": 0.4713, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8055505496485853, |
| "grad_norm": 0.3553750033222167, |
| "learning_rate": 9.159095410575931e-06, |
| "loss": 0.4725, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8109569291764281, |
| "grad_norm": 0.3637209745858356, |
| "learning_rate": 9.14154790981154e-06, |
| "loss": 0.4594, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.816363308704271, |
| "grad_norm": 0.3827679215177506, |
| "learning_rate": 9.12383638336659e-06, |
| "loss": 0.4731, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.8217696882321139, |
| "grad_norm": 0.3932319357502074, |
| "learning_rate": 9.105961532704695e-06, |
| "loss": 0.4744, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.8271760677599568, |
| "grad_norm": 0.37420610924572006, |
| "learning_rate": 9.08792406575792e-06, |
| "loss": 0.4596, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8325824472877996, |
| "grad_norm": 0.36958869694379687, |
| "learning_rate": 9.069724696898727e-06, |
| "loss": 0.4644, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8379888268156425, |
| "grad_norm": 0.4296266126218128, |
| "learning_rate": 9.051364146911696e-06, |
| "loss": 0.4695, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.8433952063434853, |
| "grad_norm": 0.3552866307907092, |
| "learning_rate": 9.03284314296497e-06, |
| "loss": 0.4699, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.8488015858713281, |
| "grad_norm": 0.36327016829544306, |
| "learning_rate": 9.01416241858146e-06, |
| "loss": 0.4669, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.854207965399171, |
| "grad_norm": 0.375420429355353, |
| "learning_rate": 8.995322713609792e-06, |
| "loss": 0.4672, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.8596143449270138, |
| "grad_norm": 0.5173900256611019, |
| "learning_rate": 8.976324774195005e-06, |
| "loss": 0.4683, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.8650207244548568, |
| "grad_norm": 0.39427484151317893, |
| "learning_rate": 8.957169352749005e-06, |
| "loss": 0.4652, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.8704271039826996, |
| "grad_norm": 0.4127231026821577, |
| "learning_rate": 8.937857207920751e-06, |
| "loss": 0.4693, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.8758334835105425, |
| "grad_norm": 0.3557084122875894, |
| "learning_rate": 8.918389104566232e-06, |
| "loss": 0.4653, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.8812398630383853, |
| "grad_norm": 0.32279027303173025, |
| "learning_rate": 8.898765813718155e-06, |
| "loss": 0.4575, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.8866462425662281, |
| "grad_norm": 0.3597815860403744, |
| "learning_rate": 8.878988112555415e-06, |
| "loss": 0.4635, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.892052622094071, |
| "grad_norm": 0.3672011391559523, |
| "learning_rate": 8.85905678437232e-06, |
| "loss": 0.4637, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.8974590016219138, |
| "grad_norm": 0.39802107641409196, |
| "learning_rate": 8.838972618547561e-06, |
| "loss": 0.4668, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9028653811497567, |
| "grad_norm": 0.35901725656975336, |
| "learning_rate": 8.81873641051295e-06, |
| "loss": 0.4626, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.9082717606775995, |
| "grad_norm": 0.45574284613082794, |
| "learning_rate": 8.798348961721925e-06, |
| "loss": 0.4618, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.9136781402054425, |
| "grad_norm": 0.33960849857370073, |
| "learning_rate": 8.777811079617793e-06, |
| "loss": 0.4735, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.9190845197332853, |
| "grad_norm": 0.36806947123886746, |
| "learning_rate": 8.757123577601771e-06, |
| "loss": 0.4642, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.9244908992611282, |
| "grad_norm": 0.36728162811734544, |
| "learning_rate": 8.736287275000755e-06, |
| "loss": 0.465, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.929897278788971, |
| "grad_norm": 0.38164336488797146, |
| "learning_rate": 8.715302997034876e-06, |
| "loss": 0.4702, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.9353036583168138, |
| "grad_norm": 0.34605322849280384, |
| "learning_rate": 8.694171574784818e-06, |
| "loss": 0.4674, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9407100378446567, |
| "grad_norm": 0.3353439147558085, |
| "learning_rate": 8.672893845158908e-06, |
| "loss": 0.4701, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9461164173724995, |
| "grad_norm": 0.3437002297587831, |
| "learning_rate": 8.651470650859955e-06, |
| "loss": 0.4599, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.9515227969003424, |
| "grad_norm": 0.3431363969879203, |
| "learning_rate": 8.629902840351898e-06, |
| "loss": 0.4637, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.9569291764281853, |
| "grad_norm": 0.3765462141591892, |
| "learning_rate": 8.608191267826179e-06, |
| "loss": 0.4694, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.9623355559560282, |
| "grad_norm": 0.420048049416004, |
| "learning_rate": 8.586336793167926e-06, |
| "loss": 0.4641, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 0.412279889648995, |
| "learning_rate": 8.5643402819219e-06, |
| "loss": 0.4566, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.9731483150117138, |
| "grad_norm": 0.3299568555620076, |
| "learning_rate": 8.542202605258204e-06, |
| "loss": 0.463, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.9785546945395567, |
| "grad_norm": 0.32198105439404867, |
| "learning_rate": 8.519924639937786e-06, |
| "loss": 0.4617, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.9839610740673995, |
| "grad_norm": 0.3549245136848414, |
| "learning_rate": 8.49750726827772e-06, |
| "loss": 0.4565, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.9893674535952424, |
| "grad_norm": 0.3392271575380573, |
| "learning_rate": 8.474951378116253e-06, |
| "loss": 0.4639, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.9947738331230852, |
| "grad_norm": 0.3208227345701, |
| "learning_rate": 8.452257862777653e-06, |
| "loss": 0.4546, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.000180212650928, |
| "grad_norm": 0.4559641919273857, |
| "learning_rate": 8.42942762103681e-06, |
| "loss": 0.4837, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.005586592178771, |
| "grad_norm": 0.3598410288175877, |
| "learning_rate": 8.406461557083666e-06, |
| "loss": 0.4404, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.0109929717066137, |
| "grad_norm": 0.3857145460836866, |
| "learning_rate": 8.383360580487378e-06, |
| "loss": 0.4393, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.0163993512344567, |
| "grad_norm": 0.34505752597289024, |
| "learning_rate": 8.360125606160323e-06, |
| "loss": 0.4422, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.0218057307622994, |
| "grad_norm": 0.3739277339941646, |
| "learning_rate": 8.336757554321832e-06, |
| "loss": 0.4424, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.0272121102901424, |
| "grad_norm": 0.3968787668713752, |
| "learning_rate": 8.313257350461774e-06, |
| "loss": 0.4376, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.0326184898179853, |
| "grad_norm": 0.3451897271410753, |
| "learning_rate": 8.289625925303877e-06, |
| "loss": 0.4425, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.038024869345828, |
| "grad_norm": 0.40010047495902706, |
| "learning_rate": 8.265864214768883e-06, |
| "loss": 0.4503, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.043431248873671, |
| "grad_norm": 0.3736188460908676, |
| "learning_rate": 8.241973159937482e-06, |
| "loss": 0.4406, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.0488376284015137, |
| "grad_norm": 0.3394542766186862, |
| "learning_rate": 8.217953707013025e-06, |
| "loss": 0.4393, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.0542440079293567, |
| "grad_norm": 0.35077872709329283, |
| "learning_rate": 8.193806807284064e-06, |
| "loss": 0.4383, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.0596503874571994, |
| "grad_norm": 0.3441941331677373, |
| "learning_rate": 8.169533417086673e-06, |
| "loss": 0.4286, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.0650567669850424, |
| "grad_norm": 0.34884852607611294, |
| "learning_rate": 8.145134497766566e-06, |
| "loss": 0.4467, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.070463146512885, |
| "grad_norm": 0.40097746242132437, |
| "learning_rate": 8.120611015641036e-06, |
| "loss": 0.4363, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.075869526040728, |
| "grad_norm": 0.33184835023647064, |
| "learning_rate": 8.095963941960667e-06, |
| "loss": 0.437, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.081275905568571, |
| "grad_norm": 0.394546885758411, |
| "learning_rate": 8.071194252870887e-06, |
| "loss": 0.432, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.0866822850964137, |
| "grad_norm": 0.472784994513626, |
| "learning_rate": 8.046302929373286e-06, |
| "loss": 0.4367, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.0920886646242567, |
| "grad_norm": 0.3602670786653786, |
| "learning_rate": 8.021290957286787e-06, |
| "loss": 0.4352, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.0974950441520994, |
| "grad_norm": 0.3963387130392289, |
| "learning_rate": 7.996159327208581e-06, |
| "loss": 0.4434, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.1029014236799424, |
| "grad_norm": 0.37403782295160953, |
| "learning_rate": 7.97090903447491e-06, |
| "loss": 0.4326, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.108307803207785, |
| "grad_norm": 0.37350913921356577, |
| "learning_rate": 7.945541079121642e-06, |
| "loss": 0.4485, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.113714182735628, |
| "grad_norm": 0.3661212920976343, |
| "learning_rate": 7.920056465844658e-06, |
| "loss": 0.4328, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.119120562263471, |
| "grad_norm": 0.3507951321263283, |
| "learning_rate": 7.894456203960075e-06, |
| "loss": 0.4339, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.1245269417913137, |
| "grad_norm": 0.31935101139873434, |
| "learning_rate": 7.868741307364255e-06, |
| "loss": 0.4307, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.1299333213191567, |
| "grad_norm": 0.3240469373544592, |
| "learning_rate": 7.842912794493667e-06, |
| "loss": 0.4357, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.1353397008469994, |
| "grad_norm": 0.4024576218630106, |
| "learning_rate": 7.81697168828454e-06, |
| "loss": 0.4429, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.1407460803748424, |
| "grad_norm": 0.4057186928939639, |
| "learning_rate": 7.790919016132351e-06, |
| "loss": 0.4435, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.146152459902685, |
| "grad_norm": 0.4339123108369387, |
| "learning_rate": 7.764755809851141e-06, |
| "loss": 0.4375, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.151558839430528, |
| "grad_norm": 0.3423301493159426, |
| "learning_rate": 7.738483105632644e-06, |
| "loss": 0.4408, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.1569652189583708, |
| "grad_norm": 0.3049599421413694, |
| "learning_rate": 7.712101944005256e-06, |
| "loss": 0.442, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.1623715984862137, |
| "grad_norm": 0.3235699906736669, |
| "learning_rate": 7.685613369792815e-06, |
| "loss": 0.4389, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.1677779780140565, |
| "grad_norm": 0.38824198475727123, |
| "learning_rate": 7.65901843207323e-06, |
| "loss": 0.4372, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.1731843575418994, |
| "grad_norm": 0.3485465278129701, |
| "learning_rate": 7.63231818413692e-06, |
| "loss": 0.4313, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.1785907370697424, |
| "grad_norm": 0.3607061695090595, |
| "learning_rate": 7.605513683445118e-06, |
| "loss": 0.433, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.183997116597585, |
| "grad_norm": 0.35864049794241826, |
| "learning_rate": 7.578605991587974e-06, |
| "loss": 0.43, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.189403496125428, |
| "grad_norm": 0.3622129404816991, |
| "learning_rate": 7.5515961742425146e-06, |
| "loss": 0.4357, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.1948098756532708, |
| "grad_norm": 0.37719764002603634, |
| "learning_rate": 7.524485301130443e-06, |
| "loss": 0.4363, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.2002162551811137, |
| "grad_norm": 0.32038054153975193, |
| "learning_rate": 7.497274445975762e-06, |
| "loss": 0.4283, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.2056226347089565, |
| "grad_norm": 0.3897896894072551, |
| "learning_rate": 7.469964686462261e-06, |
| "loss": 0.4416, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.2110290142367994, |
| "grad_norm": 0.32144151391797593, |
| "learning_rate": 7.4425571041908254e-06, |
| "loss": 0.4388, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.2164353937646424, |
| "grad_norm": 0.3553047783046372, |
| "learning_rate": 7.415052784636603e-06, |
| "loss": 0.4401, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.2218417732924851, |
| "grad_norm": 0.31787401750902194, |
| "learning_rate": 7.387452817106017e-06, |
| "loss": 0.4313, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.227248152820328, |
| "grad_norm": 0.3736244875654426, |
| "learning_rate": 7.359758294693618e-06, |
| "loss": 0.4392, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.2326545323481708, |
| "grad_norm": 0.34863542131710556, |
| "learning_rate": 7.331970314238799e-06, |
| "loss": 0.4405, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.2380609118760137, |
| "grad_norm": 0.414690288534652, |
| "learning_rate": 7.304089976282348e-06, |
| "loss": 0.4401, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.2434672914038565, |
| "grad_norm": 0.356866165228421, |
| "learning_rate": 7.276118385022865e-06, |
| "loss": 0.4241, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.2488736709316994, |
| "grad_norm": 0.33264484884680307, |
| "learning_rate": 7.248056648273034e-06, |
| "loss": 0.4425, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.2542800504595424, |
| "grad_norm": 0.4175310788334551, |
| "learning_rate": 7.2199058774157375e-06, |
| "loss": 0.4276, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.2596864299873851, |
| "grad_norm": 0.38229588901030637, |
| "learning_rate": 7.1916671873600515e-06, |
| "loss": 0.4312, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.2650928095152278, |
| "grad_norm": 0.338696312422094, |
| "learning_rate": 7.163341696497084e-06, |
| "loss": 0.4405, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.2704991890430708, |
| "grad_norm": 0.32136223620818055, |
| "learning_rate": 7.134930526655679e-06, |
| "loss": 0.4347, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.2759055685709138, |
| "grad_norm": 0.3590441906111087, |
| "learning_rate": 7.106434803057998e-06, |
| "loss": 0.4392, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.2813119480987565, |
| "grad_norm": 0.3822900334441054, |
| "learning_rate": 7.077855654274939e-06, |
| "loss": 0.4329, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.2867183276265994, |
| "grad_norm": 0.4150924729603716, |
| "learning_rate": 7.04919421218145e-06, |
| "loss": 0.4344, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.2921247071544422, |
| "grad_norm": 0.31977805162237566, |
| "learning_rate": 7.020451611911703e-06, |
| "loss": 0.4274, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.2975310866822851, |
| "grad_norm": 0.4042413750463481, |
| "learning_rate": 6.9916289918141265e-06, |
| "loss": 0.4383, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.3029374662101278, |
| "grad_norm": 0.32750161889881924, |
| "learning_rate": 6.962727493406335e-06, |
| "loss": 0.4363, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.3083438457379708, |
| "grad_norm": 0.34681784503652924, |
| "learning_rate": 6.9337482613299065e-06, |
| "loss": 0.4251, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.3137502252658138, |
| "grad_norm": 0.31392667825247955, |
| "learning_rate": 6.904692443305059e-06, |
| "loss": 0.439, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.3191566047936565, |
| "grad_norm": 0.3080535811767778, |
| "learning_rate": 6.87556119008519e-06, |
| "loss": 0.4268, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.3245629843214994, |
| "grad_norm": 0.37030845399385603, |
| "learning_rate": 6.8463556554113005e-06, |
| "loss": 0.4353, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.3299693638493422, |
| "grad_norm": 0.3473034342384458, |
| "learning_rate": 6.8170769959663045e-06, |
| "loss": 0.4292, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.3353757433771851, |
| "grad_norm": 0.322256198293079, |
| "learning_rate": 6.787726371329214e-06, |
| "loss": 0.4402, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.3407821229050279, |
| "grad_norm": 0.3907219151376363, |
| "learning_rate": 6.7583049439292205e-06, |
| "loss": 0.4369, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.3461885024328708, |
| "grad_norm": 0.34928113227903806, |
| "learning_rate": 6.728813878999652e-06, |
| "loss": 0.4377, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.3515948819607138, |
| "grad_norm": 0.35544626757027864, |
| "learning_rate": 6.699254344531821e-06, |
| "loss": 0.4309, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3570012614885565, |
| "grad_norm": 0.366218747083373, |
| "learning_rate": 6.669627511228778e-06, |
| "loss": 0.434, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.3624076410163992, |
| "grad_norm": 0.3580871935273299, |
| "learning_rate": 6.6399345524589366e-06, |
| "loss": 0.4401, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.3678140205442422, |
| "grad_norm": 0.29886314913995143, |
| "learning_rate": 6.610176644209602e-06, |
| "loss": 0.4266, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.3732204000720851, |
| "grad_norm": 0.3571328312104908, |
| "learning_rate": 6.580354965040396e-06, |
| "loss": 0.4393, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.3786267795999279, |
| "grad_norm": 0.3568154757493318, |
| "learning_rate": 6.550470696036591e-06, |
| "loss": 0.4276, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.3840331591277708, |
| "grad_norm": 0.3020834353942124, |
| "learning_rate": 6.520525020762318e-06, |
| "loss": 0.4374, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.3894395386556138, |
| "grad_norm": 0.4345861239807074, |
| "learning_rate": 6.490519125213701e-06, |
| "loss": 0.44, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.3948459181834565, |
| "grad_norm": 0.4164116140474957, |
| "learning_rate": 6.460454197771881e-06, |
| "loss": 0.4347, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.4002522977112992, |
| "grad_norm": 0.3698597319632245, |
| "learning_rate": 6.430331429155956e-06, |
| "loss": 0.4398, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.4056586772391422, |
| "grad_norm": 0.3557941383592286, |
| "learning_rate": 6.400152012375818e-06, |
| "loss": 0.4361, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.4110650567669851, |
| "grad_norm": 0.3703620913980966, |
| "learning_rate": 6.3699171426849036e-06, |
| "loss": 0.433, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.4164714362948279, |
| "grad_norm": 0.312372238883981, |
| "learning_rate": 6.339628017532858e-06, |
| "loss": 0.4305, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.4218778158226708, |
| "grad_norm": 0.32819677760603516, |
| "learning_rate": 6.309285836518113e-06, |
| "loss": 0.4289, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.4272841953505135, |
| "grad_norm": 0.34835896987461035, |
| "learning_rate": 6.2788918013403695e-06, |
| "loss": 0.4312, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.4326905748783565, |
| "grad_norm": 0.34043287674955064, |
| "learning_rate": 6.248447115753009e-06, |
| "loss": 0.4327, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.4380969544061992, |
| "grad_norm": 0.32777806734674225, |
| "learning_rate": 6.21795298551542e-06, |
| "loss": 0.4206, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.4435033339340422, |
| "grad_norm": 0.2839690869238431, |
| "learning_rate": 6.187410618345241e-06, |
| "loss": 0.4337, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.4489097134618851, |
| "grad_norm": 0.2845491198333412, |
| "learning_rate": 6.156821223870533e-06, |
| "loss": 0.428, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.4543160929897279, |
| "grad_norm": 0.3381278947086419, |
| "learning_rate": 6.126186013581868e-06, |
| "loss": 0.4442, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.4597224725175708, |
| "grad_norm": 0.2678673584947001, |
| "learning_rate": 6.095506200784349e-06, |
| "loss": 0.4313, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.4651288520454135, |
| "grad_norm": 0.32064492812884415, |
| "learning_rate": 6.06478300054956e-06, |
| "loss": 0.4443, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.4705352315732565, |
| "grad_norm": 0.33114310721210843, |
| "learning_rate": 6.034017629667439e-06, |
| "loss": 0.4321, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.4759416111010992, |
| "grad_norm": 0.3407274170049336, |
| "learning_rate": 6.003211306598089e-06, |
| "loss": 0.4302, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.4813479906289422, |
| "grad_norm": 0.3655959799961016, |
| "learning_rate": 5.972365251423521e-06, |
| "loss": 0.4331, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.4867543701567851, |
| "grad_norm": 0.3707027911602118, |
| "learning_rate": 5.941480685799338e-06, |
| "loss": 0.433, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.4921607496846279, |
| "grad_norm": 0.30224309374010494, |
| "learning_rate": 5.910558832906341e-06, |
| "loss": 0.4378, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.4975671292124706, |
| "grad_norm": 0.3421553953269554, |
| "learning_rate": 5.879600917402089e-06, |
| "loss": 0.4322, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.5029735087403135, |
| "grad_norm": 0.33381909956811917, |
| "learning_rate": 5.848608165372403e-06, |
| "loss": 0.425, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.5083798882681565, |
| "grad_norm": 0.3189833875248174, |
| "learning_rate": 5.8175818042828e-06, |
| "loss": 0.4357, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.5137862677959992, |
| "grad_norm": 0.36173513055424256, |
| "learning_rate": 5.78652306292988e-06, |
| "loss": 0.4395, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.5191926473238422, |
| "grad_norm": 0.3265416603091211, |
| "learning_rate": 5.75543317139266e-06, |
| "loss": 0.4426, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.5245990268516851, |
| "grad_norm": 0.33495795652653004, |
| "learning_rate": 5.724313360983859e-06, |
| "loss": 0.4335, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.5300054063795279, |
| "grad_norm": 0.35637908471545576, |
| "learning_rate": 5.693164864201134e-06, |
| "loss": 0.4343, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.5354117859073706, |
| "grad_norm": 0.3422755476029069, |
| "learning_rate": 5.661988914678257e-06, |
| "loss": 0.4201, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.5408181654352135, |
| "grad_norm": 0.29401423880776295, |
| "learning_rate": 5.630786747136269e-06, |
| "loss": 0.4263, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.5462245449630565, |
| "grad_norm": 0.35559246067713574, |
| "learning_rate": 5.599559597334568e-06, |
| "loss": 0.4327, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.5516309244908992, |
| "grad_norm": 0.3234026109207772, |
| "learning_rate": 5.56830870202198e-06, |
| "loss": 0.4284, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.557037304018742, |
| "grad_norm": 0.3041181368480941, |
| "learning_rate": 5.537035298887764e-06, |
| "loss": 0.4291, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.562443683546585, |
| "grad_norm": 0.4152034967270183, |
| "learning_rate": 5.505740626512601e-06, |
| "loss": 0.4333, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.5678500630744279, |
| "grad_norm": 0.32189843480023705, |
| "learning_rate": 5.474425924319538e-06, |
| "loss": 0.4313, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.5732564426022706, |
| "grad_norm": 0.3400408960358337, |
| "learning_rate": 5.443092432524906e-06, |
| "loss": 0.4446, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.5786628221301136, |
| "grad_norm": 0.3253331216756115, |
| "learning_rate": 5.411741392089192e-06, |
| "loss": 0.4276, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.5840692016579565, |
| "grad_norm": 0.34364169352732366, |
| "learning_rate": 5.380374044667896e-06, |
| "loss": 0.4363, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.5894755811857992, |
| "grad_norm": 0.2993302543547276, |
| "learning_rate": 5.348991632562355e-06, |
| "loss": 0.4347, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.594881960713642, |
| "grad_norm": 0.31140003151111195, |
| "learning_rate": 5.317595398670543e-06, |
| "loss": 0.4203, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.600288340241485, |
| "grad_norm": 0.34917215566088183, |
| "learning_rate": 5.286186586437845e-06, |
| "loss": 0.4394, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.6056947197693279, |
| "grad_norm": 0.3099678473182354, |
| "learning_rate": 5.254766439807807e-06, |
| "loss": 0.4224, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.6111010992971706, |
| "grad_norm": 0.32027842285858055, |
| "learning_rate": 5.223336203172874e-06, |
| "loss": 0.4289, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.6165074788250136, |
| "grad_norm": 0.29377503624337103, |
| "learning_rate": 5.191897121325111e-06, |
| "loss": 0.43, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.6219138583528565, |
| "grad_norm": 0.3286814138894788, |
| "learning_rate": 5.16045043940689e-06, |
| "loss": 0.4344, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.6273202378806992, |
| "grad_norm": 0.35588674616258936, |
| "learning_rate": 5.128997402861584e-06, |
| "loss": 0.4306, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.632726617408542, |
| "grad_norm": 0.33501603495492577, |
| "learning_rate": 5.09753925738424e-06, |
| "loss": 0.4154, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.638132996936385, |
| "grad_norm": 0.3011476898703049, |
| "learning_rate": 5.06607724887225e-06, |
| "loss": 0.4314, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.6435393764642279, |
| "grad_norm": 0.3879201939655995, |
| "learning_rate": 5.034612623375993e-06, |
| "loss": 0.4412, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.6489457559920706, |
| "grad_norm": 0.3426764786646151, |
| "learning_rate": 5.003146627049499e-06, |
| "loss": 0.4295, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.6543521355199133, |
| "grad_norm": 0.3408786770769329, |
| "learning_rate": 4.971680506101086e-06, |
| "loss": 0.4259, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.6597585150477565, |
| "grad_norm": 0.3689333373771858, |
| "learning_rate": 4.940215506744011e-06, |
| "loss": 0.4254, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.6651648945755992, |
| "grad_norm": 0.33725311763702437, |
| "learning_rate": 4.90875287514711e-06, |
| "loss": 0.4286, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.670571274103442, |
| "grad_norm": 0.3106105413402686, |
| "learning_rate": 4.87729385738544e-06, |
| "loss": 0.426, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.675977653631285, |
| "grad_norm": 0.361491556160267, |
| "learning_rate": 4.845839699390936e-06, |
| "loss": 0.4229, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.6813840331591279, |
| "grad_norm": 0.3012437306295753, |
| "learning_rate": 4.814391646903063e-06, |
| "loss": 0.4296, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.6867904126869706, |
| "grad_norm": 0.3142934287582159, |
| "learning_rate": 4.782950945419475e-06, |
| "loss": 0.4304, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.6921967922148133, |
| "grad_norm": 0.3024864799296645, |
| "learning_rate": 4.751518840146695e-06, |
| "loss": 0.4329, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.6976031717426563, |
| "grad_norm": 0.3081924919099197, |
| "learning_rate": 4.720096575950784e-06, |
| "loss": 0.4319, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.7030095512704992, |
| "grad_norm": 0.32189094915170496, |
| "learning_rate": 4.688685397308061e-06, |
| "loss": 0.42, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.708415930798342, |
| "grad_norm": 0.33972262308693657, |
| "learning_rate": 4.657286548255789e-06, |
| "loss": 0.4369, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.713822310326185, |
| "grad_norm": 0.30741331028975344, |
| "learning_rate": 4.6259012723429285e-06, |
| "loss": 0.4274, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.7192286898540279, |
| "grad_norm": 0.28971622178653267, |
| "learning_rate": 4.594530812580876e-06, |
| "loss": 0.4216, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.7246350693818706, |
| "grad_norm": 0.2792098363578085, |
| "learning_rate": 4.563176411394229e-06, |
| "loss": 0.4238, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.7300414489097133, |
| "grad_norm": 0.29274514837335597, |
| "learning_rate": 4.531839310571595e-06, |
| "loss": 0.4291, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.7354478284375563, |
| "grad_norm": 0.32996912353874136, |
| "learning_rate": 4.5005207512163914e-06, |
| "loss": 0.4388, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.7408542079653992, |
| "grad_norm": 0.34282857698540753, |
| "learning_rate": 4.469221973697714e-06, |
| "loss": 0.4373, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.746260587493242, |
| "grad_norm": 0.3147983795136612, |
| "learning_rate": 4.43794421760119e-06, |
| "loss": 0.4291, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.751666967021085, |
| "grad_norm": 0.2953517288607898, |
| "learning_rate": 4.4066887216799055e-06, |
| "loss": 0.4219, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.7570733465489279, |
| "grad_norm": 0.30489564567587807, |
| "learning_rate": 4.375456723805321e-06, |
| "loss": 0.4308, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.7624797260767706, |
| "grad_norm": 0.30950501632812377, |
| "learning_rate": 4.344249460918271e-06, |
| "loss": 0.4213, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.7678861056046133, |
| "grad_norm": 0.30230325895579757, |
| "learning_rate": 4.313068168979957e-06, |
| "loss": 0.4364, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.7732924851324563, |
| "grad_norm": 0.30774095159515363, |
| "learning_rate": 4.281914082923002e-06, |
| "loss": 0.4165, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.7786988646602993, |
| "grad_norm": 0.3275433264912912, |
| "learning_rate": 4.250788436602548e-06, |
| "loss": 0.4269, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.784105244188142, |
| "grad_norm": 0.3270523212461865, |
| "learning_rate": 4.2196924627473715e-06, |
| "loss": 0.4304, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.7895116237159847, |
| "grad_norm": 0.28953105726529316, |
| "learning_rate": 4.188627392911091e-06, |
| "loss": 0.4281, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.7949180032438277, |
| "grad_norm": 0.34157770345495453, |
| "learning_rate": 4.157594457423357e-06, |
| "loss": 0.432, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.8003243827716706, |
| "grad_norm": 0.2952227481543905, |
| "learning_rate": 4.1265948853411506e-06, |
| "loss": 0.427, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.8057307622995133, |
| "grad_norm": 0.3058432699391948, |
| "learning_rate": 4.095629904400097e-06, |
| "loss": 0.4268, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.8111371418273563, |
| "grad_norm": 0.32888818257409286, |
| "learning_rate": 4.06470074096584e-06, |
| "loss": 0.4334, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.8165435213551993, |
| "grad_norm": 0.29929296938295863, |
| "learning_rate": 4.0338086199854765e-06, |
| "loss": 0.4248, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.821949900883042, |
| "grad_norm": 0.33418978699429813, |
| "learning_rate": 4.0029547649390346e-06, |
| "loss": 0.4307, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.8273562804108847, |
| "grad_norm": 0.2991040804166494, |
| "learning_rate": 3.97214039779103e-06, |
| "loss": 0.435, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.8327626599387277, |
| "grad_norm": 0.2829911428105187, |
| "learning_rate": 3.941366738942058e-06, |
| "loss": 0.4246, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.8381690394665706, |
| "grad_norm": 0.2990384176756561, |
| "learning_rate": 3.910635007180468e-06, |
| "loss": 0.4394, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.8435754189944134, |
| "grad_norm": 0.28487793163600966, |
| "learning_rate": 3.879946419634087e-06, |
| "loss": 0.4268, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.8489817985222563, |
| "grad_norm": 0.30066911074015307, |
| "learning_rate": 3.8493021917220225e-06, |
| "loss": 0.4289, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.8543881780500993, |
| "grad_norm": 0.3145700146426358, |
| "learning_rate": 3.818703537106522e-06, |
| "loss": 0.427, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.859794557577942, |
| "grad_norm": 0.3121437364875441, |
| "learning_rate": 3.7881516676449014e-06, |
| "loss": 0.4334, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.8652009371057847, |
| "grad_norm": 0.2914138429548545, |
| "learning_rate": 3.7576477933415612e-06, |
| "loss": 0.4358, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.8706073166336277, |
| "grad_norm": 0.3263366427961882, |
| "learning_rate": 3.7271931223000507e-06, |
| "loss": 0.4294, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.8760136961614706, |
| "grad_norm": 0.3181986581808925, |
| "learning_rate": 3.6967888606752345e-06, |
| "loss": 0.433, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.8814200756893134, |
| "grad_norm": 0.31837041508546626, |
| "learning_rate": 3.6664362126255087e-06, |
| "loss": 0.4283, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.886826455217156, |
| "grad_norm": 0.2876960972161682, |
| "learning_rate": 3.636136380265124e-06, |
| "loss": 0.4189, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.8922328347449993, |
| "grad_norm": 0.30867320900321366, |
| "learning_rate": 3.6058905636165674e-06, |
| "loss": 0.4309, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.897639214272842, |
| "grad_norm": 0.29104980848951667, |
| "learning_rate": 3.575699960563038e-06, |
| "loss": 0.4184, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.9030455938006847, |
| "grad_norm": 0.2859389528274554, |
| "learning_rate": 3.5455657668010057e-06, |
| "loss": 0.4253, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.9084519733285277, |
| "grad_norm": 0.30910611127718657, |
| "learning_rate": 3.5154891757928523e-06, |
| "loss": 0.4257, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.9138583528563706, |
| "grad_norm": 0.31381289055858025, |
| "learning_rate": 3.4854713787196105e-06, |
| "loss": 0.4324, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.9192647323842134, |
| "grad_norm": 0.33654431291917486, |
| "learning_rate": 3.4555135644337803e-06, |
| "loss": 0.4262, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.924671111912056, |
| "grad_norm": 0.30712399081960845, |
| "learning_rate": 3.42561691941225e-06, |
| "loss": 0.4344, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.930077491439899, |
| "grad_norm": 0.2989668977037765, |
| "learning_rate": 3.3957826277093074e-06, |
| "loss": 0.4278, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.935483870967742, |
| "grad_norm": 0.3259516671848096, |
| "learning_rate": 3.3660118709097347e-06, |
| "loss": 0.4242, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.9408902504955847, |
| "grad_norm": 0.29719187591192203, |
| "learning_rate": 3.336305828082024e-06, |
| "loss": 0.4319, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.9462966300234277, |
| "grad_norm": 0.3250815058947025, |
| "learning_rate": 3.306665675731674e-06, |
| "loss": 0.4324, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.9517030095512706, |
| "grad_norm": 0.3196705993035981, |
| "learning_rate": 3.277092587754598e-06, |
| "loss": 0.4283, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.9571093890791134, |
| "grad_norm": 0.2836241969868925, |
| "learning_rate": 3.247587735390628e-06, |
| "loss": 0.4285, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.962515768606956, |
| "grad_norm": 0.2963451307813687, |
| "learning_rate": 3.218152287177133e-06, |
| "loss": 0.4233, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.967922148134799, |
| "grad_norm": 0.32162438964611967, |
| "learning_rate": 3.1887874089027304e-06, |
| "loss": 0.4275, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.973328527662642, |
| "grad_norm": 0.2858747270839711, |
| "learning_rate": 3.159494263561126e-06, |
| "loss": 0.429, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.9787349071904847, |
| "grad_norm": 0.294205581889964, |
| "learning_rate": 3.130274011305047e-06, |
| "loss": 0.4261, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.9841412867183277, |
| "grad_norm": 0.3271655262933234, |
| "learning_rate": 3.1011278094002928e-06, |
| "loss": 0.4352, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.9895476662461706, |
| "grad_norm": 0.3151321646815863, |
| "learning_rate": 3.0720568121799105e-06, |
| "loss": 0.4302, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.9949540457740134, |
| "grad_norm": 0.3069606817223593, |
| "learning_rate": 3.043062170998464e-06, |
| "loss": 0.4274, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.000360425301856, |
| "grad_norm": 0.3418886732932903, |
| "learning_rate": 3.0141450341864486e-06, |
| "loss": 0.4368, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.005766804829699, |
| "grad_norm": 0.28231273100784204, |
| "learning_rate": 2.9853065470048016e-06, |
| "loss": 0.4084, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.011173184357542, |
| "grad_norm": 0.27285411121752895, |
| "learning_rate": 2.956547851599548e-06, |
| "loss": 0.3899, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.0165795638853847, |
| "grad_norm": 0.31740692003997667, |
| "learning_rate": 2.9278700869565713e-06, |
| "loss": 0.406, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.0219859434132275, |
| "grad_norm": 0.32723222207620034, |
| "learning_rate": 2.8992743888564886e-06, |
| "loss": 0.4107, |
| "step": 3740 |
| }, |
| { |
| "epoch": 2.0273923229410706, |
| "grad_norm": 0.3293876655149398, |
| "learning_rate": 2.8707618898296864e-06, |
| "loss": 0.4052, |
| "step": 3750 |
| }, |
| { |
| "epoch": 2.0327987024689134, |
| "grad_norm": 0.26473497263074053, |
| "learning_rate": 2.8423337191114495e-06, |
| "loss": 0.402, |
| "step": 3760 |
| }, |
| { |
| "epoch": 2.038205081996756, |
| "grad_norm": 0.31910999655360905, |
| "learning_rate": 2.8139910025972622e-06, |
| "loss": 0.4134, |
| "step": 3770 |
| }, |
| { |
| "epoch": 2.043611461524599, |
| "grad_norm": 0.29154253424627524, |
| "learning_rate": 2.785734862798184e-06, |
| "loss": 0.4086, |
| "step": 3780 |
| }, |
| { |
| "epoch": 2.049017841052442, |
| "grad_norm": 0.2910125618297838, |
| "learning_rate": 2.7575664187964236e-06, |
| "loss": 0.4007, |
| "step": 3790 |
| }, |
| { |
| "epoch": 2.0544242205802847, |
| "grad_norm": 0.28793585101610353, |
| "learning_rate": 2.7294867862009937e-06, |
| "loss": 0.4053, |
| "step": 3800 |
| }, |
| { |
| "epoch": 2.0598306001081275, |
| "grad_norm": 0.2731032601573403, |
| "learning_rate": 2.7014970771035474e-06, |
| "loss": 0.4138, |
| "step": 3810 |
| }, |
| { |
| "epoch": 2.0652369796359706, |
| "grad_norm": 0.29876809472359783, |
| "learning_rate": 2.6735984000343216e-06, |
| "loss": 0.4156, |
| "step": 3820 |
| }, |
| { |
| "epoch": 2.0706433591638134, |
| "grad_norm": 0.3100743441240049, |
| "learning_rate": 2.645791859918234e-06, |
| "loss": 0.4089, |
| "step": 3830 |
| }, |
| { |
| "epoch": 2.076049738691656, |
| "grad_norm": 0.34676569440909566, |
| "learning_rate": 2.6180785580311284e-06, |
| "loss": 0.3998, |
| "step": 3840 |
| }, |
| { |
| "epoch": 2.081456118219499, |
| "grad_norm": 0.28331404223893575, |
| "learning_rate": 2.5904595919561563e-06, |
| "loss": 0.3935, |
| "step": 3850 |
| }, |
| { |
| "epoch": 2.086862497747342, |
| "grad_norm": 0.2892120423588288, |
| "learning_rate": 2.562936055540307e-06, |
| "loss": 0.411, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.0922688772751847, |
| "grad_norm": 0.29210558202813347, |
| "learning_rate": 2.5355090388510806e-06, |
| "loss": 0.4108, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.0976752568030275, |
| "grad_norm": 0.29027866503096267, |
| "learning_rate": 2.508179628133326e-06, |
| "loss": 0.4016, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.1030816363308706, |
| "grad_norm": 0.2876065349136538, |
| "learning_rate": 2.4809489057662168e-06, |
| "loss": 0.4101, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.1084880158587134, |
| "grad_norm": 0.3135899601532618, |
| "learning_rate": 2.4538179502203753e-06, |
| "loss": 0.4001, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.113894395386556, |
| "grad_norm": 0.30848425065584256, |
| "learning_rate": 2.4267878360151747e-06, |
| "loss": 0.3997, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.119300774914399, |
| "grad_norm": 0.2923032276510183, |
| "learning_rate": 2.399859633676165e-06, |
| "loss": 0.4049, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.124707154442242, |
| "grad_norm": 0.29055776768248115, |
| "learning_rate": 2.3730344096926974e-06, |
| "loss": 0.3981, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.1301135339700847, |
| "grad_norm": 0.3161385412337821, |
| "learning_rate": 2.3463132264756617e-06, |
| "loss": 0.4075, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.1355199134979275, |
| "grad_norm": 0.2828900068372096, |
| "learning_rate": 2.319697142315428e-06, |
| "loss": 0.3906, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.14092629302577, |
| "grad_norm": 0.26292390614915356, |
| "learning_rate": 2.293187211339926e-06, |
| "loss": 0.3991, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.1463326725536134, |
| "grad_norm": 0.2987394527032652, |
| "learning_rate": 2.2667844834728923e-06, |
| "loss": 0.3999, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.151739052081456, |
| "grad_norm": 0.27915670540136367, |
| "learning_rate": 2.2404900043922996e-06, |
| "loss": 0.3995, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.157145431609299, |
| "grad_norm": 0.2818164391888048, |
| "learning_rate": 2.2143048154889272e-06, |
| "loss": 0.4015, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.162551811137142, |
| "grad_norm": 0.26044900685376793, |
| "learning_rate": 2.1882299538251352e-06, |
| "loss": 0.4003, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.1679581906649847, |
| "grad_norm": 0.27297932069072756, |
| "learning_rate": 2.162266452093774e-06, |
| "loss": 0.4149, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.1733645701928275, |
| "grad_norm": 0.2978434115081757, |
| "learning_rate": 2.1364153385773007e-06, |
| "loss": 0.4018, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.17877094972067, |
| "grad_norm": 0.31586609932366294, |
| "learning_rate": 2.110677637107036e-06, |
| "loss": 0.4053, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.1841773292485134, |
| "grad_norm": 0.29030802044428805, |
| "learning_rate": 2.0850543670226318e-06, |
| "loss": 0.4065, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.189583708776356, |
| "grad_norm": 0.3365802334808058, |
| "learning_rate": 2.059546543131696e-06, |
| "loss": 0.405, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.194990088304199, |
| "grad_norm": 0.2995355365322975, |
| "learning_rate": 2.034155175669592e-06, |
| "loss": 0.4044, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.200396467832042, |
| "grad_norm": 0.2868235821916637, |
| "learning_rate": 2.0088812702594424e-06, |
| "loss": 0.4023, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.2058028473598847, |
| "grad_norm": 0.29532698621262965, |
| "learning_rate": 1.9837258278722855e-06, |
| "loss": 0.413, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.2112092268877275, |
| "grad_norm": 0.282345122194298, |
| "learning_rate": 1.9586898447874543e-06, |
| "loss": 0.4033, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.21661560641557, |
| "grad_norm": 0.28744059302390934, |
| "learning_rate": 1.933774312553092e-06, |
| "loss": 0.4002, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.2220219859434134, |
| "grad_norm": 0.29637974416632634, |
| "learning_rate": 1.9089802179469036e-06, |
| "loss": 0.397, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.227428365471256, |
| "grad_norm": 0.29136812414474506, |
| "learning_rate": 1.884308542937065e-06, |
| "loss": 0.4198, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.232834744999099, |
| "grad_norm": 0.28845833396948634, |
| "learning_rate": 1.8597602646433294e-06, |
| "loss": 0.4012, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.238241124526942, |
| "grad_norm": 0.31515767696033387, |
| "learning_rate": 1.8353363552983382e-06, |
| "loss": 0.4084, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.2436475040547847, |
| "grad_norm": 0.2852056906534805, |
| "learning_rate": 1.8110377822091057e-06, |
| "loss": 0.4129, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.2490538835826275, |
| "grad_norm": 0.2961534698999477, |
| "learning_rate": 1.7868655077187175e-06, |
| "loss": 0.404, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.25446026311047, |
| "grad_norm": 0.3026130823215708, |
| "learning_rate": 1.76282048916821e-06, |
| "loss": 0.4105, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.2598666426383134, |
| "grad_norm": 0.295103201693147, |
| "learning_rate": 1.7389036788586627e-06, |
| "loss": 0.4057, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.265273022166156, |
| "grad_norm": 0.26979492433946, |
| "learning_rate": 1.7151160240134702e-06, |
| "loss": 0.4027, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.270679401693999, |
| "grad_norm": 0.3069718829915049, |
| "learning_rate": 1.6914584667408408e-06, |
| "loss": 0.407, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.276085781221842, |
| "grad_norm": 0.2582555297518662, |
| "learning_rate": 1.6679319439964797e-06, |
| "loss": 0.3943, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.2814921607496847, |
| "grad_norm": 0.30300112933414725, |
| "learning_rate": 1.6445373875464738e-06, |
| "loss": 0.4073, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.2868985402775275, |
| "grad_norm": 0.27640155584834986, |
| "learning_rate": 1.6212757239304e-06, |
| "loss": 0.4074, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.29230491980537, |
| "grad_norm": 0.288482277273483, |
| "learning_rate": 1.5981478744246242e-06, |
| "loss": 0.3961, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.297711299333213, |
| "grad_norm": 0.2968944260811366, |
| "learning_rate": 1.575154755005816e-06, |
| "loss": 0.403, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.303117678861056, |
| "grad_norm": 0.29278471655933946, |
| "learning_rate": 1.5522972763146653e-06, |
| "loss": 0.4019, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.308524058388899, |
| "grad_norm": 0.2729883421366084, |
| "learning_rate": 1.5295763436198274e-06, |
| "loss": 0.4148, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.3139304379167416, |
| "grad_norm": 0.30284845140590294, |
| "learning_rate": 1.5069928567820635e-06, |
| "loss": 0.4016, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.3193368174445848, |
| "grad_norm": 0.3044664985270554, |
| "learning_rate": 1.4845477102185974e-06, |
| "loss": 0.4092, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.3247431969724275, |
| "grad_norm": 0.30467048506977945, |
| "learning_rate": 1.4622417928677034e-06, |
| "loss": 0.3997, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.33014957650027, |
| "grad_norm": 0.25546815283849933, |
| "learning_rate": 1.4400759881534886e-06, |
| "loss": 0.3988, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.335555956028113, |
| "grad_norm": 0.2852027186621198, |
| "learning_rate": 1.418051173950914e-06, |
| "loss": 0.4124, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.340962335555956, |
| "grad_norm": 0.28906302811953016, |
| "learning_rate": 1.3961682225510203e-06, |
| "loss": 0.3993, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.346368715083799, |
| "grad_norm": 0.27197836639387235, |
| "learning_rate": 1.3744280006263839e-06, |
| "loss": 0.408, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.3517750946116416, |
| "grad_norm": 0.2668399923208869, |
| "learning_rate": 1.3528313691967926e-06, |
| "loss": 0.4134, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.3571814741394848, |
| "grad_norm": 0.2872848077693314, |
| "learning_rate": 1.3313791835951396e-06, |
| "loss": 0.4045, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.3625878536673275, |
| "grad_norm": 0.29802601615160446, |
| "learning_rate": 1.310072293433558e-06, |
| "loss": 0.4014, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.36799423319517, |
| "grad_norm": 0.25723071187565805, |
| "learning_rate": 1.2889115425697612e-06, |
| "loss": 0.399, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.373400612723013, |
| "grad_norm": 0.2842104581531295, |
| "learning_rate": 1.2678977690736311e-06, |
| "loss": 0.4015, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.378806992250856, |
| "grad_norm": 0.2813179130833351, |
| "learning_rate": 1.2470318051940205e-06, |
| "loss": 0.4026, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.384213371778699, |
| "grad_norm": 0.27762098429764004, |
| "learning_rate": 1.2263144773257967e-06, |
| "loss": 0.4068, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.3896197513065416, |
| "grad_norm": 0.27848678899943174, |
| "learning_rate": 1.2057466059771035e-06, |
| "loss": 0.4006, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.3950261308343848, |
| "grad_norm": 0.27875535013460345, |
| "learning_rate": 1.1853290057368754e-06, |
| "loss": 0.4088, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.4004325103622275, |
| "grad_norm": 0.2662344684523685, |
| "learning_rate": 1.165062485242574e-06, |
| "loss": 0.4019, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.40583888989007, |
| "grad_norm": 0.3005215328293971, |
| "learning_rate": 1.1449478471481512e-06, |
| "loss": 0.411, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.411245269417913, |
| "grad_norm": 0.2712567161403629, |
| "learning_rate": 1.1249858880922771e-06, |
| "loss": 0.4059, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.416651648945756, |
| "grad_norm": 0.26211955276644977, |
| "learning_rate": 1.1051773986667735e-06, |
| "loss": 0.4051, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.422058028473599, |
| "grad_norm": 0.26165210615685336, |
| "learning_rate": 1.0855231633853137e-06, |
| "loss": 0.4068, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.4274644080014416, |
| "grad_norm": 0.2765363606523804, |
| "learning_rate": 1.0660239606523466e-06, |
| "loss": 0.4128, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.4328707875292848, |
| "grad_norm": 0.2770223660740028, |
| "learning_rate": 1.0466805627322685e-06, |
| "loss": 0.4055, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.4382771670571275, |
| "grad_norm": 0.266013699998984, |
| "learning_rate": 1.0274937357188414e-06, |
| "loss": 0.4049, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.4436835465849702, |
| "grad_norm": 0.25683355130670393, |
| "learning_rate": 1.0084642395048428e-06, |
| "loss": 0.4078, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.449089926112813, |
| "grad_norm": 0.2811697424270643, |
| "learning_rate": 9.895928277519822e-07, |
| "loss": 0.4092, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.454496305640656, |
| "grad_norm": 0.2836256278223854, |
| "learning_rate": 9.708802478610413e-07, |
| "loss": 0.4059, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.459902685168499, |
| "grad_norm": 0.2771952071252828, |
| "learning_rate": 9.523272409422829e-07, |
| "loss": 0.4112, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.4653090646963416, |
| "grad_norm": 0.2965292468618203, |
| "learning_rate": 9.339345417860918e-07, |
| "loss": 0.4028, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.4707154442241848, |
| "grad_norm": 0.307263683184186, |
| "learning_rate": 9.157028788338795e-07, |
| "loss": 0.4029, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.4761218237520275, |
| "grad_norm": 0.2922545833760392, |
| "learning_rate": 8.976329741492262e-07, |
| "loss": 0.3939, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.4815282032798702, |
| "grad_norm": 0.29211120065069335, |
| "learning_rate": 8.797255433892926e-07, |
| "loss": 0.4086, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.486934582807713, |
| "grad_norm": 0.28634400793358533, |
| "learning_rate": 8.619812957764729e-07, |
| "loss": 0.4059, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.492340962335556, |
| "grad_norm": 0.2646272575948771, |
| "learning_rate": 8.444009340703008e-07, |
| "loss": 0.398, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.497747341863399, |
| "grad_norm": 0.29066647888917396, |
| "learning_rate": 8.269851545396279e-07, |
| "loss": 0.4025, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.5031537213912416, |
| "grad_norm": 0.28424280479329644, |
| "learning_rate": 8.097346469350348e-07, |
| "loss": 0.4013, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.5085601009190848, |
| "grad_norm": 0.2896529003620974, |
| "learning_rate": 7.926500944615267e-07, |
| "loss": 0.4108, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.5139664804469275, |
| "grad_norm": 0.27346406286896946, |
| "learning_rate": 7.757321737514645e-07, |
| "loss": 0.3941, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.5193728599747702, |
| "grad_norm": 0.26882609264045565, |
| "learning_rate": 7.589815548377738e-07, |
| "loss": 0.4035, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.524779239502613, |
| "grad_norm": 0.27733293233890505, |
| "learning_rate": 7.423989011274052e-07, |
| "loss": 0.4085, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.5301856190304557, |
| "grad_norm": 0.25627085107348396, |
| "learning_rate": 7.259848693750582e-07, |
| "loss": 0.4017, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.535591998558299, |
| "grad_norm": 0.2691243234604463, |
| "learning_rate": 7.097401096571765e-07, |
| "loss": 0.3996, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.5409983780861416, |
| "grad_norm": 0.2764529789534093, |
| "learning_rate": 6.936652653461939e-07, |
| "loss": 0.4145, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.5464047576139848, |
| "grad_norm": 0.2902741811813119, |
| "learning_rate": 6.777609730850615e-07, |
| "loss": 0.4007, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.5518111371418275, |
| "grad_norm": 0.265969991168333, |
| "learning_rate": 6.620278627620286e-07, |
| "loss": 0.402, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.5572175166696702, |
| "grad_norm": 0.259196836837019, |
| "learning_rate": 6.464665574856977e-07, |
| "loss": 0.4124, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.562623896197513, |
| "grad_norm": 0.2829926842253021, |
| "learning_rate": 6.310776735603452e-07, |
| "loss": 0.3989, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.5680302757253557, |
| "grad_norm": 0.2694529736291035, |
| "learning_rate": 6.158618204615119e-07, |
| "loss": 0.4032, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.573436655253199, |
| "grad_norm": 0.2630102431201598, |
| "learning_rate": 6.008196008118705e-07, |
| "loss": 0.407, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.5788430347810416, |
| "grad_norm": 0.27146999027694685, |
| "learning_rate": 5.859516103573492e-07, |
| "loss": 0.3982, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.5842494143088843, |
| "grad_norm": 0.28346284777141134, |
| "learning_rate": 5.712584379435482e-07, |
| "loss": 0.3984, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.5896557938367275, |
| "grad_norm": 0.28197172604169823, |
| "learning_rate": 5.567406654924074e-07, |
| "loss": 0.3988, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.5950621733645702, |
| "grad_norm": 0.2717022634001503, |
| "learning_rate": 5.423988679791686e-07, |
| "loss": 0.4098, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.600468552892413, |
| "grad_norm": 0.276903744178795, |
| "learning_rate": 5.282336134095994e-07, |
| "loss": 0.4043, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.6058749324202557, |
| "grad_norm": 0.25453566586188486, |
| "learning_rate": 5.142454627974969e-07, |
| "loss": 0.3976, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.611281311948099, |
| "grad_norm": 0.2784736093310705, |
| "learning_rate": 5.00434970142471e-07, |
| "loss": 0.4062, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.6166876914759416, |
| "grad_norm": 0.24784017038474418, |
| "learning_rate": 4.868026824080008e-07, |
| "loss": 0.4061, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.6220940710037843, |
| "grad_norm": 0.2807417719405863, |
| "learning_rate": 4.7334913949977526e-07, |
| "loss": 0.4075, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.6275004505316275, |
| "grad_norm": 0.25346910500895187, |
| "learning_rate": 4.6007487424430565e-07, |
| "loss": 0.3964, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.6329068300594702, |
| "grad_norm": 0.27364761903392193, |
| "learning_rate": 4.46980412367829e-07, |
| "loss": 0.3938, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.638313209587313, |
| "grad_norm": 0.2765709048501121, |
| "learning_rate": 4.3406627247548184e-07, |
| "loss": 0.4074, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.6437195891151557, |
| "grad_norm": 0.2776500402889704, |
| "learning_rate": 4.21332966030763e-07, |
| "loss": 0.3994, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.649125968642999, |
| "grad_norm": 0.26079072827311783, |
| "learning_rate": 4.08780997335278e-07, |
| "loss": 0.4045, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.6545323481708416, |
| "grad_norm": 0.2397016051949167, |
| "learning_rate": 3.9641086350876155e-07, |
| "loss": 0.4029, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.6599387276986843, |
| "grad_norm": 0.29754617724142174, |
| "learning_rate": 3.84223054469397e-07, |
| "loss": 0.4018, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.6653451072265275, |
| "grad_norm": 0.27568276310419043, |
| "learning_rate": 3.722180529144054e-07, |
| "loss": 0.4096, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.6707514867543702, |
| "grad_norm": 0.25544292907340554, |
| "learning_rate": 3.6039633430093367e-07, |
| "loss": 0.4006, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.676157866282213, |
| "grad_norm": 0.2904302979415872, |
| "learning_rate": 3.4875836682722096e-07, |
| "loss": 0.4093, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.6815642458100557, |
| "grad_norm": 0.2796446372356396, |
| "learning_rate": 3.373046114140571e-07, |
| "loss": 0.4037, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.686970625337899, |
| "grad_norm": 0.2690617997319961, |
| "learning_rate": 3.260355216865291e-07, |
| "loss": 0.4058, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.6923770048657416, |
| "grad_norm": 0.27708751977237855, |
| "learning_rate": 3.149515439560524e-07, |
| "loss": 0.4084, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.6977833843935843, |
| "grad_norm": 0.25923770611284674, |
| "learning_rate": 3.040531172026978e-07, |
| "loss": 0.4035, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.7031897639214275, |
| "grad_norm": 0.2503752240400745, |
| "learning_rate": 2.933406730578009e-07, |
| "loss": 0.4094, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.7085961434492702, |
| "grad_norm": 0.27256002841564525, |
| "learning_rate": 2.828146357868755e-07, |
| "loss": 0.4049, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.714002522977113, |
| "grad_norm": 0.262526407381437, |
| "learning_rate": 2.7247542227280155e-07, |
| "loss": 0.399, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.7194089025049557, |
| "grad_norm": 0.26889496739047675, |
| "learning_rate": 2.6232344199932034e-07, |
| "loss": 0.3974, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.7248152820327984, |
| "grad_norm": 0.2581699169174531, |
| "learning_rate": 2.523590970348166e-07, |
| "loss": 0.4078, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.7302216615606416, |
| "grad_norm": 0.2681313769671267, |
| "learning_rate": 2.4258278201639117e-07, |
| "loss": 0.4083, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.7356280410884843, |
| "grad_norm": 0.2583458633767275, |
| "learning_rate": 2.3299488413423554e-07, |
| "loss": 0.4033, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.7410344206163275, |
| "grad_norm": 0.27176652448537475, |
| "learning_rate": 2.2359578311629272e-07, |
| "loss": 0.41, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.7464408001441702, |
| "grad_norm": 0.2651677980954859, |
| "learning_rate": 2.1438585121322465e-07, |
| "loss": 0.4048, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.751847179672013, |
| "grad_norm": 0.26468667998207535, |
| "learning_rate": 2.0536545318366018e-07, |
| "loss": 0.4089, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.7572535591998557, |
| "grad_norm": 0.2682578170402083, |
| "learning_rate": 1.9653494627975888e-07, |
| "loss": 0.404, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.7626599387276984, |
| "grad_norm": 0.27087994511441277, |
| "learning_rate": 1.8789468023305334e-07, |
| "loss": 0.4033, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.7680663182555416, |
| "grad_norm": 0.25252752081120117, |
| "learning_rate": 1.7944499724060484e-07, |
| "loss": 0.4086, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.7734726977833843, |
| "grad_norm": 0.2765603337180068, |
| "learning_rate": 1.711862319514457e-07, |
| "loss": 0.4058, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.7788790773112275, |
| "grad_norm": 0.2662570880480703, |
| "learning_rate": 1.6311871145332836e-07, |
| "loss": 0.4016, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.7842854568390702, |
| "grad_norm": 0.26536562491010973, |
| "learning_rate": 1.5524275525977073e-07, |
| "loss": 0.3961, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.789691836366913, |
| "grad_norm": 0.2696933797225792, |
| "learning_rate": 1.4755867529740064e-07, |
| "loss": 0.402, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.7950982158947557, |
| "grad_norm": 0.26230277928432566, |
| "learning_rate": 1.4006677589360307e-07, |
| "loss": 0.4006, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.8005045954225984, |
| "grad_norm": 0.2618189445881308, |
| "learning_rate": 1.3276735376446693e-07, |
| "loss": 0.4101, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.8059109749504416, |
| "grad_norm": 0.26154419260033057, |
| "learning_rate": 1.2566069800303393e-07, |
| "loss": 0.4007, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.8113173544782843, |
| "grad_norm": 0.26129803510244903, |
| "learning_rate": 1.1874709006784891e-07, |
| "loss": 0.4108, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.816723734006127, |
| "grad_norm": 0.2755262239215911, |
| "learning_rate": 1.1202680377181252e-07, |
| "loss": 0.4081, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.8221301135339703, |
| "grad_norm": 0.27615467193849846, |
| "learning_rate": 1.055001052713378e-07, |
| "loss": 0.4057, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.827536493061813, |
| "grad_norm": 0.2565394448779921, |
| "learning_rate": 9.916725305580632e-08, |
| "loss": 0.4074, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.8329428725896557, |
| "grad_norm": 0.29481883515723867, |
| "learning_rate": 9.302849793733526e-08, |
| "loss": 0.4037, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.8383492521174984, |
| "grad_norm": 0.2628737439763179, |
| "learning_rate": 8.708408304083927e-08, |
| "loss": 0.3982, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.8437556316453416, |
| "grad_norm": 0.2856973586242492, |
| "learning_rate": 8.133424379440535e-08, |
| "loss": 0.4098, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.8491620111731844, |
| "grad_norm": 0.2573191532815954, |
| "learning_rate": 7.577920791996595e-08, |
| "loss": 0.4021, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.854568390701027, |
| "grad_norm": 0.2671924144995498, |
| "learning_rate": 7.041919542428221e-08, |
| "loss": 0.4046, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.8599747702288703, |
| "grad_norm": 0.27125026996972024, |
| "learning_rate": 6.525441859022873e-08, |
| "loss": 0.3996, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.865381149756713, |
| "grad_norm": 0.2597885306736867, |
| "learning_rate": 6.028508196838811e-08, |
| "loss": 0.3991, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.8707875292845557, |
| "grad_norm": 0.2661065612840173, |
| "learning_rate": 5.551138236894793e-08, |
| "loss": 0.4082, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.8761939088123984, |
| "grad_norm": 0.27596106902272594, |
| "learning_rate": 5.093350885390591e-08, |
| "loss": 0.4092, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.8816002883402416, |
| "grad_norm": 0.2798778899386736, |
| "learning_rate": 4.655164272958534e-08, |
| "loss": 0.3935, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.8870066678680844, |
| "grad_norm": 0.2675281011170649, |
| "learning_rate": 4.236595753944972e-08, |
| "loss": 0.4049, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.892413047395927, |
| "grad_norm": 0.24219018671622744, |
| "learning_rate": 3.837661905723378e-08, |
| "loss": 0.4061, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.8978194269237703, |
| "grad_norm": 0.26852051522723963, |
| "learning_rate": 3.458378528037598e-08, |
| "loss": 0.3982, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.903225806451613, |
| "grad_norm": 0.2598218760743794, |
| "learning_rate": 3.0987606423759644e-08, |
| "loss": 0.3978, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.9086321859794557, |
| "grad_norm": 0.24224454585639746, |
| "learning_rate": 2.7588224913768225e-08, |
| "loss": 0.4056, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.9140385655072985, |
| "grad_norm": 0.28293842876891173, |
| "learning_rate": 2.438577538263931e-08, |
| "loss": 0.4041, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.9194449450351416, |
| "grad_norm": 0.24273867782068695, |
| "learning_rate": 2.1380384663135523e-08, |
| "loss": 0.4046, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.9248513245629844, |
| "grad_norm": 0.2589867572465761, |
| "learning_rate": 1.8572171783521885e-08, |
| "loss": 0.4016, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.930257704090827, |
| "grad_norm": 0.26040920179163585, |
| "learning_rate": 1.596124796284848e-08, |
| "loss": 0.4048, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.9356640836186703, |
| "grad_norm": 0.28129280293565423, |
| "learning_rate": 1.3547716606548967e-08, |
| "loss": 0.4082, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.941070463146513, |
| "grad_norm": 0.27263421805264343, |
| "learning_rate": 1.133167330234386e-08, |
| "loss": 0.3957, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.9464768426743557, |
| "grad_norm": 0.27306797377575853, |
| "learning_rate": 9.313205816454674e-09, |
| "loss": 0.4097, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.9518832222021985, |
| "grad_norm": 0.26535989264790094, |
| "learning_rate": 7.492394090128364e-09, |
| "loss": 0.4091, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.957289601730041, |
| "grad_norm": 0.26682062170730547, |
| "learning_rate": 5.8693102364698604e-09, |
| "loss": 0.3975, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.9626959812578844, |
| "grad_norm": 0.2848285894683682, |
| "learning_rate": 4.444018537588801e-09, |
| "loss": 0.4075, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.968102360785727, |
| "grad_norm": 0.2853108418534249, |
| "learning_rate": 3.2165754420510063e-09, |
| "loss": 0.4107, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.9735087403135703, |
| "grad_norm": 0.26447810990716136, |
| "learning_rate": 2.1870295626441607e-09, |
| "loss": 0.4022, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.978915119841413, |
| "grad_norm": 0.2661971477507847, |
| "learning_rate": 1.3554216744521287e-09, |
| "loss": 0.4041, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.9843214993692557, |
| "grad_norm": 0.25582504114161564, |
| "learning_rate": 7.217847132401367e-10, |
| "loss": 0.4064, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.9897278788970985, |
| "grad_norm": 0.26069476073784237, |
| "learning_rate": 2.861437741508155e-10, |
| "loss": 0.4115, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.995134258424941, |
| "grad_norm": 0.27554755453273777, |
| "learning_rate": 4.851611070832984e-11, |
| "loss": 0.4016, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.9989187240944313, |
| "step": 5547, |
| "total_flos": 8484146955288576.0, |
| "train_loss": 0.44718967426225087, |
| "train_runtime": 93872.001, |
| "train_samples_per_second": 5.675, |
| "train_steps_per_second": 0.059 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5547, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8484146955288576.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|