|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.33224431031618584, |
|
"eval_steps": 500, |
|
"global_step": 102000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0003257297159962606, |
|
"grad_norm": 2.2308592796325684, |
|
"learning_rate": 4.99853416853153e-05, |
|
"loss": 1.4483, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0006514594319925212, |
|
"grad_norm": 2.3997225761413574, |
|
"learning_rate": 4.996905466899897e-05, |
|
"loss": 1.3276, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0009771891479887819, |
|
"grad_norm": 1.4687339067459106, |
|
"learning_rate": 4.995276765268264e-05, |
|
"loss": 1.3394, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0013029188639850425, |
|
"grad_norm": 0.6583470702171326, |
|
"learning_rate": 4.993648063636631e-05, |
|
"loss": 1.3245, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0016286485799813031, |
|
"grad_norm": 1.6252340078353882, |
|
"learning_rate": 4.992019362004997e-05, |
|
"loss": 1.3249, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0019543782959775637, |
|
"grad_norm": 2.0806777477264404, |
|
"learning_rate": 4.9903906603733634e-05, |
|
"loss": 1.32, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.002280108011973824, |
|
"grad_norm": 1.376539707183838, |
|
"learning_rate": 4.988761958741731e-05, |
|
"loss": 1.3133, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.002605837727970085, |
|
"grad_norm": 2.234644889831543, |
|
"learning_rate": 4.987133257110097e-05, |
|
"loss": 1.3179, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0029315674439663454, |
|
"grad_norm": 1.4599684476852417, |
|
"learning_rate": 4.985504555478464e-05, |
|
"loss": 1.3097, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.0032572971599626062, |
|
"grad_norm": 1.7078094482421875, |
|
"learning_rate": 4.9838758538468304e-05, |
|
"loss": 1.3083, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0035830268759588666, |
|
"grad_norm": 0.6953567266464233, |
|
"learning_rate": 4.9822471522151976e-05, |
|
"loss": 1.3075, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.0039087565919551275, |
|
"grad_norm": 1.225602626800537, |
|
"learning_rate": 4.980618450583564e-05, |
|
"loss": 1.3054, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.004234486307951388, |
|
"grad_norm": 1.3010519742965698, |
|
"learning_rate": 4.978989748951931e-05, |
|
"loss": 1.3066, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.004560216023947648, |
|
"grad_norm": 0.6475724577903748, |
|
"learning_rate": 4.9773610473202974e-05, |
|
"loss": 1.3109, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.004885945739943909, |
|
"grad_norm": 1.046614646911621, |
|
"learning_rate": 4.975732345688664e-05, |
|
"loss": 1.3074, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.00521167545594017, |
|
"grad_norm": 1.113573670387268, |
|
"learning_rate": 4.974103644057031e-05, |
|
"loss": 1.3083, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.005537405171936431, |
|
"grad_norm": 1.4273550510406494, |
|
"learning_rate": 4.972474942425398e-05, |
|
"loss": 1.3018, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.005863134887932691, |
|
"grad_norm": 0.5519908666610718, |
|
"learning_rate": 4.970846240793764e-05, |
|
"loss": 1.2945, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.006188864603928952, |
|
"grad_norm": 0.6653416156768799, |
|
"learning_rate": 4.969217539162131e-05, |
|
"loss": 1.3004, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.0065145943199252125, |
|
"grad_norm": 0.732170581817627, |
|
"learning_rate": 4.9675888375304975e-05, |
|
"loss": 1.3014, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.006840324035921473, |
|
"grad_norm": 0.405608594417572, |
|
"learning_rate": 4.965960135898865e-05, |
|
"loss": 1.2939, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.007166053751917733, |
|
"grad_norm": 0.9849847555160522, |
|
"learning_rate": 4.9643314342672306e-05, |
|
"loss": 1.2922, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.007491783467913994, |
|
"grad_norm": 0.7152832746505737, |
|
"learning_rate": 4.962702732635598e-05, |
|
"loss": 1.2905, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.007817513183910255, |
|
"grad_norm": 1.1164734363555908, |
|
"learning_rate": 4.9610740310039644e-05, |
|
"loss": 1.3024, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.008143242899906516, |
|
"grad_norm": 0.574243426322937, |
|
"learning_rate": 4.959445329372332e-05, |
|
"loss": 1.2944, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.008468972615902777, |
|
"grad_norm": 0.6976324319839478, |
|
"learning_rate": 4.9578166277406976e-05, |
|
"loss": 1.2939, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.008794702331899037, |
|
"grad_norm": 0.4648737609386444, |
|
"learning_rate": 4.956187926109064e-05, |
|
"loss": 1.2841, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.009120432047895297, |
|
"grad_norm": 1.189271092414856, |
|
"learning_rate": 4.9545592244774314e-05, |
|
"loss": 1.294, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.009446161763891557, |
|
"grad_norm": 0.6437670588493347, |
|
"learning_rate": 4.952930522845798e-05, |
|
"loss": 1.2882, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.009771891479887818, |
|
"grad_norm": 1.591304898262024, |
|
"learning_rate": 4.9513018212141646e-05, |
|
"loss": 1.2805, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.010097621195884079, |
|
"grad_norm": 0.2836475670337677, |
|
"learning_rate": 4.949673119582531e-05, |
|
"loss": 1.2802, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.01042335091188034, |
|
"grad_norm": 1.304417610168457, |
|
"learning_rate": 4.9480444179508984e-05, |
|
"loss": 1.2833, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.0107490806278766, |
|
"grad_norm": 0.27579864859580994, |
|
"learning_rate": 4.946415716319265e-05, |
|
"loss": 1.2852, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.011074810343872862, |
|
"grad_norm": 1.1080585718154907, |
|
"learning_rate": 4.9447870146876315e-05, |
|
"loss": 1.289, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.011400540059869122, |
|
"grad_norm": 0.2783690392971039, |
|
"learning_rate": 4.943158313055998e-05, |
|
"loss": 1.2885, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.011726269775865382, |
|
"grad_norm": 0.6603112816810608, |
|
"learning_rate": 4.941529611424365e-05, |
|
"loss": 1.2882, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.012051999491861642, |
|
"grad_norm": 0.9498095512390137, |
|
"learning_rate": 4.939900909792732e-05, |
|
"loss": 1.2835, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.012377729207857903, |
|
"grad_norm": 0.5274548530578613, |
|
"learning_rate": 4.9382722081610985e-05, |
|
"loss": 1.279, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.012703458923854164, |
|
"grad_norm": 0.5299821496009827, |
|
"learning_rate": 4.936643506529465e-05, |
|
"loss": 1.2879, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.013029188639850425, |
|
"grad_norm": 1.0898863077163696, |
|
"learning_rate": 4.9350148048978316e-05, |
|
"loss": 1.2913, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.013354918355846686, |
|
"grad_norm": 0.6892501711845398, |
|
"learning_rate": 4.933386103266198e-05, |
|
"loss": 1.2835, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.013680648071842947, |
|
"grad_norm": 0.9103847146034241, |
|
"learning_rate": 4.9317574016345655e-05, |
|
"loss": 1.2876, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.014006377787839207, |
|
"grad_norm": 0.8750960826873779, |
|
"learning_rate": 4.9301287000029314e-05, |
|
"loss": 1.2761, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.014332107503835467, |
|
"grad_norm": 1.7296843528747559, |
|
"learning_rate": 4.9284999983712986e-05, |
|
"loss": 1.2825, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.014657837219831727, |
|
"grad_norm": 0.7019387483596802, |
|
"learning_rate": 4.926871296739665e-05, |
|
"loss": 1.2774, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.014983566935827988, |
|
"grad_norm": 0.9353660345077515, |
|
"learning_rate": 4.9252425951080324e-05, |
|
"loss": 1.2701, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.015309296651824249, |
|
"grad_norm": 0.7081932425498962, |
|
"learning_rate": 4.923613893476399e-05, |
|
"loss": 1.276, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.01563502636782051, |
|
"grad_norm": 0.8366962671279907, |
|
"learning_rate": 4.9219851918447656e-05, |
|
"loss": 1.2767, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.01596075608381677, |
|
"grad_norm": 1.765871286392212, |
|
"learning_rate": 4.920356490213132e-05, |
|
"loss": 1.2617, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.01628648579981303, |
|
"grad_norm": 0.2926379442214966, |
|
"learning_rate": 4.918727788581499e-05, |
|
"loss": 1.2762, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01661221551580929, |
|
"grad_norm": 1.1176525354385376, |
|
"learning_rate": 4.917099086949866e-05, |
|
"loss": 1.2647, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.016937945231805553, |
|
"grad_norm": 0.384264200925827, |
|
"learning_rate": 4.915470385318232e-05, |
|
"loss": 1.2628, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.017263674947801812, |
|
"grad_norm": 1.5339140892028809, |
|
"learning_rate": 4.913841683686599e-05, |
|
"loss": 1.2692, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.017589404663798075, |
|
"grad_norm": 1.2026703357696533, |
|
"learning_rate": 4.912212982054966e-05, |
|
"loss": 1.2618, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.017915134379794334, |
|
"grad_norm": 0.6754997968673706, |
|
"learning_rate": 4.910584280423333e-05, |
|
"loss": 1.2495, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.018240864095790593, |
|
"grad_norm": 0.8240428566932678, |
|
"learning_rate": 4.908955578791699e-05, |
|
"loss": 1.2498, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.018566593811786856, |
|
"grad_norm": 0.6363087892532349, |
|
"learning_rate": 4.9073268771600654e-05, |
|
"loss": 1.2514, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.018892323527783115, |
|
"grad_norm": 1.393833875656128, |
|
"learning_rate": 4.905698175528433e-05, |
|
"loss": 1.2509, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.019218053243779377, |
|
"grad_norm": 0.6422170996665955, |
|
"learning_rate": 4.904069473896799e-05, |
|
"loss": 1.2405, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.019543782959775637, |
|
"grad_norm": 0.7575420141220093, |
|
"learning_rate": 4.902440772265166e-05, |
|
"loss": 1.2241, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.0198695126757719, |
|
"grad_norm": 0.7148196697235107, |
|
"learning_rate": 4.9008120706335324e-05, |
|
"loss": 1.2372, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.020195242391768158, |
|
"grad_norm": 1.1207329034805298, |
|
"learning_rate": 4.8991833690018996e-05, |
|
"loss": 1.2372, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.02052097210776442, |
|
"grad_norm": 1.3915568590164185, |
|
"learning_rate": 4.897554667370266e-05, |
|
"loss": 1.2129, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.02084670182376068, |
|
"grad_norm": 0.8674553036689758, |
|
"learning_rate": 4.895925965738633e-05, |
|
"loss": 1.2262, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.02117243153975694, |
|
"grad_norm": 0.7640644311904907, |
|
"learning_rate": 4.8942972641069994e-05, |
|
"loss": 1.1998, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.0214981612557532, |
|
"grad_norm": 0.7928606271743774, |
|
"learning_rate": 4.892668562475366e-05, |
|
"loss": 1.1776, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.02182389097174946, |
|
"grad_norm": 1.1644946336746216, |
|
"learning_rate": 4.891039860843733e-05, |
|
"loss": 1.1916, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.022149620687745723, |
|
"grad_norm": 1.1310213804244995, |
|
"learning_rate": 4.8894111592121e-05, |
|
"loss": 1.1786, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.022475350403741982, |
|
"grad_norm": 1.3858141899108887, |
|
"learning_rate": 4.887782457580466e-05, |
|
"loss": 1.1728, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.022801080119738245, |
|
"grad_norm": 3.814767360687256, |
|
"learning_rate": 4.886153755948833e-05, |
|
"loss": 1.1384, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.023126809835734504, |
|
"grad_norm": 1.2411885261535645, |
|
"learning_rate": 4.8845250543171995e-05, |
|
"loss": 1.1588, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.023452539551730763, |
|
"grad_norm": 1.4492881298065186, |
|
"learning_rate": 4.882896352685567e-05, |
|
"loss": 1.1266, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.023778269267727026, |
|
"grad_norm": 0.8389878869056702, |
|
"learning_rate": 4.8812676510539326e-05, |
|
"loss": 1.1446, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.024103998983723285, |
|
"grad_norm": 0.33955487608909607, |
|
"learning_rate": 4.8796389494223e-05, |
|
"loss": 1.1111, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.024429728699719547, |
|
"grad_norm": 0.7004753351211548, |
|
"learning_rate": 4.8780102477906664e-05, |
|
"loss": 1.0954, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.024755458415715807, |
|
"grad_norm": 0.7213209271430969, |
|
"learning_rate": 4.876381546159034e-05, |
|
"loss": 1.1123, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.02508118813171207, |
|
"grad_norm": 0.960991382598877, |
|
"learning_rate": 4.8747528445273996e-05, |
|
"loss": 1.0982, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.025406917847708328, |
|
"grad_norm": 0.6955804228782654, |
|
"learning_rate": 4.873124142895766e-05, |
|
"loss": 1.0827, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.02573264756370459, |
|
"grad_norm": 0.47498619556427, |
|
"learning_rate": 4.8714954412641334e-05, |
|
"loss": 1.1043, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.02605837727970085, |
|
"grad_norm": 0.304063618183136, |
|
"learning_rate": 4.8698667396325e-05, |
|
"loss": 1.0699, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.02638410699569711, |
|
"grad_norm": 0.9996088743209839, |
|
"learning_rate": 4.8682380380008666e-05, |
|
"loss": 1.0697, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.02670983671169337, |
|
"grad_norm": 0.5986392498016357, |
|
"learning_rate": 4.866609336369233e-05, |
|
"loss": 1.0733, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.02703556642768963, |
|
"grad_norm": 0.41347017884254456, |
|
"learning_rate": 4.8649806347376004e-05, |
|
"loss": 1.0643, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.027361296143685893, |
|
"grad_norm": 0.3976612687110901, |
|
"learning_rate": 4.863351933105967e-05, |
|
"loss": 1.0401, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.027687025859682152, |
|
"grad_norm": 1.1716387271881104, |
|
"learning_rate": 4.8617232314743335e-05, |
|
"loss": 1.0298, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.028012755575678415, |
|
"grad_norm": 0.7384105324745178, |
|
"learning_rate": 4.8600945298427e-05, |
|
"loss": 1.0223, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.028338485291674674, |
|
"grad_norm": 0.517280638217926, |
|
"learning_rate": 4.858465828211067e-05, |
|
"loss": 1.0445, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.028664215007670933, |
|
"grad_norm": 0.7129126787185669, |
|
"learning_rate": 4.856837126579434e-05, |
|
"loss": 1.0508, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.028989944723667196, |
|
"grad_norm": 0.35596320033073425, |
|
"learning_rate": 4.8552084249478005e-05, |
|
"loss": 1.0296, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.029315674439663455, |
|
"grad_norm": 0.9362590909004211, |
|
"learning_rate": 4.853579723316167e-05, |
|
"loss": 1.0785, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.029641404155659717, |
|
"grad_norm": 0.8223775625228882, |
|
"learning_rate": 4.8519510216845336e-05, |
|
"loss": 1.043, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.029967133871655977, |
|
"grad_norm": 0.7149192690849304, |
|
"learning_rate": 4.8503223200529e-05, |
|
"loss": 1.0036, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.03029286358765224, |
|
"grad_norm": 0.5907948017120361, |
|
"learning_rate": 4.8486936184212675e-05, |
|
"loss": 1.0408, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.030618593303648498, |
|
"grad_norm": 0.6083859801292419, |
|
"learning_rate": 4.847064916789634e-05, |
|
"loss": 1.0313, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.03094432301964476, |
|
"grad_norm": 0.5470224618911743, |
|
"learning_rate": 4.8454362151580006e-05, |
|
"loss": 1.0395, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.03127005273564102, |
|
"grad_norm": 0.9455150961875916, |
|
"learning_rate": 4.843807513526367e-05, |
|
"loss": 1.0132, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.03159578245163728, |
|
"grad_norm": 0.9068177938461304, |
|
"learning_rate": 4.8421788118947344e-05, |
|
"loss": 1.0219, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.03192151216763354, |
|
"grad_norm": 0.6018943190574646, |
|
"learning_rate": 4.840550110263101e-05, |
|
"loss": 0.9966, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.032247241883629804, |
|
"grad_norm": 1.1521615982055664, |
|
"learning_rate": 4.838921408631467e-05, |
|
"loss": 0.9782, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.03257297159962606, |
|
"grad_norm": 0.33281368017196655, |
|
"learning_rate": 4.837292706999834e-05, |
|
"loss": 1.0325, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.03289870131562232, |
|
"grad_norm": 0.8903327584266663, |
|
"learning_rate": 4.835664005368201e-05, |
|
"loss": 0.9889, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.03322443103161858, |
|
"grad_norm": 0.5526803731918335, |
|
"learning_rate": 4.834035303736568e-05, |
|
"loss": 1.0018, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.03355016074761485, |
|
"grad_norm": 0.8086706399917603, |
|
"learning_rate": 4.832406602104934e-05, |
|
"loss": 1.0189, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.03387589046361111, |
|
"grad_norm": 0.6990864276885986, |
|
"learning_rate": 4.830777900473301e-05, |
|
"loss": 0.996, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.034201620179607366, |
|
"grad_norm": 0.4859602451324463, |
|
"learning_rate": 4.829149198841668e-05, |
|
"loss": 0.992, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.034527349895603625, |
|
"grad_norm": 1.2284592390060425, |
|
"learning_rate": 4.827520497210034e-05, |
|
"loss": 1.0139, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.034853079611599884, |
|
"grad_norm": 0.6529733538627625, |
|
"learning_rate": 4.825891795578401e-05, |
|
"loss": 1.025, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.03517880932759615, |
|
"grad_norm": 0.6755232810974121, |
|
"learning_rate": 4.8242630939467674e-05, |
|
"loss": 1.0123, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.03550453904359241, |
|
"grad_norm": 0.9006055593490601, |
|
"learning_rate": 4.8226343923151347e-05, |
|
"loss": 0.9936, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.03583026875958867, |
|
"grad_norm": 0.7058572769165039, |
|
"learning_rate": 4.821005690683501e-05, |
|
"loss": 0.934, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.03615599847558493, |
|
"grad_norm": 0.4535008668899536, |
|
"learning_rate": 4.819376989051868e-05, |
|
"loss": 1.0269, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.036481728191581186, |
|
"grad_norm": 0.39823395013809204, |
|
"learning_rate": 4.8177482874202344e-05, |
|
"loss": 0.9866, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.03680745790757745, |
|
"grad_norm": 0.8109054565429688, |
|
"learning_rate": 4.816119585788601e-05, |
|
"loss": 1.0209, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.03713318762357371, |
|
"grad_norm": 0.760396420955658, |
|
"learning_rate": 4.814490884156968e-05, |
|
"loss": 0.9711, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.03745891733956997, |
|
"grad_norm": 0.8584955334663391, |
|
"learning_rate": 4.812862182525335e-05, |
|
"loss": 1.0151, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.03778464705556623, |
|
"grad_norm": 1.104041576385498, |
|
"learning_rate": 4.8112334808937013e-05, |
|
"loss": 0.9826, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.038110376771562496, |
|
"grad_norm": 0.6111257672309875, |
|
"learning_rate": 4.809604779262068e-05, |
|
"loss": 0.9524, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.038436106487558755, |
|
"grad_norm": 0.6601366996765137, |
|
"learning_rate": 4.807976077630435e-05, |
|
"loss": 0.9527, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.038761836203555014, |
|
"grad_norm": 0.4624398350715637, |
|
"learning_rate": 4.806347375998802e-05, |
|
"loss": 1.0077, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.03908756591955127, |
|
"grad_norm": 0.2786065638065338, |
|
"learning_rate": 4.8047186743671676e-05, |
|
"loss": 0.956, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.03941329563554753, |
|
"grad_norm": 1.0275955200195312, |
|
"learning_rate": 4.803089972735535e-05, |
|
"loss": 0.9484, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.0397390253515438, |
|
"grad_norm": 0.6198407411575317, |
|
"learning_rate": 4.8014612711039015e-05, |
|
"loss": 0.9847, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.04006475506754006, |
|
"grad_norm": 0.5880489945411682, |
|
"learning_rate": 4.799832569472269e-05, |
|
"loss": 0.9559, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.040390484783536316, |
|
"grad_norm": 0.39753594994544983, |
|
"learning_rate": 4.7982038678406346e-05, |
|
"loss": 0.9489, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.040716214499532576, |
|
"grad_norm": 0.5815085768699646, |
|
"learning_rate": 4.796575166209002e-05, |
|
"loss": 0.9567, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.04104194421552884, |
|
"grad_norm": 0.8463611602783203, |
|
"learning_rate": 4.7949464645773684e-05, |
|
"loss": 0.9706, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.0413676739315251, |
|
"grad_norm": 0.7260481715202332, |
|
"learning_rate": 4.793317762945736e-05, |
|
"loss": 1.0032, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.04169340364752136, |
|
"grad_norm": 0.6970434188842773, |
|
"learning_rate": 4.7916890613141016e-05, |
|
"loss": 0.9559, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.04201913336351762, |
|
"grad_norm": 0.6083927750587463, |
|
"learning_rate": 4.790060359682468e-05, |
|
"loss": 0.9558, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.04234486307951388, |
|
"grad_norm": 0.4736403524875641, |
|
"learning_rate": 4.7884316580508354e-05, |
|
"loss": 0.9444, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.042670592795510144, |
|
"grad_norm": 0.34586021304130554, |
|
"learning_rate": 4.786802956419202e-05, |
|
"loss": 0.9186, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.0429963225115064, |
|
"grad_norm": 0.5979019403457642, |
|
"learning_rate": 4.7851742547875685e-05, |
|
"loss": 0.9367, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.04332205222750266, |
|
"grad_norm": 1.0827624797821045, |
|
"learning_rate": 4.783545553155935e-05, |
|
"loss": 0.9324, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.04364778194349892, |
|
"grad_norm": 1.1920030117034912, |
|
"learning_rate": 4.7819168515243024e-05, |
|
"loss": 0.9367, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.04397351165949519, |
|
"grad_norm": 0.6469812989234924, |
|
"learning_rate": 4.780288149892669e-05, |
|
"loss": 0.9815, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.04429924137549145, |
|
"grad_norm": 0.8156530857086182, |
|
"learning_rate": 4.7786594482610355e-05, |
|
"loss": 0.9679, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.044624971091487706, |
|
"grad_norm": 1.2997325658798218, |
|
"learning_rate": 4.777030746629402e-05, |
|
"loss": 0.9358, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.044950700807483965, |
|
"grad_norm": 0.42360150814056396, |
|
"learning_rate": 4.7754020449977687e-05, |
|
"loss": 0.9326, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.045276430523480224, |
|
"grad_norm": 0.7316247820854187, |
|
"learning_rate": 4.773773343366136e-05, |
|
"loss": 0.9283, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.04560216023947649, |
|
"grad_norm": 0.5978175401687622, |
|
"learning_rate": 4.7721446417345025e-05, |
|
"loss": 0.9699, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.04592788995547275, |
|
"grad_norm": 0.5278334617614746, |
|
"learning_rate": 4.770515940102869e-05, |
|
"loss": 0.99, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.04625361967146901, |
|
"grad_norm": 0.7452822327613831, |
|
"learning_rate": 4.7688872384712356e-05, |
|
"loss": 0.8824, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.04657934938746527, |
|
"grad_norm": 0.4158065617084503, |
|
"learning_rate": 4.767258536839602e-05, |
|
"loss": 0.9076, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.046905079103461526, |
|
"grad_norm": 0.6929590106010437, |
|
"learning_rate": 4.7656298352079694e-05, |
|
"loss": 0.926, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.04723080881945779, |
|
"grad_norm": 0.8249752521514893, |
|
"learning_rate": 4.764001133576336e-05, |
|
"loss": 0.9342, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.04755653853545405, |
|
"grad_norm": 0.6523115038871765, |
|
"learning_rate": 4.7623724319447026e-05, |
|
"loss": 0.9312, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.04788226825145031, |
|
"grad_norm": 0.7809571027755737, |
|
"learning_rate": 4.760743730313069e-05, |
|
"loss": 0.927, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.04820799796744657, |
|
"grad_norm": 0.4370424747467041, |
|
"learning_rate": 4.7591150286814364e-05, |
|
"loss": 0.9275, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.048533727683442836, |
|
"grad_norm": 0.8082228302955627, |
|
"learning_rate": 4.757486327049803e-05, |
|
"loss": 0.9524, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.048859457399439095, |
|
"grad_norm": 0.7073273658752441, |
|
"learning_rate": 4.755857625418169e-05, |
|
"loss": 0.9069, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.049185187115435354, |
|
"grad_norm": 0.9150802493095398, |
|
"learning_rate": 4.754228923786536e-05, |
|
"loss": 0.9669, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.04951091683143161, |
|
"grad_norm": 0.6621295809745789, |
|
"learning_rate": 4.752600222154903e-05, |
|
"loss": 0.9117, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.04983664654742787, |
|
"grad_norm": 1.1658425331115723, |
|
"learning_rate": 4.75097152052327e-05, |
|
"loss": 0.9061, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.05016237626342414, |
|
"grad_norm": 1.1669522523880005, |
|
"learning_rate": 4.749342818891636e-05, |
|
"loss": 0.9625, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.0504881059794204, |
|
"grad_norm": 0.6995384693145752, |
|
"learning_rate": 4.747714117260003e-05, |
|
"loss": 0.9098, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.050813835695416656, |
|
"grad_norm": 0.5169076919555664, |
|
"learning_rate": 4.74608541562837e-05, |
|
"loss": 0.9243, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.051139565411412916, |
|
"grad_norm": 0.33565372228622437, |
|
"learning_rate": 4.744456713996736e-05, |
|
"loss": 0.9375, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.05146529512740918, |
|
"grad_norm": 0.4140024781227112, |
|
"learning_rate": 4.742828012365103e-05, |
|
"loss": 0.919, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.05179102484340544, |
|
"grad_norm": 0.9499224424362183, |
|
"learning_rate": 4.7411993107334694e-05, |
|
"loss": 0.9034, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.0521167545594017, |
|
"grad_norm": 0.8801336288452148, |
|
"learning_rate": 4.7395706091018366e-05, |
|
"loss": 0.881, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.05244248427539796, |
|
"grad_norm": 0.7208696007728577, |
|
"learning_rate": 4.737941907470203e-05, |
|
"loss": 0.8518, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.05276821399139422, |
|
"grad_norm": 0.5132054686546326, |
|
"learning_rate": 4.73631320583857e-05, |
|
"loss": 0.8933, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.053093943707390484, |
|
"grad_norm": 0.6521860957145691, |
|
"learning_rate": 4.7346845042069364e-05, |
|
"loss": 0.9332, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.05341967342338674, |
|
"grad_norm": 0.7121620178222656, |
|
"learning_rate": 4.733055802575303e-05, |
|
"loss": 0.9067, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.053745403139383, |
|
"grad_norm": 0.5065134763717651, |
|
"learning_rate": 4.73142710094367e-05, |
|
"loss": 0.9062, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.05407113285537926, |
|
"grad_norm": 0.5855521559715271, |
|
"learning_rate": 4.729798399312037e-05, |
|
"loss": 0.915, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.05439686257137553, |
|
"grad_norm": 0.5392531156539917, |
|
"learning_rate": 4.728169697680403e-05, |
|
"loss": 0.9124, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.05472259228737179, |
|
"grad_norm": 0.6617989540100098, |
|
"learning_rate": 4.72654099604877e-05, |
|
"loss": 0.8594, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.055048322003368046, |
|
"grad_norm": 0.6459785103797913, |
|
"learning_rate": 4.724912294417137e-05, |
|
"loss": 0.9262, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.055374051719364305, |
|
"grad_norm": 0.34565970301628113, |
|
"learning_rate": 4.723283592785504e-05, |
|
"loss": 0.8747, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.055699781435360564, |
|
"grad_norm": 0.9510948061943054, |
|
"learning_rate": 4.7216548911538696e-05, |
|
"loss": 0.9027, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.05602551115135683, |
|
"grad_norm": 0.577192485332489, |
|
"learning_rate": 4.720026189522237e-05, |
|
"loss": 0.9192, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.05635124086735309, |
|
"grad_norm": 0.38653406500816345, |
|
"learning_rate": 4.7183974878906034e-05, |
|
"loss": 0.8759, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.05667697058334935, |
|
"grad_norm": 0.6405381560325623, |
|
"learning_rate": 4.716768786258971e-05, |
|
"loss": 0.8486, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.05700270029934561, |
|
"grad_norm": 0.6968704462051392, |
|
"learning_rate": 4.7151400846273366e-05, |
|
"loss": 0.903, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.057328430015341866, |
|
"grad_norm": 0.8094695210456848, |
|
"learning_rate": 4.713511382995704e-05, |
|
"loss": 0.864, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.05765415973133813, |
|
"grad_norm": 0.8325287103652954, |
|
"learning_rate": 4.7118826813640704e-05, |
|
"loss": 0.8886, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.05797988944733439, |
|
"grad_norm": 0.5068339705467224, |
|
"learning_rate": 4.710253979732437e-05, |
|
"loss": 0.8767, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.05830561916333065, |
|
"grad_norm": 0.7535611391067505, |
|
"learning_rate": 4.7086252781008036e-05, |
|
"loss": 0.8661, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.05863134887932691, |
|
"grad_norm": 0.9104974865913391, |
|
"learning_rate": 4.70699657646917e-05, |
|
"loss": 0.8612, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.058957078595323176, |
|
"grad_norm": 0.9106101989746094, |
|
"learning_rate": 4.7053678748375374e-05, |
|
"loss": 0.8885, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.059282808311319435, |
|
"grad_norm": 0.9990994334220886, |
|
"learning_rate": 4.703739173205904e-05, |
|
"loss": 0.9097, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.059608538027315694, |
|
"grad_norm": 0.6219133138656616, |
|
"learning_rate": 4.7021104715742705e-05, |
|
"loss": 0.8349, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.05993426774331195, |
|
"grad_norm": 0.28884798288345337, |
|
"learning_rate": 4.700481769942637e-05, |
|
"loss": 0.8359, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.06025999745930821, |
|
"grad_norm": 0.6142743229866028, |
|
"learning_rate": 4.698853068311004e-05, |
|
"loss": 0.8686, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.06058572717530448, |
|
"grad_norm": 0.7121238708496094, |
|
"learning_rate": 4.697224366679371e-05, |
|
"loss": 0.8318, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.06091145689130074, |
|
"grad_norm": 0.3502013683319092, |
|
"learning_rate": 4.6955956650477375e-05, |
|
"loss": 0.8353, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.061237186607296996, |
|
"grad_norm": 0.869159460067749, |
|
"learning_rate": 4.693966963416104e-05, |
|
"loss": 0.8811, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.061562916323293256, |
|
"grad_norm": 0.4008027911186218, |
|
"learning_rate": 4.6923382617844706e-05, |
|
"loss": 0.8595, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.06188864603928952, |
|
"grad_norm": 0.6609760522842407, |
|
"learning_rate": 4.690709560152838e-05, |
|
"loss": 0.8591, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.06221437575528578, |
|
"grad_norm": 0.41599878668785095, |
|
"learning_rate": 4.6890808585212045e-05, |
|
"loss": 0.8792, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.06254010547128204, |
|
"grad_norm": 0.8219528794288635, |
|
"learning_rate": 4.687452156889571e-05, |
|
"loss": 0.8469, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.0628658351872783, |
|
"grad_norm": 0.5383628010749817, |
|
"learning_rate": 4.6858234552579376e-05, |
|
"loss": 0.8619, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.06319156490327456, |
|
"grad_norm": 1.0892442464828491, |
|
"learning_rate": 4.684194753626304e-05, |
|
"loss": 0.8219, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.06351729461927082, |
|
"grad_norm": 0.7258702516555786, |
|
"learning_rate": 4.6825660519946714e-05, |
|
"loss": 0.8243, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.06384302433526708, |
|
"grad_norm": 1.2622634172439575, |
|
"learning_rate": 4.680937350363038e-05, |
|
"loss": 0.8619, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.06416875405126335, |
|
"grad_norm": 0.3901592195034027, |
|
"learning_rate": 4.6793086487314046e-05, |
|
"loss": 0.8315, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.06449448376725961, |
|
"grad_norm": 0.5976518392562866, |
|
"learning_rate": 4.677679947099771e-05, |
|
"loss": 0.8193, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.06482021348325587, |
|
"grad_norm": 1.0668984651565552, |
|
"learning_rate": 4.676051245468138e-05, |
|
"loss": 0.8381, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.06514594319925213, |
|
"grad_norm": 0.6844903826713562, |
|
"learning_rate": 4.674422543836505e-05, |
|
"loss": 0.8202, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.06547167291524839, |
|
"grad_norm": 0.6987929344177246, |
|
"learning_rate": 4.672793842204871e-05, |
|
"loss": 0.844, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.06579740263124464, |
|
"grad_norm": 1.0227413177490234, |
|
"learning_rate": 4.671165140573238e-05, |
|
"loss": 0.8093, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.0661231323472409, |
|
"grad_norm": 0.5901645421981812, |
|
"learning_rate": 4.669536438941605e-05, |
|
"loss": 0.8068, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.06644886206323716, |
|
"grad_norm": 0.7951213717460632, |
|
"learning_rate": 4.667907737309972e-05, |
|
"loss": 0.8581, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.06677459177923342, |
|
"grad_norm": 0.617341160774231, |
|
"learning_rate": 4.666279035678338e-05, |
|
"loss": 0.8427, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.0671003214952297, |
|
"grad_norm": 0.694558322429657, |
|
"learning_rate": 4.6646503340467044e-05, |
|
"loss": 0.8619, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.06742605121122595, |
|
"grad_norm": 0.6441329717636108, |
|
"learning_rate": 4.663021632415072e-05, |
|
"loss": 0.8866, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.06775178092722221, |
|
"grad_norm": 0.46440285444259644, |
|
"learning_rate": 4.661392930783438e-05, |
|
"loss": 0.8435, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.06807751064321847, |
|
"grad_norm": 0.42911046743392944, |
|
"learning_rate": 4.659764229151805e-05, |
|
"loss": 0.8145, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.06840324035921473, |
|
"grad_norm": 0.7508918046951294, |
|
"learning_rate": 4.6581355275201714e-05, |
|
"loss": 0.8576, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.06872897007521099, |
|
"grad_norm": 0.6361901164054871, |
|
"learning_rate": 4.6565068258885386e-05, |
|
"loss": 0.7982, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.06905469979120725, |
|
"grad_norm": 0.804426372051239, |
|
"learning_rate": 4.654878124256905e-05, |
|
"loss": 0.8386, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.06938042950720351, |
|
"grad_norm": 0.5336636304855347, |
|
"learning_rate": 4.653249422625272e-05, |
|
"loss": 0.8296, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.06970615922319977, |
|
"grad_norm": 0.5880811810493469, |
|
"learning_rate": 4.6516207209936384e-05, |
|
"loss": 0.8065, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.07003188893919603, |
|
"grad_norm": 0.4607875347137451, |
|
"learning_rate": 4.649992019362005e-05, |
|
"loss": 0.8601, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.0703576186551923, |
|
"grad_norm": 0.6503331065177917, |
|
"learning_rate": 4.648363317730372e-05, |
|
"loss": 0.7925, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.07068334837118856, |
|
"grad_norm": 0.7841913104057312, |
|
"learning_rate": 4.646734616098739e-05, |
|
"loss": 0.8218, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.07100907808718482, |
|
"grad_norm": 0.45437848567962646, |
|
"learning_rate": 4.645105914467105e-05, |
|
"loss": 0.8663, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.07133480780318108, |
|
"grad_norm": 0.6052650213241577, |
|
"learning_rate": 4.643477212835472e-05, |
|
"loss": 0.8634, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.07166053751917734, |
|
"grad_norm": 0.5301306247711182, |
|
"learning_rate": 4.641848511203839e-05, |
|
"loss": 0.8215, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.0719862672351736, |
|
"grad_norm": 0.8724095821380615, |
|
"learning_rate": 4.640219809572206e-05, |
|
"loss": 0.8304, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.07231199695116985, |
|
"grad_norm": 0.8219661116600037, |
|
"learning_rate": 4.6385911079405716e-05, |
|
"loss": 0.8515, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.07263772666716611, |
|
"grad_norm": 0.6308414936065674, |
|
"learning_rate": 4.636962406308939e-05, |
|
"loss": 0.7233, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.07296345638316237, |
|
"grad_norm": 0.35772112011909485, |
|
"learning_rate": 4.6353337046773054e-05, |
|
"loss": 0.7792, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.07328918609915865, |
|
"grad_norm": 0.519975483417511, |
|
"learning_rate": 4.633705003045673e-05, |
|
"loss": 0.8265, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.0736149158151549, |
|
"grad_norm": 0.8935458660125732, |
|
"learning_rate": 4.6320763014140386e-05, |
|
"loss": 0.8276, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.07394064553115116, |
|
"grad_norm": 0.4765929877758026, |
|
"learning_rate": 4.630447599782406e-05, |
|
"loss": 0.8088, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.07426637524714742, |
|
"grad_norm": 0.5910876989364624, |
|
"learning_rate": 4.6288188981507724e-05, |
|
"loss": 0.8003, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.07459210496314368, |
|
"grad_norm": 0.6108260154724121, |
|
"learning_rate": 4.627190196519139e-05, |
|
"loss": 0.7949, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.07491783467913994, |
|
"grad_norm": 0.9665610194206238, |
|
"learning_rate": 4.625561494887506e-05, |
|
"loss": 0.7989, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.0752435643951362, |
|
"grad_norm": 0.43020346760749817, |
|
"learning_rate": 4.623932793255872e-05, |
|
"loss": 0.8052, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.07556929411113246, |
|
"grad_norm": 0.3901965022087097, |
|
"learning_rate": 4.6223040916242394e-05, |
|
"loss": 0.7756, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.07589502382712872, |
|
"grad_norm": 0.8132317066192627, |
|
"learning_rate": 4.620675389992606e-05, |
|
"loss": 0.797, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.07622075354312499, |
|
"grad_norm": 0.6211370825767517, |
|
"learning_rate": 4.619046688360973e-05, |
|
"loss": 0.7698, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.07654648325912125, |
|
"grad_norm": 0.8378313779830933, |
|
"learning_rate": 4.617417986729339e-05, |
|
"loss": 0.805, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.07687221297511751, |
|
"grad_norm": 0.9225132465362549, |
|
"learning_rate": 4.615789285097706e-05, |
|
"loss": 0.7999, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.07719794269111377, |
|
"grad_norm": 0.46878713369369507, |
|
"learning_rate": 4.614160583466073e-05, |
|
"loss": 0.75, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.07752367240711003, |
|
"grad_norm": 0.409138560295105, |
|
"learning_rate": 4.6125318818344395e-05, |
|
"loss": 0.7944, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.07784940212310629, |
|
"grad_norm": 0.4791303277015686, |
|
"learning_rate": 4.610903180202806e-05, |
|
"loss": 0.7912, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.07817513183910255, |
|
"grad_norm": 0.8759014010429382, |
|
"learning_rate": 4.6092744785711726e-05, |
|
"loss": 0.8198, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.0785008615550988, |
|
"grad_norm": 0.47595012187957764, |
|
"learning_rate": 4.60764577693954e-05, |
|
"loss": 0.7984, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.07882659127109506, |
|
"grad_norm": 0.7923133373260498, |
|
"learning_rate": 4.6060170753079065e-05, |
|
"loss": 0.7436, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.07915232098709134, |
|
"grad_norm": 0.39254361391067505, |
|
"learning_rate": 4.604388373676273e-05, |
|
"loss": 0.7771, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.0794780507030876, |
|
"grad_norm": 0.6828033924102783, |
|
"learning_rate": 4.6027596720446396e-05, |
|
"loss": 0.8083, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.07980378041908386, |
|
"grad_norm": 0.6189585328102112, |
|
"learning_rate": 4.601130970413006e-05, |
|
"loss": 0.7885, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.08012951013508011, |
|
"grad_norm": 0.6750975847244263, |
|
"learning_rate": 4.5995022687813734e-05, |
|
"loss": 0.759, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.08045523985107637, |
|
"grad_norm": 0.6616020798683167, |
|
"learning_rate": 4.59787356714974e-05, |
|
"loss": 0.8226, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.08078096956707263, |
|
"grad_norm": 0.7598117589950562, |
|
"learning_rate": 4.5962448655181066e-05, |
|
"loss": 0.7806, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.08110669928306889, |
|
"grad_norm": 0.41183263063430786, |
|
"learning_rate": 4.594616163886473e-05, |
|
"loss": 0.7939, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.08143242899906515, |
|
"grad_norm": 0.40911582112312317, |
|
"learning_rate": 4.59298746225484e-05, |
|
"loss": 0.7635, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.08175815871506141, |
|
"grad_norm": 0.8820083737373352, |
|
"learning_rate": 4.591358760623207e-05, |
|
"loss": 0.7886, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.08208388843105768, |
|
"grad_norm": 0.9055482745170593, |
|
"learning_rate": 4.589730058991573e-05, |
|
"loss": 0.7487, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.08240961814705394, |
|
"grad_norm": 0.5680561065673828, |
|
"learning_rate": 4.58810135735994e-05, |
|
"loss": 0.7505, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.0827353478630502, |
|
"grad_norm": 0.5064377188682556, |
|
"learning_rate": 4.586472655728307e-05, |
|
"loss": 0.768, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.08306107757904646, |
|
"grad_norm": 0.462200403213501, |
|
"learning_rate": 4.584843954096674e-05, |
|
"loss": 0.7399, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.08338680729504272, |
|
"grad_norm": 0.7820500731468201, |
|
"learning_rate": 4.58321525246504e-05, |
|
"loss": 0.8109, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.08371253701103898, |
|
"grad_norm": 0.4833464026451111, |
|
"learning_rate": 4.5815865508334064e-05, |
|
"loss": 0.764, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.08403826672703524, |
|
"grad_norm": 0.3821680247783661, |
|
"learning_rate": 4.5799578492017737e-05, |
|
"loss": 0.7397, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.0843639964430315, |
|
"grad_norm": 0.5084909200668335, |
|
"learning_rate": 4.57832914757014e-05, |
|
"loss": 0.7428, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.08468972615902776, |
|
"grad_norm": 0.925619900226593, |
|
"learning_rate": 4.576700445938507e-05, |
|
"loss": 0.7386, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.08501545587502403, |
|
"grad_norm": 0.8126088380813599, |
|
"learning_rate": 4.5750717443068734e-05, |
|
"loss": 0.7798, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.08534118559102029, |
|
"grad_norm": 1.0178046226501465, |
|
"learning_rate": 4.5734430426752406e-05, |
|
"loss": 0.7796, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.08566691530701655, |
|
"grad_norm": 0.4879295229911804, |
|
"learning_rate": 4.571814341043607e-05, |
|
"loss": 0.7762, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.0859926450230128, |
|
"grad_norm": 0.6722548604011536, |
|
"learning_rate": 4.570185639411974e-05, |
|
"loss": 0.7234, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.08631837473900907, |
|
"grad_norm": 0.6326486468315125, |
|
"learning_rate": 4.5685569377803403e-05, |
|
"loss": 0.72, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.08664410445500532, |
|
"grad_norm": 0.4354076087474823, |
|
"learning_rate": 4.566928236148707e-05, |
|
"loss": 0.7704, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.08696983417100158, |
|
"grad_norm": 0.7113054394721985, |
|
"learning_rate": 4.565299534517074e-05, |
|
"loss": 0.7623, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.08729556388699784, |
|
"grad_norm": 0.595664381980896, |
|
"learning_rate": 4.563670832885441e-05, |
|
"loss": 0.765, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.0876212936029941, |
|
"grad_norm": 0.5344740152359009, |
|
"learning_rate": 4.562042131253807e-05, |
|
"loss": 0.7201, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.08794702331899037, |
|
"grad_norm": 0.5330939292907715, |
|
"learning_rate": 4.560413429622174e-05, |
|
"loss": 0.7617, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.08827275303498663, |
|
"grad_norm": 0.45265939831733704, |
|
"learning_rate": 4.5587847279905405e-05, |
|
"loss": 0.7806, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.0885984827509829, |
|
"grad_norm": 0.5947338342666626, |
|
"learning_rate": 4.557156026358908e-05, |
|
"loss": 0.7524, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.08892421246697915, |
|
"grad_norm": 0.8656592965126038, |
|
"learning_rate": 4.555527324727274e-05, |
|
"loss": 0.7599, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.08924994218297541, |
|
"grad_norm": 0.645728349685669, |
|
"learning_rate": 4.553898623095641e-05, |
|
"loss": 0.7629, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.08957567189897167, |
|
"grad_norm": 0.8474392890930176, |
|
"learning_rate": 4.5522699214640074e-05, |
|
"loss": 0.7641, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.08990140161496793, |
|
"grad_norm": 0.7386724948883057, |
|
"learning_rate": 4.550641219832375e-05, |
|
"loss": 0.7523, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.09022713133096419, |
|
"grad_norm": 0.9216130971908569, |
|
"learning_rate": 4.549012518200741e-05, |
|
"loss": 0.7562, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.09055286104696045, |
|
"grad_norm": 0.8789349794387817, |
|
"learning_rate": 4.547383816569107e-05, |
|
"loss": 0.7229, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.0908785907629567, |
|
"grad_norm": 0.582091748714447, |
|
"learning_rate": 4.5457551149374744e-05, |
|
"loss": 0.7274, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.09120432047895298, |
|
"grad_norm": 0.6011328101158142, |
|
"learning_rate": 4.544126413305841e-05, |
|
"loss": 0.7297, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.09153005019494924, |
|
"grad_norm": 0.6041598916053772, |
|
"learning_rate": 4.542497711674208e-05, |
|
"loss": 0.7409, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.0918557799109455, |
|
"grad_norm": 0.7190874814987183, |
|
"learning_rate": 4.540869010042574e-05, |
|
"loss": 0.7149, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.09218150962694176, |
|
"grad_norm": 0.5705780982971191, |
|
"learning_rate": 4.5392403084109414e-05, |
|
"loss": 0.76, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.09250723934293802, |
|
"grad_norm": 0.7988401651382446, |
|
"learning_rate": 4.537611606779308e-05, |
|
"loss": 0.7594, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.09283296905893428, |
|
"grad_norm": 0.48971208930015564, |
|
"learning_rate": 4.5359829051476745e-05, |
|
"loss": 0.7505, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.09315869877493053, |
|
"grad_norm": 0.6600379347801208, |
|
"learning_rate": 4.534354203516041e-05, |
|
"loss": 0.7902, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.0934844284909268, |
|
"grad_norm": 0.6095920205116272, |
|
"learning_rate": 4.5327255018844077e-05, |
|
"loss": 0.7166, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.09381015820692305, |
|
"grad_norm": 0.6808424592018127, |
|
"learning_rate": 4.531096800252775e-05, |
|
"loss": 0.7148, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.09413588792291933, |
|
"grad_norm": 0.9923068284988403, |
|
"learning_rate": 4.5294680986211415e-05, |
|
"loss": 0.7226, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.09446161763891558, |
|
"grad_norm": 0.8952274918556213, |
|
"learning_rate": 4.527839396989508e-05, |
|
"loss": 0.7645, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.09478734735491184, |
|
"grad_norm": 0.7416999936103821, |
|
"learning_rate": 4.5262106953578746e-05, |
|
"loss": 0.7503, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.0951130770709081, |
|
"grad_norm": 0.7862002849578857, |
|
"learning_rate": 4.524581993726242e-05, |
|
"loss": 0.7469, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.09543880678690436, |
|
"grad_norm": 0.6296769380569458, |
|
"learning_rate": 4.5229532920946085e-05, |
|
"loss": 0.6873, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.09576453650290062, |
|
"grad_norm": 0.9056894779205322, |
|
"learning_rate": 4.521324590462975e-05, |
|
"loss": 0.7126, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.09609026621889688, |
|
"grad_norm": 0.624724268913269, |
|
"learning_rate": 4.5196958888313416e-05, |
|
"loss": 0.7668, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.09641599593489314, |
|
"grad_norm": 0.680957555770874, |
|
"learning_rate": 4.518067187199708e-05, |
|
"loss": 0.7783, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.0967417256508894, |
|
"grad_norm": 0.5778472423553467, |
|
"learning_rate": 4.5164384855680754e-05, |
|
"loss": 0.7355, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.09706745536688567, |
|
"grad_norm": 0.6346442699432373, |
|
"learning_rate": 4.514809783936442e-05, |
|
"loss": 0.7276, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.09739318508288193, |
|
"grad_norm": 0.9289300441741943, |
|
"learning_rate": 4.5131810823048086e-05, |
|
"loss": 0.7179, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.09771891479887819, |
|
"grad_norm": 0.7473464012145996, |
|
"learning_rate": 4.511552380673175e-05, |
|
"loss": 0.7172, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.09804464451487445, |
|
"grad_norm": 0.6801792979240417, |
|
"learning_rate": 4.509923679041542e-05, |
|
"loss": 0.7074, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.09837037423087071, |
|
"grad_norm": 0.6129624247550964, |
|
"learning_rate": 4.508294977409909e-05, |
|
"loss": 0.7166, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.09869610394686697, |
|
"grad_norm": 0.8195613026618958, |
|
"learning_rate": 4.506666275778275e-05, |
|
"loss": 0.7709, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.09902183366286323, |
|
"grad_norm": 0.4703550934791565, |
|
"learning_rate": 4.505037574146642e-05, |
|
"loss": 0.7037, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.09934756337885949, |
|
"grad_norm": 0.7674877047538757, |
|
"learning_rate": 4.503408872515009e-05, |
|
"loss": 0.7202, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.09967329309485574, |
|
"grad_norm": 0.8670388460159302, |
|
"learning_rate": 4.501780170883376e-05, |
|
"loss": 0.7183, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.09999902281085202, |
|
"grad_norm": 0.280652791261673, |
|
"learning_rate": 4.500151469251742e-05, |
|
"loss": 0.6998, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.10032475252684828, |
|
"grad_norm": 0.7346746325492859, |
|
"learning_rate": 4.4985227676201084e-05, |
|
"loss": 0.7358, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.10065048224284454, |
|
"grad_norm": 0.978670060634613, |
|
"learning_rate": 4.4968940659884756e-05, |
|
"loss": 0.7259, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.1009762119588408, |
|
"grad_norm": 0.5910704135894775, |
|
"learning_rate": 4.495265364356842e-05, |
|
"loss": 0.7074, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.10130194167483705, |
|
"grad_norm": 0.7966532707214355, |
|
"learning_rate": 4.493636662725209e-05, |
|
"loss": 0.7117, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.10162767139083331, |
|
"grad_norm": 0.9344640374183655, |
|
"learning_rate": 4.4920079610935754e-05, |
|
"loss": 0.7349, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.10195340110682957, |
|
"grad_norm": 0.8043787479400635, |
|
"learning_rate": 4.4903792594619426e-05, |
|
"loss": 0.7361, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.10227913082282583, |
|
"grad_norm": 0.6786687970161438, |
|
"learning_rate": 4.488750557830309e-05, |
|
"loss": 0.6969, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.10260486053882209, |
|
"grad_norm": 0.4679253399372101, |
|
"learning_rate": 4.487121856198676e-05, |
|
"loss": 0.7157, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.10293059025481836, |
|
"grad_norm": 0.5903817415237427, |
|
"learning_rate": 4.485493154567042e-05, |
|
"loss": 0.7352, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.10325631997081462, |
|
"grad_norm": 0.715834379196167, |
|
"learning_rate": 4.483864452935409e-05, |
|
"loss": 0.7532, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.10358204968681088, |
|
"grad_norm": 0.6664106249809265, |
|
"learning_rate": 4.482235751303776e-05, |
|
"loss": 0.6853, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.10390777940280714, |
|
"grad_norm": 0.700243353843689, |
|
"learning_rate": 4.480607049672143e-05, |
|
"loss": 0.6835, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.1042335091188034, |
|
"grad_norm": 0.7481942772865295, |
|
"learning_rate": 4.478978348040509e-05, |
|
"loss": 0.7343, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.10455923883479966, |
|
"grad_norm": 0.5347774028778076, |
|
"learning_rate": 4.477349646408876e-05, |
|
"loss": 0.6688, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.10488496855079592, |
|
"grad_norm": 0.541346549987793, |
|
"learning_rate": 4.4757209447772425e-05, |
|
"loss": 0.7088, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.10521069826679218, |
|
"grad_norm": 0.6126936674118042, |
|
"learning_rate": 4.47409224314561e-05, |
|
"loss": 0.7333, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.10553642798278844, |
|
"grad_norm": 0.952684760093689, |
|
"learning_rate": 4.472463541513976e-05, |
|
"loss": 0.7242, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.10586215769878471, |
|
"grad_norm": 0.72658771276474, |
|
"learning_rate": 4.470834839882343e-05, |
|
"loss": 0.7422, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.10618788741478097, |
|
"grad_norm": 0.5741873383522034, |
|
"learning_rate": 4.4692061382507094e-05, |
|
"loss": 0.7307, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.10651361713077723, |
|
"grad_norm": 0.646496057510376, |
|
"learning_rate": 4.467577436619077e-05, |
|
"loss": 0.7138, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.10683934684677349, |
|
"grad_norm": 0.40007448196411133, |
|
"learning_rate": 4.465948734987443e-05, |
|
"loss": 0.7045, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.10716507656276975, |
|
"grad_norm": 0.6594932675361633, |
|
"learning_rate": 4.464320033355809e-05, |
|
"loss": 0.6874, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.107490806278766, |
|
"grad_norm": 0.7663995623588562, |
|
"learning_rate": 4.4626913317241764e-05, |
|
"loss": 0.7303, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.10781653599476226, |
|
"grad_norm": 0.5867152810096741, |
|
"learning_rate": 4.461062630092543e-05, |
|
"loss": 0.7072, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.10814226571075852, |
|
"grad_norm": 0.5017038583755493, |
|
"learning_rate": 4.45943392846091e-05, |
|
"loss": 0.6879, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.10846799542675478, |
|
"grad_norm": 0.6196131110191345, |
|
"learning_rate": 4.457805226829276e-05, |
|
"loss": 0.7094, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.10879372514275105, |
|
"grad_norm": 0.643118679523468, |
|
"learning_rate": 4.4561765251976434e-05, |
|
"loss": 0.6763, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.10911945485874731, |
|
"grad_norm": 0.516583263874054, |
|
"learning_rate": 4.45454782356601e-05, |
|
"loss": 0.6744, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.10944518457474357, |
|
"grad_norm": 0.6565887928009033, |
|
"learning_rate": 4.4529191219343765e-05, |
|
"loss": 0.6818, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.10977091429073983, |
|
"grad_norm": 0.644209623336792, |
|
"learning_rate": 4.451290420302743e-05, |
|
"loss": 0.6795, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.11009664400673609, |
|
"grad_norm": 0.5720322132110596, |
|
"learning_rate": 4.4496617186711096e-05, |
|
"loss": 0.6444, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.11042237372273235, |
|
"grad_norm": 0.7580476999282837, |
|
"learning_rate": 4.448033017039477e-05, |
|
"loss": 0.7067, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.11074810343872861, |
|
"grad_norm": 0.3334468603134155, |
|
"learning_rate": 4.4464043154078435e-05, |
|
"loss": 0.7245, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.11107383315472487, |
|
"grad_norm": 0.7232679724693298, |
|
"learning_rate": 4.44477561377621e-05, |
|
"loss": 0.6476, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.11139956287072113, |
|
"grad_norm": 0.49447712302207947, |
|
"learning_rate": 4.4431469121445766e-05, |
|
"loss": 0.6813, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.11172529258671739, |
|
"grad_norm": 0.9112755656242371, |
|
"learning_rate": 4.441518210512943e-05, |
|
"loss": 0.7039, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.11205102230271366, |
|
"grad_norm": 0.9391865134239197, |
|
"learning_rate": 4.4398895088813104e-05, |
|
"loss": 0.7154, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.11237675201870992, |
|
"grad_norm": 0.6869890689849854, |
|
"learning_rate": 4.438260807249677e-05, |
|
"loss": 0.7462, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.11270248173470618, |
|
"grad_norm": 0.6954273581504822, |
|
"learning_rate": 4.4366321056180436e-05, |
|
"loss": 0.7151, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.11302821145070244, |
|
"grad_norm": 0.8512132167816162, |
|
"learning_rate": 4.43500340398641e-05, |
|
"loss": 0.7157, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.1133539411666987, |
|
"grad_norm": 0.7044045329093933, |
|
"learning_rate": 4.4333747023547774e-05, |
|
"loss": 0.6649, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.11367967088269496, |
|
"grad_norm": 0.6773298978805542, |
|
"learning_rate": 4.431746000723144e-05, |
|
"loss": 0.6137, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.11400540059869121, |
|
"grad_norm": 0.544491171836853, |
|
"learning_rate": 4.43011729909151e-05, |
|
"loss": 0.6577, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.11433113031468747, |
|
"grad_norm": 0.543596625328064, |
|
"learning_rate": 4.428488597459877e-05, |
|
"loss": 0.6699, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.11465686003068373, |
|
"grad_norm": 0.7878594398498535, |
|
"learning_rate": 4.426859895828244e-05, |
|
"loss": 0.709, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.11498258974668, |
|
"grad_norm": 0.8226998448371887, |
|
"learning_rate": 4.425231194196611e-05, |
|
"loss": 0.6954, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.11530831946267626, |
|
"grad_norm": 0.48608875274658203, |
|
"learning_rate": 4.423602492564977e-05, |
|
"loss": 0.7502, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.11563404917867252, |
|
"grad_norm": 0.6490182280540466, |
|
"learning_rate": 4.421973790933344e-05, |
|
"loss": 0.7085, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.11595977889466878, |
|
"grad_norm": 0.3032003343105316, |
|
"learning_rate": 4.420345089301711e-05, |
|
"loss": 0.6778, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.11628550861066504, |
|
"grad_norm": 0.7003344297409058, |
|
"learning_rate": 4.418716387670077e-05, |
|
"loss": 0.71, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.1166112383266613, |
|
"grad_norm": 0.6569785475730896, |
|
"learning_rate": 4.417087686038444e-05, |
|
"loss": 0.653, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.11693696804265756, |
|
"grad_norm": 0.5428867936134338, |
|
"learning_rate": 4.4154589844068104e-05, |
|
"loss": 0.6733, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.11726269775865382, |
|
"grad_norm": 0.6179760098457336, |
|
"learning_rate": 4.4138302827751776e-05, |
|
"loss": 0.7081, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.11758842747465008, |
|
"grad_norm": 0.7397803068161011, |
|
"learning_rate": 4.412201581143544e-05, |
|
"loss": 0.6894, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.11791415719064635, |
|
"grad_norm": 0.725395679473877, |
|
"learning_rate": 4.410572879511911e-05, |
|
"loss": 0.6874, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.11823988690664261, |
|
"grad_norm": 0.45658519864082336, |
|
"learning_rate": 4.4089441778802774e-05, |
|
"loss": 0.6821, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.11856561662263887, |
|
"grad_norm": 0.9002487063407898, |
|
"learning_rate": 4.407315476248644e-05, |
|
"loss": 0.641, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.11889134633863513, |
|
"grad_norm": 0.8738647103309631, |
|
"learning_rate": 4.405686774617011e-05, |
|
"loss": 0.6763, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.11921707605463139, |
|
"grad_norm": 1.0051002502441406, |
|
"learning_rate": 4.404058072985378e-05, |
|
"loss": 0.6775, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.11954280577062765, |
|
"grad_norm": 0.8074469566345215, |
|
"learning_rate": 4.402429371353744e-05, |
|
"loss": 0.7408, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.1198685354866239, |
|
"grad_norm": 0.485388845205307, |
|
"learning_rate": 4.400800669722111e-05, |
|
"loss": 0.6729, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.12019426520262017, |
|
"grad_norm": 0.7123886942863464, |
|
"learning_rate": 4.399171968090478e-05, |
|
"loss": 0.661, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.12051999491861642, |
|
"grad_norm": 0.4587586522102356, |
|
"learning_rate": 4.397543266458845e-05, |
|
"loss": 0.6662, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.1208457246346127, |
|
"grad_norm": 0.7726449966430664, |
|
"learning_rate": 4.395914564827211e-05, |
|
"loss": 0.7469, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.12117145435060896, |
|
"grad_norm": 0.8636273741722107, |
|
"learning_rate": 4.394285863195578e-05, |
|
"loss": 0.6669, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.12149718406660522, |
|
"grad_norm": 0.6817033886909485, |
|
"learning_rate": 4.3926571615639444e-05, |
|
"loss": 0.6874, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.12182291378260147, |
|
"grad_norm": 0.5549355149269104, |
|
"learning_rate": 4.391028459932312e-05, |
|
"loss": 0.6939, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.12214864349859773, |
|
"grad_norm": 0.6180316805839539, |
|
"learning_rate": 4.389399758300678e-05, |
|
"loss": 0.6299, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.12247437321459399, |
|
"grad_norm": 0.7779985070228577, |
|
"learning_rate": 4.387771056669045e-05, |
|
"loss": 0.7181, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.12280010293059025, |
|
"grad_norm": 0.7182669043540955, |
|
"learning_rate": 4.3861423550374114e-05, |
|
"loss": 0.6703, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.12312583264658651, |
|
"grad_norm": 0.7191387414932251, |
|
"learning_rate": 4.3845136534057787e-05, |
|
"loss": 0.6802, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.12345156236258277, |
|
"grad_norm": 0.6137369275093079, |
|
"learning_rate": 4.382884951774145e-05, |
|
"loss": 0.7028, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.12377729207857904, |
|
"grad_norm": 0.7508791089057922, |
|
"learning_rate": 4.381256250142511e-05, |
|
"loss": 0.642, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.1241030217945753, |
|
"grad_norm": 0.6414891481399536, |
|
"learning_rate": 4.3796275485108784e-05, |
|
"loss": 0.6255, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.12442875151057156, |
|
"grad_norm": 0.6669697165489197, |
|
"learning_rate": 4.377998846879245e-05, |
|
"loss": 0.6691, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.12475448122656782, |
|
"grad_norm": 0.8991898894309998, |
|
"learning_rate": 4.376370145247612e-05, |
|
"loss": 0.6727, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.12508021094256408, |
|
"grad_norm": 0.4924679398536682, |
|
"learning_rate": 4.374741443615978e-05, |
|
"loss": 0.6661, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.12540594065856034, |
|
"grad_norm": 0.3712103068828583, |
|
"learning_rate": 4.3731127419843453e-05, |
|
"loss": 0.7306, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.1257316703745566, |
|
"grad_norm": 0.9136518836021423, |
|
"learning_rate": 4.371484040352712e-05, |
|
"loss": 0.6453, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.12605740009055286, |
|
"grad_norm": 0.6828204393386841, |
|
"learning_rate": 4.3698553387210785e-05, |
|
"loss": 0.6587, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.12638312980654912, |
|
"grad_norm": 0.6366333961486816, |
|
"learning_rate": 4.368226637089445e-05, |
|
"loss": 0.6606, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.12670885952254538, |
|
"grad_norm": 0.39375558495521545, |
|
"learning_rate": 4.3665979354578116e-05, |
|
"loss": 0.6937, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.12703458923854163, |
|
"grad_norm": 0.46293410658836365, |
|
"learning_rate": 4.364969233826179e-05, |
|
"loss": 0.6504, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.1273603189545379, |
|
"grad_norm": 0.9897958040237427, |
|
"learning_rate": 4.3633405321945455e-05, |
|
"loss": 0.7126, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.12768604867053415, |
|
"grad_norm": 0.5616987347602844, |
|
"learning_rate": 4.361711830562912e-05, |
|
"loss": 0.5956, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.1280117783865304, |
|
"grad_norm": 0.4081191122531891, |
|
"learning_rate": 4.3600831289312786e-05, |
|
"loss": 0.6648, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.1283375081025267, |
|
"grad_norm": 0.485188364982605, |
|
"learning_rate": 4.358454427299645e-05, |
|
"loss": 0.6694, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.12866323781852296, |
|
"grad_norm": 0.7212422490119934, |
|
"learning_rate": 4.3568257256680124e-05, |
|
"loss": 0.6767, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.12898896753451922, |
|
"grad_norm": 0.5502139925956726, |
|
"learning_rate": 4.355197024036379e-05, |
|
"loss": 0.6721, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.12931469725051548, |
|
"grad_norm": 0.49975594878196716, |
|
"learning_rate": 4.3535683224047456e-05, |
|
"loss": 0.6669, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.12964042696651173, |
|
"grad_norm": 0.4203544557094574, |
|
"learning_rate": 4.351939620773112e-05, |
|
"loss": 0.6716, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.129966156682508, |
|
"grad_norm": 0.5464275479316711, |
|
"learning_rate": 4.3503109191414794e-05, |
|
"loss": 0.6544, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.13029188639850425, |
|
"grad_norm": 0.6473097801208496, |
|
"learning_rate": 4.348682217509846e-05, |
|
"loss": 0.6977, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.1306176161145005, |
|
"grad_norm": 0.39890334010124207, |
|
"learning_rate": 4.347053515878212e-05, |
|
"loss": 0.6704, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.13094334583049677, |
|
"grad_norm": 1.0785876512527466, |
|
"learning_rate": 4.345424814246579e-05, |
|
"loss": 0.6196, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.13126907554649303, |
|
"grad_norm": 0.6607077121734619, |
|
"learning_rate": 4.343796112614946e-05, |
|
"loss": 0.6608, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.1315948052624893, |
|
"grad_norm": 0.5987501740455627, |
|
"learning_rate": 4.342167410983313e-05, |
|
"loss": 0.6334, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.13192053497848555, |
|
"grad_norm": 0.3443163335323334, |
|
"learning_rate": 4.340538709351679e-05, |
|
"loss": 0.6621, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.1322462646944818, |
|
"grad_norm": 0.9362694025039673, |
|
"learning_rate": 4.338910007720046e-05, |
|
"loss": 0.6404, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.13257199441047807, |
|
"grad_norm": 0.5049243569374084, |
|
"learning_rate": 4.3372813060884127e-05, |
|
"loss": 0.6426, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.13289772412647433, |
|
"grad_norm": 0.787389874458313, |
|
"learning_rate": 4.335652604456779e-05, |
|
"loss": 0.6432, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.13322345384247058, |
|
"grad_norm": 0.8065658211708069, |
|
"learning_rate": 4.334023902825146e-05, |
|
"loss": 0.6477, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.13354918355846684, |
|
"grad_norm": 0.5166397094726562, |
|
"learning_rate": 4.3323952011935124e-05, |
|
"loss": 0.6384, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.1338749132744631, |
|
"grad_norm": 0.9597229957580566, |
|
"learning_rate": 4.3307664995618796e-05, |
|
"loss": 0.6832, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.1342006429904594, |
|
"grad_norm": 0.5936517715454102, |
|
"learning_rate": 4.329137797930246e-05, |
|
"loss": 0.6767, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.13452637270645565, |
|
"grad_norm": 0.8391766548156738, |
|
"learning_rate": 4.3275090962986135e-05, |
|
"loss": 0.6215, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.1348521024224519, |
|
"grad_norm": 0.977497398853302, |
|
"learning_rate": 4.3258803946669793e-05, |
|
"loss": 0.6307, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.13517783213844817, |
|
"grad_norm": 0.6750873923301697, |
|
"learning_rate": 4.324251693035346e-05, |
|
"loss": 0.631, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.13550356185444443, |
|
"grad_norm": 0.4655423164367676, |
|
"learning_rate": 4.322622991403713e-05, |
|
"loss": 0.7025, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.13582929157044069, |
|
"grad_norm": 0.43544334173202515, |
|
"learning_rate": 4.32099428977208e-05, |
|
"loss": 0.6555, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.13615502128643694, |
|
"grad_norm": 0.7595189213752747, |
|
"learning_rate": 4.319365588140446e-05, |
|
"loss": 0.6197, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.1364807510024332, |
|
"grad_norm": 0.4422534108161926, |
|
"learning_rate": 4.317736886508813e-05, |
|
"loss": 0.5798, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.13680648071842946, |
|
"grad_norm": 0.4622032344341278, |
|
"learning_rate": 4.31610818487718e-05, |
|
"loss": 0.6493, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.13713221043442572, |
|
"grad_norm": 0.7267939448356628, |
|
"learning_rate": 4.314479483245547e-05, |
|
"loss": 0.6228, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.13745794015042198, |
|
"grad_norm": 0.66838139295578, |
|
"learning_rate": 4.312850781613913e-05, |
|
"loss": 0.6507, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.13778366986641824, |
|
"grad_norm": 0.40865644812583923, |
|
"learning_rate": 4.31122207998228e-05, |
|
"loss": 0.6388, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.1381093995824145, |
|
"grad_norm": 0.7203364968299866, |
|
"learning_rate": 4.3095933783506464e-05, |
|
"loss": 0.589, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.13843512929841076, |
|
"grad_norm": 0.7719990015029907, |
|
"learning_rate": 4.307964676719014e-05, |
|
"loss": 0.6446, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.13876085901440702, |
|
"grad_norm": 0.35780540108680725, |
|
"learning_rate": 4.30633597508738e-05, |
|
"loss": 0.683, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.13908658873040328, |
|
"grad_norm": 0.5952534675598145, |
|
"learning_rate": 4.304707273455747e-05, |
|
"loss": 0.6697, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.13941231844639954, |
|
"grad_norm": 0.539117157459259, |
|
"learning_rate": 4.3030785718241134e-05, |
|
"loss": 0.6582, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.1397380481623958, |
|
"grad_norm": 0.8181525468826294, |
|
"learning_rate": 4.30144987019248e-05, |
|
"loss": 0.6695, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.14006377787839205, |
|
"grad_norm": 0.8720047473907471, |
|
"learning_rate": 4.299821168560847e-05, |
|
"loss": 0.5931, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.14038950759438834, |
|
"grad_norm": 0.9138098955154419, |
|
"learning_rate": 4.298192466929213e-05, |
|
"loss": 0.6874, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.1407152373103846, |
|
"grad_norm": 0.8015493750572205, |
|
"learning_rate": 4.2965637652975804e-05, |
|
"loss": 0.6574, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.14104096702638086, |
|
"grad_norm": 0.8426867723464966, |
|
"learning_rate": 4.294935063665947e-05, |
|
"loss": 0.6662, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.14136669674237712, |
|
"grad_norm": 0.3480939567089081, |
|
"learning_rate": 4.293306362034314e-05, |
|
"loss": 0.6351, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.14169242645837338, |
|
"grad_norm": 0.5666735172271729, |
|
"learning_rate": 4.29167766040268e-05, |
|
"loss": 0.641, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.14201815617436964, |
|
"grad_norm": 0.9445961117744446, |
|
"learning_rate": 4.2900489587710467e-05, |
|
"loss": 0.6608, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.1423438858903659, |
|
"grad_norm": 0.7916907072067261, |
|
"learning_rate": 4.288420257139414e-05, |
|
"loss": 0.6615, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.14266961560636215, |
|
"grad_norm": 0.9159532785415649, |
|
"learning_rate": 4.2867915555077805e-05, |
|
"loss": 0.5919, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.1429953453223584, |
|
"grad_norm": 0.5766249895095825, |
|
"learning_rate": 4.285162853876147e-05, |
|
"loss": 0.6724, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.14332107503835467, |
|
"grad_norm": 0.753519594669342, |
|
"learning_rate": 4.2835341522445136e-05, |
|
"loss": 0.6995, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.14364680475435093, |
|
"grad_norm": 1.1004271507263184, |
|
"learning_rate": 4.281905450612881e-05, |
|
"loss": 0.6636, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.1439725344703472, |
|
"grad_norm": 0.7064334154129028, |
|
"learning_rate": 4.2802767489812475e-05, |
|
"loss": 0.6793, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.14429826418634345, |
|
"grad_norm": 0.5158839225769043, |
|
"learning_rate": 4.278648047349614e-05, |
|
"loss": 0.6336, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.1446239939023397, |
|
"grad_norm": 1.0451433658599854, |
|
"learning_rate": 4.2770193457179806e-05, |
|
"loss": 0.6227, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.14494972361833597, |
|
"grad_norm": 0.5956864356994629, |
|
"learning_rate": 4.275390644086347e-05, |
|
"loss": 0.6517, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.14527545333433223, |
|
"grad_norm": 0.9525729417800903, |
|
"learning_rate": 4.2737619424547144e-05, |
|
"loss": 0.6245, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.1456011830503285, |
|
"grad_norm": 0.7456961274147034, |
|
"learning_rate": 4.272133240823081e-05, |
|
"loss": 0.6577, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.14592691276632475, |
|
"grad_norm": 0.5686585307121277, |
|
"learning_rate": 4.2705045391914476e-05, |
|
"loss": 0.6675, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.14625264248232103, |
|
"grad_norm": 0.5127500295639038, |
|
"learning_rate": 4.268875837559814e-05, |
|
"loss": 0.5966, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.1465783721983173, |
|
"grad_norm": 0.6099263429641724, |
|
"learning_rate": 4.267247135928181e-05, |
|
"loss": 0.6259, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.14690410191431355, |
|
"grad_norm": 0.5734119415283203, |
|
"learning_rate": 4.265618434296548e-05, |
|
"loss": 0.6251, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.1472298316303098, |
|
"grad_norm": 0.40758875012397766, |
|
"learning_rate": 4.263989732664914e-05, |
|
"loss": 0.5856, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.14755556134630607, |
|
"grad_norm": 0.5974459052085876, |
|
"learning_rate": 4.262361031033281e-05, |
|
"loss": 0.6443, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.14788129106230233, |
|
"grad_norm": 0.48085859417915344, |
|
"learning_rate": 4.260732329401648e-05, |
|
"loss": 0.6612, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.1482070207782986, |
|
"grad_norm": 0.5771530270576477, |
|
"learning_rate": 4.259103627770015e-05, |
|
"loss": 0.6272, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.14853275049429485, |
|
"grad_norm": 0.8463455438613892, |
|
"learning_rate": 4.2574749261383815e-05, |
|
"loss": 0.6008, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.1488584802102911, |
|
"grad_norm": 0.7014292478561401, |
|
"learning_rate": 4.255846224506748e-05, |
|
"loss": 0.5353, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.14918420992628736, |
|
"grad_norm": 0.6181588768959045, |
|
"learning_rate": 4.2542175228751146e-05, |
|
"loss": 0.6139, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.14950993964228362, |
|
"grad_norm": 0.6540141701698303, |
|
"learning_rate": 4.252588821243481e-05, |
|
"loss": 0.5997, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.14983566935827988, |
|
"grad_norm": 0.47981733083724976, |
|
"learning_rate": 4.2509601196118485e-05, |
|
"loss": 0.6511, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.15016139907427614, |
|
"grad_norm": 0.964857816696167, |
|
"learning_rate": 4.2493314179802144e-05, |
|
"loss": 0.6365, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.1504871287902724, |
|
"grad_norm": 0.6706714034080505, |
|
"learning_rate": 4.2477027163485816e-05, |
|
"loss": 0.664, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.15081285850626866, |
|
"grad_norm": 0.5073367953300476, |
|
"learning_rate": 4.246074014716948e-05, |
|
"loss": 0.5633, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.15113858822226492, |
|
"grad_norm": 0.37114378809928894, |
|
"learning_rate": 4.2444453130853154e-05, |
|
"loss": 0.6498, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.15146431793826118, |
|
"grad_norm": 1.153325080871582, |
|
"learning_rate": 4.242816611453681e-05, |
|
"loss": 0.6254, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.15179004765425744, |
|
"grad_norm": 0.7353873252868652, |
|
"learning_rate": 4.241187909822048e-05, |
|
"loss": 0.6573, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.15211577737025372, |
|
"grad_norm": 0.5379579067230225, |
|
"learning_rate": 4.239559208190415e-05, |
|
"loss": 0.6642, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.15244150708624998, |
|
"grad_norm": 0.341907799243927, |
|
"learning_rate": 4.237930506558782e-05, |
|
"loss": 0.6294, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.15276723680224624, |
|
"grad_norm": 0.3866462707519531, |
|
"learning_rate": 4.236301804927148e-05, |
|
"loss": 0.6212, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.1530929665182425, |
|
"grad_norm": 0.6686252951622009, |
|
"learning_rate": 4.234673103295515e-05, |
|
"loss": 0.64, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.15341869623423876, |
|
"grad_norm": 0.6398385167121887, |
|
"learning_rate": 4.233044401663882e-05, |
|
"loss": 0.6156, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.15374442595023502, |
|
"grad_norm": 0.8679475784301758, |
|
"learning_rate": 4.231415700032249e-05, |
|
"loss": 0.6492, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.15407015566623128, |
|
"grad_norm": 0.6425623297691345, |
|
"learning_rate": 4.229786998400615e-05, |
|
"loss": 0.6661, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.15439588538222754, |
|
"grad_norm": 0.7811526656150818, |
|
"learning_rate": 4.228158296768982e-05, |
|
"loss": 0.6416, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.1547216150982238, |
|
"grad_norm": 0.6820793747901917, |
|
"learning_rate": 4.2265295951373484e-05, |
|
"loss": 0.6426, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.15504734481422006, |
|
"grad_norm": 0.8748511672019958, |
|
"learning_rate": 4.224900893505716e-05, |
|
"loss": 0.6038, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.15537307453021632, |
|
"grad_norm": 0.6828723549842834, |
|
"learning_rate": 4.223272191874082e-05, |
|
"loss": 0.6408, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.15569880424621257, |
|
"grad_norm": 1.01051926612854, |
|
"learning_rate": 4.221643490242449e-05, |
|
"loss": 0.6218, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.15602453396220883, |
|
"grad_norm": 0.6920143961906433, |
|
"learning_rate": 4.2200147886108154e-05, |
|
"loss": 0.63, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.1563502636782051, |
|
"grad_norm": 0.6410394310951233, |
|
"learning_rate": 4.218386086979182e-05, |
|
"loss": 0.6176, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.15667599339420135, |
|
"grad_norm": 0.5157743692398071, |
|
"learning_rate": 4.216757385347549e-05, |
|
"loss": 0.5947, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.1570017231101976, |
|
"grad_norm": 0.6770983934402466, |
|
"learning_rate": 4.215128683715915e-05, |
|
"loss": 0.6192, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.15732745282619387, |
|
"grad_norm": 0.49714550375938416, |
|
"learning_rate": 4.2134999820842824e-05, |
|
"loss": 0.6121, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.15765318254219013, |
|
"grad_norm": 0.3486001789569855, |
|
"learning_rate": 4.211871280452649e-05, |
|
"loss": 0.5821, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.15797891225818642, |
|
"grad_norm": 0.4202999770641327, |
|
"learning_rate": 4.210242578821016e-05, |
|
"loss": 0.5909, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.15830464197418267, |
|
"grad_norm": 0.44769522547721863, |
|
"learning_rate": 4.208613877189382e-05, |
|
"loss": 0.6369, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.15863037169017893, |
|
"grad_norm": 0.6501901745796204, |
|
"learning_rate": 4.2069851755577486e-05, |
|
"loss": 0.6187, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.1589561014061752, |
|
"grad_norm": 0.8261470794677734, |
|
"learning_rate": 4.205356473926116e-05, |
|
"loss": 0.6136, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.15928183112217145, |
|
"grad_norm": 0.9979439973831177, |
|
"learning_rate": 4.2037277722944825e-05, |
|
"loss": 0.623, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.1596075608381677, |
|
"grad_norm": 0.5651659369468689, |
|
"learning_rate": 4.202099070662849e-05, |
|
"loss": 0.6742, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.15993329055416397, |
|
"grad_norm": 0.7412470579147339, |
|
"learning_rate": 4.2004703690312156e-05, |
|
"loss": 0.6272, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.16025902027016023, |
|
"grad_norm": 0.43271690607070923, |
|
"learning_rate": 4.198841667399583e-05, |
|
"loss": 0.5729, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.1605847499861565, |
|
"grad_norm": 0.5117851495742798, |
|
"learning_rate": 4.1972129657679494e-05, |
|
"loss": 0.6156, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.16091047970215275, |
|
"grad_norm": 0.7106539011001587, |
|
"learning_rate": 4.195584264136316e-05, |
|
"loss": 0.6052, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.161236209418149, |
|
"grad_norm": 0.6146919131278992, |
|
"learning_rate": 4.1939555625046826e-05, |
|
"loss": 0.5932, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.16156193913414527, |
|
"grad_norm": 0.49088531732559204, |
|
"learning_rate": 4.192326860873049e-05, |
|
"loss": 0.568, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.16188766885014153, |
|
"grad_norm": 0.9923317432403564, |
|
"learning_rate": 4.1906981592414164e-05, |
|
"loss": 0.596, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.16221339856613778, |
|
"grad_norm": 0.3995937705039978, |
|
"learning_rate": 4.189069457609783e-05, |
|
"loss": 0.6442, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.16253912828213404, |
|
"grad_norm": 0.5258984565734863, |
|
"learning_rate": 4.1874407559781496e-05, |
|
"loss": 0.5601, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.1628648579981303, |
|
"grad_norm": 0.19585928320884705, |
|
"learning_rate": 4.185812054346516e-05, |
|
"loss": 0.6509, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.16319058771412656, |
|
"grad_norm": 0.625548243522644, |
|
"learning_rate": 4.184183352714883e-05, |
|
"loss": 0.6411, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.16351631743012282, |
|
"grad_norm": 0.7014303207397461, |
|
"learning_rate": 4.18255465108325e-05, |
|
"loss": 0.6125, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.16384204714611908, |
|
"grad_norm": 0.5523779988288879, |
|
"learning_rate": 4.1809259494516165e-05, |
|
"loss": 0.5811, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.16416777686211537, |
|
"grad_norm": 0.5742841958999634, |
|
"learning_rate": 4.179297247819983e-05, |
|
"loss": 0.6282, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.16449350657811163, |
|
"grad_norm": 0.5776492357254028, |
|
"learning_rate": 4.17766854618835e-05, |
|
"loss": 0.6622, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.16481923629410788, |
|
"grad_norm": 0.7464694380760193, |
|
"learning_rate": 4.176039844556717e-05, |
|
"loss": 0.6309, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.16514496601010414, |
|
"grad_norm": 0.5271546244621277, |
|
"learning_rate": 4.1744111429250835e-05, |
|
"loss": 0.645, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.1654706957261004, |
|
"grad_norm": 0.6904231905937195, |
|
"learning_rate": 4.1727824412934494e-05, |
|
"loss": 0.5927, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.16579642544209666, |
|
"grad_norm": 0.578195333480835, |
|
"learning_rate": 4.1711537396618166e-05, |
|
"loss": 0.5812, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.16612215515809292, |
|
"grad_norm": 0.8716936707496643, |
|
"learning_rate": 4.169525038030183e-05, |
|
"loss": 0.6261, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.16644788487408918, |
|
"grad_norm": 0.6577697992324829, |
|
"learning_rate": 4.1678963363985505e-05, |
|
"loss": 0.6101, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.16677361459008544, |
|
"grad_norm": 0.7431929111480713, |
|
"learning_rate": 4.1662676347669164e-05, |
|
"loss": 0.6227, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.1670993443060817, |
|
"grad_norm": 0.9198315739631653, |
|
"learning_rate": 4.1646389331352836e-05, |
|
"loss": 0.6399, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.16742507402207796, |
|
"grad_norm": 0.5159572958946228, |
|
"learning_rate": 4.16301023150365e-05, |
|
"loss": 0.6329, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.16775080373807422, |
|
"grad_norm": 0.7744697332382202, |
|
"learning_rate": 4.161381529872017e-05, |
|
"loss": 0.5579, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.16807653345407048, |
|
"grad_norm": 0.4429173767566681, |
|
"learning_rate": 4.159752828240383e-05, |
|
"loss": 0.5786, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.16840226317006673, |
|
"grad_norm": 0.7796801924705505, |
|
"learning_rate": 4.15812412660875e-05, |
|
"loss": 0.6353, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.168727992886063, |
|
"grad_norm": 0.43117523193359375, |
|
"learning_rate": 4.156495424977117e-05, |
|
"loss": 0.5807, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.16905372260205925, |
|
"grad_norm": 0.44315412640571594, |
|
"learning_rate": 4.154866723345484e-05, |
|
"loss": 0.5979, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.1693794523180555, |
|
"grad_norm": 0.4306319057941437, |
|
"learning_rate": 4.15323802171385e-05, |
|
"loss": 0.6498, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.16970518203405177, |
|
"grad_norm": 0.283033549785614, |
|
"learning_rate": 4.151609320082217e-05, |
|
"loss": 0.6329, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.17003091175004806, |
|
"grad_norm": 0.4118421673774719, |
|
"learning_rate": 4.1499806184505834e-05, |
|
"loss": 0.5933, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.17035664146604432, |
|
"grad_norm": 0.9130700826644897, |
|
"learning_rate": 4.148351916818951e-05, |
|
"loss": 0.5349, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.17068237118204058, |
|
"grad_norm": 0.33348548412323, |
|
"learning_rate": 4.146723215187317e-05, |
|
"loss": 0.6182, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.17100810089803684, |
|
"grad_norm": 0.6642253398895264, |
|
"learning_rate": 4.145094513555684e-05, |
|
"loss": 0.5989, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.1713338306140331, |
|
"grad_norm": 0.7113855481147766, |
|
"learning_rate": 4.1434658119240504e-05, |
|
"loss": 0.6063, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.17165956033002935, |
|
"grad_norm": 1.0840643644332886, |
|
"learning_rate": 4.1418371102924177e-05, |
|
"loss": 0.615, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.1719852900460256, |
|
"grad_norm": 0.5277838706970215, |
|
"learning_rate": 4.140208408660784e-05, |
|
"loss": 0.6234, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.17231101976202187, |
|
"grad_norm": 0.5993104577064514, |
|
"learning_rate": 4.13857970702915e-05, |
|
"loss": 0.5905, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.17263674947801813, |
|
"grad_norm": 0.7363581657409668, |
|
"learning_rate": 4.1369510053975174e-05, |
|
"loss": 0.6032, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.1729624791940144, |
|
"grad_norm": 0.6299027800559998, |
|
"learning_rate": 4.135322303765884e-05, |
|
"loss": 0.5717, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.17328820891001065, |
|
"grad_norm": 0.49232372641563416, |
|
"learning_rate": 4.133693602134251e-05, |
|
"loss": 0.6031, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.1736139386260069, |
|
"grad_norm": 0.7371428608894348, |
|
"learning_rate": 4.132064900502617e-05, |
|
"loss": 0.5608, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.17393966834200317, |
|
"grad_norm": 1.0730559825897217, |
|
"learning_rate": 4.1304361988709843e-05, |
|
"loss": 0.6026, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.17426539805799943, |
|
"grad_norm": 0.674548327922821, |
|
"learning_rate": 4.128807497239351e-05, |
|
"loss": 0.5721, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.17459112777399569, |
|
"grad_norm": 0.5990965962409973, |
|
"learning_rate": 4.1271787956077175e-05, |
|
"loss": 0.6185, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.17491685748999194, |
|
"grad_norm": 0.61868816614151, |
|
"learning_rate": 4.125550093976084e-05, |
|
"loss": 0.6089, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.1752425872059882, |
|
"grad_norm": 0.4897661507129669, |
|
"learning_rate": 4.1239213923444506e-05, |
|
"loss": 0.6025, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.17556831692198446, |
|
"grad_norm": 0.2856525480747223, |
|
"learning_rate": 4.122292690712818e-05, |
|
"loss": 0.5609, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.17589404663798075, |
|
"grad_norm": 0.5488519668579102, |
|
"learning_rate": 4.1206639890811845e-05, |
|
"loss": 0.5781, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.176219776353977, |
|
"grad_norm": 0.7812597155570984, |
|
"learning_rate": 4.119035287449551e-05, |
|
"loss": 0.665, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.17654550606997327, |
|
"grad_norm": 0.5567785501480103, |
|
"learning_rate": 4.1174065858179176e-05, |
|
"loss": 0.6178, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.17687123578596953, |
|
"grad_norm": 0.7302952408790588, |
|
"learning_rate": 4.115777884186285e-05, |
|
"loss": 0.5912, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.1771969655019658, |
|
"grad_norm": 0.6872962713241577, |
|
"learning_rate": 4.1141491825546514e-05, |
|
"loss": 0.5698, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.17752269521796205, |
|
"grad_norm": 0.6139744520187378, |
|
"learning_rate": 4.112520480923018e-05, |
|
"loss": 0.6148, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.1778484249339583, |
|
"grad_norm": 0.6646268367767334, |
|
"learning_rate": 4.1108917792913846e-05, |
|
"loss": 0.5222, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.17817415464995456, |
|
"grad_norm": 0.4842844009399414, |
|
"learning_rate": 4.109263077659751e-05, |
|
"loss": 0.6225, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.17849988436595082, |
|
"grad_norm": 0.6158716082572937, |
|
"learning_rate": 4.1076343760281184e-05, |
|
"loss": 0.634, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.17882561408194708, |
|
"grad_norm": 0.5122677683830261, |
|
"learning_rate": 4.106005674396485e-05, |
|
"loss": 0.6355, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.17915134379794334, |
|
"grad_norm": 0.6086121201515198, |
|
"learning_rate": 4.1043769727648515e-05, |
|
"loss": 0.5787, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.1794770735139396, |
|
"grad_norm": 0.5853461623191833, |
|
"learning_rate": 4.102748271133218e-05, |
|
"loss": 0.5935, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.17980280322993586, |
|
"grad_norm": 0.9216148853302002, |
|
"learning_rate": 4.101119569501585e-05, |
|
"loss": 0.575, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.18012853294593212, |
|
"grad_norm": 0.6602348685264587, |
|
"learning_rate": 4.099490867869952e-05, |
|
"loss": 0.6324, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.18045426266192838, |
|
"grad_norm": 0.7494210004806519, |
|
"learning_rate": 4.0978621662383185e-05, |
|
"loss": 0.5859, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.18077999237792464, |
|
"grad_norm": 0.6391832232475281, |
|
"learning_rate": 4.096233464606685e-05, |
|
"loss": 0.6172, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.1811057220939209, |
|
"grad_norm": 0.5824201107025146, |
|
"learning_rate": 4.0946047629750517e-05, |
|
"loss": 0.6298, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.18143145180991715, |
|
"grad_norm": 0.6924212574958801, |
|
"learning_rate": 4.092976061343419e-05, |
|
"loss": 0.6105, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.1817571815259134, |
|
"grad_norm": 0.4423877000808716, |
|
"learning_rate": 4.0913473597117855e-05, |
|
"loss": 0.5613, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.1820829112419097, |
|
"grad_norm": 0.6090314984321594, |
|
"learning_rate": 4.0897186580801514e-05, |
|
"loss": 0.6643, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.18240864095790596, |
|
"grad_norm": 0.7554407119750977, |
|
"learning_rate": 4.0880899564485186e-05, |
|
"loss": 0.6017, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.18273437067390222, |
|
"grad_norm": 0.8148972988128662, |
|
"learning_rate": 4.086461254816885e-05, |
|
"loss": 0.6539, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.18306010038989848, |
|
"grad_norm": 0.5610066652297974, |
|
"learning_rate": 4.0848325531852525e-05, |
|
"loss": 0.5872, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.18338583010589474, |
|
"grad_norm": 0.6361645460128784, |
|
"learning_rate": 4.0832038515536183e-05, |
|
"loss": 0.5815, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.183711559821891, |
|
"grad_norm": 0.4567771553993225, |
|
"learning_rate": 4.0815751499219856e-05, |
|
"loss": 0.5799, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.18403728953788726, |
|
"grad_norm": 0.8705578446388245, |
|
"learning_rate": 4.079946448290352e-05, |
|
"loss": 0.6088, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.18436301925388351, |
|
"grad_norm": 0.8278294801712036, |
|
"learning_rate": 4.078317746658719e-05, |
|
"loss": 0.6064, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.18468874896987977, |
|
"grad_norm": 0.38864201307296753, |
|
"learning_rate": 4.076689045027085e-05, |
|
"loss": 0.5705, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.18501447868587603, |
|
"grad_norm": 0.6986147165298462, |
|
"learning_rate": 4.075060343395452e-05, |
|
"loss": 0.6071, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.1853402084018723, |
|
"grad_norm": 0.9127377867698669, |
|
"learning_rate": 4.073431641763819e-05, |
|
"loss": 0.608, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.18566593811786855, |
|
"grad_norm": 0.5072229504585266, |
|
"learning_rate": 4.071802940132186e-05, |
|
"loss": 0.583, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.1859916678338648, |
|
"grad_norm": 0.47545337677001953, |
|
"learning_rate": 4.070174238500552e-05, |
|
"loss": 0.5826, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.18631739754986107, |
|
"grad_norm": 0.5175743103027344, |
|
"learning_rate": 4.068545536868919e-05, |
|
"loss": 0.6184, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.18664312726585733, |
|
"grad_norm": 0.7252177596092224, |
|
"learning_rate": 4.0669168352372854e-05, |
|
"loss": 0.6042, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.1869688569818536, |
|
"grad_norm": 0.21297673881053925, |
|
"learning_rate": 4.065288133605653e-05, |
|
"loss": 0.5874, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.18729458669784985, |
|
"grad_norm": 0.6985592246055603, |
|
"learning_rate": 4.063659431974019e-05, |
|
"loss": 0.5641, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.1876203164138461, |
|
"grad_norm": 0.35783612728118896, |
|
"learning_rate": 4.062030730342386e-05, |
|
"loss": 0.5743, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.1879460461298424, |
|
"grad_norm": 0.40871796011924744, |
|
"learning_rate": 4.0604020287107524e-05, |
|
"loss": 0.6418, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.18827177584583865, |
|
"grad_norm": 0.6412025094032288, |
|
"learning_rate": 4.0587733270791197e-05, |
|
"loss": 0.6048, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.1885975055618349, |
|
"grad_norm": 0.6944416165351868, |
|
"learning_rate": 4.057144625447486e-05, |
|
"loss": 0.5647, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.18892323527783117, |
|
"grad_norm": 0.8592963218688965, |
|
"learning_rate": 4.055515923815852e-05, |
|
"loss": 0.5703, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.18924896499382743, |
|
"grad_norm": 0.7240419983863831, |
|
"learning_rate": 4.0538872221842194e-05, |
|
"loss": 0.6025, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.1895746947098237, |
|
"grad_norm": 0.3861270546913147, |
|
"learning_rate": 4.052258520552586e-05, |
|
"loss": 0.5864, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.18990042442581995, |
|
"grad_norm": 0.6718447208404541, |
|
"learning_rate": 4.050629818920953e-05, |
|
"loss": 0.6139, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.1902261541418162, |
|
"grad_norm": 0.7049744129180908, |
|
"learning_rate": 4.049001117289319e-05, |
|
"loss": 0.5697, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.19055188385781247, |
|
"grad_norm": 0.39576876163482666, |
|
"learning_rate": 4.047372415657686e-05, |
|
"loss": 0.5987, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.19087761357380872, |
|
"grad_norm": 0.7814981341362, |
|
"learning_rate": 4.045743714026053e-05, |
|
"loss": 0.5715, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.19120334328980498, |
|
"grad_norm": 1.0083011388778687, |
|
"learning_rate": 4.0441150123944195e-05, |
|
"loss": 0.6355, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.19152907300580124, |
|
"grad_norm": 0.7083866596221924, |
|
"learning_rate": 4.042486310762786e-05, |
|
"loss": 0.6666, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.1918548027217975, |
|
"grad_norm": 0.4740765690803528, |
|
"learning_rate": 4.0408576091311526e-05, |
|
"loss": 0.5773, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.19218053243779376, |
|
"grad_norm": 0.3599790632724762, |
|
"learning_rate": 4.03922890749952e-05, |
|
"loss": 0.5916, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.19250626215379002, |
|
"grad_norm": 0.6107310652732849, |
|
"learning_rate": 4.0376002058678865e-05, |
|
"loss": 0.63, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.19283199186978628, |
|
"grad_norm": 0.6388813257217407, |
|
"learning_rate": 4.035971504236253e-05, |
|
"loss": 0.6197, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.19315772158578254, |
|
"grad_norm": 0.4137844145298004, |
|
"learning_rate": 4.0343428026046196e-05, |
|
"loss": 0.6185, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.1934834513017788, |
|
"grad_norm": 0.6289616823196411, |
|
"learning_rate": 4.032714100972986e-05, |
|
"loss": 0.6367, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.19380918101777508, |
|
"grad_norm": 0.7528841495513916, |
|
"learning_rate": 4.0310853993413534e-05, |
|
"loss": 0.5783, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.19413491073377134, |
|
"grad_norm": 0.7345238924026489, |
|
"learning_rate": 4.02945669770972e-05, |
|
"loss": 0.6378, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.1944606404497676, |
|
"grad_norm": 0.7652753591537476, |
|
"learning_rate": 4.0278279960780866e-05, |
|
"loss": 0.5419, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.19478637016576386, |
|
"grad_norm": 0.3726235032081604, |
|
"learning_rate": 4.026199294446453e-05, |
|
"loss": 0.5933, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.19511209988176012, |
|
"grad_norm": 0.475990355014801, |
|
"learning_rate": 4.0245705928148204e-05, |
|
"loss": 0.5421, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.19543782959775638, |
|
"grad_norm": 0.8618846535682678, |
|
"learning_rate": 4.022941891183187e-05, |
|
"loss": 0.6149, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.19576355931375264, |
|
"grad_norm": 0.3643835484981537, |
|
"learning_rate": 4.0213131895515535e-05, |
|
"loss": 0.5898, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.1960892890297489, |
|
"grad_norm": 0.6492701172828674, |
|
"learning_rate": 4.01968448791992e-05, |
|
"loss": 0.6115, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.19641501874574516, |
|
"grad_norm": 0.46400219202041626, |
|
"learning_rate": 4.018055786288287e-05, |
|
"loss": 0.6093, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.19674074846174142, |
|
"grad_norm": 0.6529611349105835, |
|
"learning_rate": 4.016427084656654e-05, |
|
"loss": 0.5663, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.19706647817773768, |
|
"grad_norm": 0.8332497477531433, |
|
"learning_rate": 4.0147983830250205e-05, |
|
"loss": 0.557, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.19739220789373393, |
|
"grad_norm": 0.43394774198532104, |
|
"learning_rate": 4.013169681393387e-05, |
|
"loss": 0.5864, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.1977179376097302, |
|
"grad_norm": 0.3713783919811249, |
|
"learning_rate": 4.0115409797617537e-05, |
|
"loss": 0.597, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.19804366732572645, |
|
"grad_norm": 0.5605040788650513, |
|
"learning_rate": 4.00991227813012e-05, |
|
"loss": 0.5965, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.1983693970417227, |
|
"grad_norm": 0.4591531455516815, |
|
"learning_rate": 4.0082835764984875e-05, |
|
"loss": 0.5718, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.19869512675771897, |
|
"grad_norm": 0.7599985003471375, |
|
"learning_rate": 4.0066548748668534e-05, |
|
"loss": 0.6088, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.19902085647371523, |
|
"grad_norm": 0.7234918475151062, |
|
"learning_rate": 4.0050261732352206e-05, |
|
"loss": 0.6022, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.1993465861897115, |
|
"grad_norm": 0.8344034552574158, |
|
"learning_rate": 4.003397471603587e-05, |
|
"loss": 0.5978, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.19967231590570778, |
|
"grad_norm": 0.7539324164390564, |
|
"learning_rate": 4.0017687699719544e-05, |
|
"loss": 0.5979, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.19999804562170403, |
|
"grad_norm": 0.7535436153411865, |
|
"learning_rate": 4.00014006834032e-05, |
|
"loss": 0.5632, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.2003237753377003, |
|
"grad_norm": 1.0253859758377075, |
|
"learning_rate": 3.998511366708687e-05, |
|
"loss": 0.6245, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.20064950505369655, |
|
"grad_norm": 0.8442240357398987, |
|
"learning_rate": 3.996882665077054e-05, |
|
"loss": 0.56, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.2009752347696928, |
|
"grad_norm": 0.7696794867515564, |
|
"learning_rate": 3.995253963445421e-05, |
|
"loss": 0.5525, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.20130096448568907, |
|
"grad_norm": 1.0839108228683472, |
|
"learning_rate": 3.993625261813787e-05, |
|
"loss": 0.576, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.20162669420168533, |
|
"grad_norm": 0.4837821125984192, |
|
"learning_rate": 3.991996560182154e-05, |
|
"loss": 0.6654, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.2019524239176816, |
|
"grad_norm": 0.8696286082267761, |
|
"learning_rate": 3.990367858550521e-05, |
|
"loss": 0.5237, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.20227815363367785, |
|
"grad_norm": 0.5389662384986877, |
|
"learning_rate": 3.988739156918888e-05, |
|
"loss": 0.5765, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.2026038833496741, |
|
"grad_norm": 0.39996546506881714, |
|
"learning_rate": 3.987110455287254e-05, |
|
"loss": 0.5666, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.20292961306567037, |
|
"grad_norm": 0.5612654685974121, |
|
"learning_rate": 3.985481753655621e-05, |
|
"loss": 0.5975, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.20325534278166663, |
|
"grad_norm": 0.4764688014984131, |
|
"learning_rate": 3.9838530520239874e-05, |
|
"loss": 0.5973, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.20358107249766288, |
|
"grad_norm": 0.538745105266571, |
|
"learning_rate": 3.982224350392355e-05, |
|
"loss": 0.6108, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.20390680221365914, |
|
"grad_norm": 0.6589317321777344, |
|
"learning_rate": 3.980595648760721e-05, |
|
"loss": 0.5482, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.2042325319296554, |
|
"grad_norm": 0.8373557925224304, |
|
"learning_rate": 3.978966947129088e-05, |
|
"loss": 0.5671, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.20455826164565166, |
|
"grad_norm": 0.6305526494979858, |
|
"learning_rate": 3.9773382454974544e-05, |
|
"loss": 0.6205, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.20488399136164792, |
|
"grad_norm": 0.6550065875053406, |
|
"learning_rate": 3.9757095438658216e-05, |
|
"loss": 0.5805, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.20520972107764418, |
|
"grad_norm": 0.6951280236244202, |
|
"learning_rate": 3.974080842234188e-05, |
|
"loss": 0.6103, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.20553545079364044, |
|
"grad_norm": 0.5202652215957642, |
|
"learning_rate": 3.972452140602554e-05, |
|
"loss": 0.5623, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.20586118050963673, |
|
"grad_norm": 1.0889042615890503, |
|
"learning_rate": 3.9708234389709214e-05, |
|
"loss": 0.5879, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.20618691022563299, |
|
"grad_norm": 0.4142896234989166, |
|
"learning_rate": 3.969194737339288e-05, |
|
"loss": 0.6148, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.20651263994162924, |
|
"grad_norm": 0.6650342345237732, |
|
"learning_rate": 3.967566035707655e-05, |
|
"loss": 0.5902, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.2068383696576255, |
|
"grad_norm": 0.42452552914619446, |
|
"learning_rate": 3.965937334076021e-05, |
|
"loss": 0.4877, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.20716409937362176, |
|
"grad_norm": 0.6702756881713867, |
|
"learning_rate": 3.964308632444388e-05, |
|
"loss": 0.5943, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.20748982908961802, |
|
"grad_norm": 0.9007012248039246, |
|
"learning_rate": 3.962679930812755e-05, |
|
"loss": 0.5652, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.20781555880561428, |
|
"grad_norm": 0.8962705135345459, |
|
"learning_rate": 3.9610512291811215e-05, |
|
"loss": 0.5731, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.20814128852161054, |
|
"grad_norm": 0.8256299495697021, |
|
"learning_rate": 3.959422527549489e-05, |
|
"loss": 0.5596, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.2084670182376068, |
|
"grad_norm": 0.5674106478691101, |
|
"learning_rate": 3.9577938259178546e-05, |
|
"loss": 0.557, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.20879274795360306, |
|
"grad_norm": 0.564755916595459, |
|
"learning_rate": 3.956165124286222e-05, |
|
"loss": 0.5735, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.20911847766959932, |
|
"grad_norm": 1.0437874794006348, |
|
"learning_rate": 3.9545364226545884e-05, |
|
"loss": 0.5371, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.20944420738559558, |
|
"grad_norm": 0.877699077129364, |
|
"learning_rate": 3.952907721022956e-05, |
|
"loss": 0.538, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.20976993710159184, |
|
"grad_norm": 0.6481153964996338, |
|
"learning_rate": 3.9512790193913216e-05, |
|
"loss": 0.5763, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.2100956668175881, |
|
"grad_norm": 0.7963904142379761, |
|
"learning_rate": 3.949650317759688e-05, |
|
"loss": 0.5617, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.21042139653358435, |
|
"grad_norm": 1.1034698486328125, |
|
"learning_rate": 3.9480216161280554e-05, |
|
"loss": 0.5876, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.2107471262495806, |
|
"grad_norm": 0.7540128827095032, |
|
"learning_rate": 3.946392914496422e-05, |
|
"loss": 0.574, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.21107285596557687, |
|
"grad_norm": 0.7184910178184509, |
|
"learning_rate": 3.9447642128647886e-05, |
|
"loss": 0.5328, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.21139858568157313, |
|
"grad_norm": 0.7150009274482727, |
|
"learning_rate": 3.943135511233155e-05, |
|
"loss": 0.6049, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.21172431539756942, |
|
"grad_norm": 0.4451941251754761, |
|
"learning_rate": 3.9415068096015224e-05, |
|
"loss": 0.5958, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.21205004511356568, |
|
"grad_norm": 1.00858736038208, |
|
"learning_rate": 3.939878107969889e-05, |
|
"loss": 0.5752, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.21237577482956194, |
|
"grad_norm": 0.7953845858573914, |
|
"learning_rate": 3.9382494063382555e-05, |
|
"loss": 0.5555, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.2127015045455582, |
|
"grad_norm": 0.5992127060890198, |
|
"learning_rate": 3.936620704706622e-05, |
|
"loss": 0.59, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.21302723426155445, |
|
"grad_norm": 0.5878809690475464, |
|
"learning_rate": 3.934992003074989e-05, |
|
"loss": 0.5881, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.2133529639775507, |
|
"grad_norm": 0.9159529805183411, |
|
"learning_rate": 3.933363301443356e-05, |
|
"loss": 0.5951, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.21367869369354697, |
|
"grad_norm": 0.6340069770812988, |
|
"learning_rate": 3.9317345998117225e-05, |
|
"loss": 0.5799, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.21400442340954323, |
|
"grad_norm": 0.8940368890762329, |
|
"learning_rate": 3.930105898180089e-05, |
|
"loss": 0.5273, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.2143301531255395, |
|
"grad_norm": 0.7908622622489929, |
|
"learning_rate": 3.9284771965484556e-05, |
|
"loss": 0.5472, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.21465588284153575, |
|
"grad_norm": 0.9964277744293213, |
|
"learning_rate": 3.926848494916822e-05, |
|
"loss": 0.5719, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.214981612557532, |
|
"grad_norm": 0.6497515439987183, |
|
"learning_rate": 3.9252197932851895e-05, |
|
"loss": 0.5338, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.21530734227352827, |
|
"grad_norm": 0.8303185105323792, |
|
"learning_rate": 3.9235910916535554e-05, |
|
"loss": 0.5237, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.21563307198952453, |
|
"grad_norm": 0.8530830144882202, |
|
"learning_rate": 3.9219623900219226e-05, |
|
"loss": 0.5328, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.2159588017055208, |
|
"grad_norm": 0.9482616782188416, |
|
"learning_rate": 3.920333688390289e-05, |
|
"loss": 0.5548, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.21628453142151705, |
|
"grad_norm": 0.430633008480072, |
|
"learning_rate": 3.9187049867586564e-05, |
|
"loss": 0.551, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.2166102611375133, |
|
"grad_norm": 0.5612674355506897, |
|
"learning_rate": 3.917076285127022e-05, |
|
"loss": 0.5571, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.21693599085350956, |
|
"grad_norm": 0.7157821655273438, |
|
"learning_rate": 3.915447583495389e-05, |
|
"loss": 0.555, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.21726172056950582, |
|
"grad_norm": 0.6013966202735901, |
|
"learning_rate": 3.913818881863756e-05, |
|
"loss": 0.585, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.2175874502855021, |
|
"grad_norm": 0.4616648554801941, |
|
"learning_rate": 3.912190180232123e-05, |
|
"loss": 0.5832, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.21791318000149837, |
|
"grad_norm": 0.6870980858802795, |
|
"learning_rate": 3.910561478600489e-05, |
|
"loss": 0.5944, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.21823890971749463, |
|
"grad_norm": 0.629490315914154, |
|
"learning_rate": 3.908932776968856e-05, |
|
"loss": 0.5279, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.2185646394334909, |
|
"grad_norm": 0.5478650331497192, |
|
"learning_rate": 3.907304075337223e-05, |
|
"loss": 0.5815, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.21889036914948715, |
|
"grad_norm": 0.6581255793571472, |
|
"learning_rate": 3.90567537370559e-05, |
|
"loss": 0.5661, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.2192160988654834, |
|
"grad_norm": 0.7738802433013916, |
|
"learning_rate": 3.904046672073956e-05, |
|
"loss": 0.5901, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.21954182858147966, |
|
"grad_norm": 0.5748447179794312, |
|
"learning_rate": 3.902417970442323e-05, |
|
"loss": 0.5813, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.21986755829747592, |
|
"grad_norm": 0.7152987718582153, |
|
"learning_rate": 3.9007892688106894e-05, |
|
"loss": 0.5359, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.22019328801347218, |
|
"grad_norm": 0.867574155330658, |
|
"learning_rate": 3.899160567179057e-05, |
|
"loss": 0.5419, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.22051901772946844, |
|
"grad_norm": 0.8477634787559509, |
|
"learning_rate": 3.897531865547423e-05, |
|
"loss": 0.5788, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.2208447474454647, |
|
"grad_norm": 0.7993571758270264, |
|
"learning_rate": 3.89590316391579e-05, |
|
"loss": 0.528, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.22117047716146096, |
|
"grad_norm": 0.6607359647750854, |
|
"learning_rate": 3.8942744622841564e-05, |
|
"loss": 0.5647, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.22149620687745722, |
|
"grad_norm": 0.6910780072212219, |
|
"learning_rate": 3.892645760652523e-05, |
|
"loss": 0.5418, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.22182193659345348, |
|
"grad_norm": 0.4793308675289154, |
|
"learning_rate": 3.89101705902089e-05, |
|
"loss": 0.5913, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.22214766630944974, |
|
"grad_norm": 0.7222141027450562, |
|
"learning_rate": 3.889388357389257e-05, |
|
"loss": 0.6128, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.222473396025446, |
|
"grad_norm": 0.43712884187698364, |
|
"learning_rate": 3.8877596557576233e-05, |
|
"loss": 0.583, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.22279912574144226, |
|
"grad_norm": 0.5187420845031738, |
|
"learning_rate": 3.88613095412599e-05, |
|
"loss": 0.5758, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.22312485545743851, |
|
"grad_norm": 0.5550572872161865, |
|
"learning_rate": 3.884502252494357e-05, |
|
"loss": 0.5269, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.22345058517343477, |
|
"grad_norm": 0.7551735639572144, |
|
"learning_rate": 3.882873550862724e-05, |
|
"loss": 0.6005, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.22377631488943106, |
|
"grad_norm": 0.7213869690895081, |
|
"learning_rate": 3.8812448492310896e-05, |
|
"loss": 0.5174, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.22410204460542732, |
|
"grad_norm": 0.6445099115371704, |
|
"learning_rate": 3.879616147599457e-05, |
|
"loss": 0.5501, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.22442777432142358, |
|
"grad_norm": 0.7937589883804321, |
|
"learning_rate": 3.8779874459678235e-05, |
|
"loss": 0.5598, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.22475350403741984, |
|
"grad_norm": 0.5327324271202087, |
|
"learning_rate": 3.876358744336191e-05, |
|
"loss": 0.531, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.2250792337534161, |
|
"grad_norm": 0.7627710103988647, |
|
"learning_rate": 3.8747300427045566e-05, |
|
"loss": 0.578, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.22540496346941236, |
|
"grad_norm": 0.5054932832717896, |
|
"learning_rate": 3.873101341072924e-05, |
|
"loss": 0.5905, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.22573069318540862, |
|
"grad_norm": 0.6468352675437927, |
|
"learning_rate": 3.8714726394412904e-05, |
|
"loss": 0.5931, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.22605642290140487, |
|
"grad_norm": 0.37974539399147034, |
|
"learning_rate": 3.869843937809657e-05, |
|
"loss": 0.5777, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.22638215261740113, |
|
"grad_norm": 0.8011950850486755, |
|
"learning_rate": 3.8682152361780236e-05, |
|
"loss": 0.5187, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.2267078823333974, |
|
"grad_norm": 0.40006023645401, |
|
"learning_rate": 3.86658653454639e-05, |
|
"loss": 0.5292, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.22703361204939365, |
|
"grad_norm": 0.42605412006378174, |
|
"learning_rate": 3.8649578329147574e-05, |
|
"loss": 0.5704, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.2273593417653899, |
|
"grad_norm": 0.820277988910675, |
|
"learning_rate": 3.863329131283124e-05, |
|
"loss": 0.5641, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.22768507148138617, |
|
"grad_norm": 0.6671209931373596, |
|
"learning_rate": 3.8617004296514905e-05, |
|
"loss": 0.5942, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.22801080119738243, |
|
"grad_norm": 0.7214267253875732, |
|
"learning_rate": 3.860071728019857e-05, |
|
"loss": 0.6078, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.2283365309133787, |
|
"grad_norm": 0.5705024003982544, |
|
"learning_rate": 3.858443026388224e-05, |
|
"loss": 0.5111, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.22866226062937495, |
|
"grad_norm": 0.7017680406570435, |
|
"learning_rate": 3.856814324756591e-05, |
|
"loss": 0.5386, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.2289879903453712, |
|
"grad_norm": 0.36700716614723206, |
|
"learning_rate": 3.8551856231249575e-05, |
|
"loss": 0.5947, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.22931372006136747, |
|
"grad_norm": 1.018539309501648, |
|
"learning_rate": 3.853556921493324e-05, |
|
"loss": 0.5739, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.22963944977736375, |
|
"grad_norm": 0.8273037672042847, |
|
"learning_rate": 3.851928219861691e-05, |
|
"loss": 0.5247, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.22996517949336, |
|
"grad_norm": 1.0655425786972046, |
|
"learning_rate": 3.850299518230058e-05, |
|
"loss": 0.5397, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.23029090920935627, |
|
"grad_norm": 0.38495421409606934, |
|
"learning_rate": 3.8486708165984245e-05, |
|
"loss": 0.5844, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.23061663892535253, |
|
"grad_norm": 0.9659711122512817, |
|
"learning_rate": 3.847042114966791e-05, |
|
"loss": 0.5873, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.2309423686413488, |
|
"grad_norm": 0.7230137586593628, |
|
"learning_rate": 3.8454134133351576e-05, |
|
"loss": 0.593, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.23126809835734505, |
|
"grad_norm": 0.9325969219207764, |
|
"learning_rate": 3.843784711703524e-05, |
|
"loss": 0.5965, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.2315938280733413, |
|
"grad_norm": 0.6791651248931885, |
|
"learning_rate": 3.8421560100718915e-05, |
|
"loss": 0.6223, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.23191955778933757, |
|
"grad_norm": 0.8241651058197021, |
|
"learning_rate": 3.8405273084402573e-05, |
|
"loss": 0.5257, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.23224528750533383, |
|
"grad_norm": 0.8813059329986572, |
|
"learning_rate": 3.8388986068086246e-05, |
|
"loss": 0.5965, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.23257101722133008, |
|
"grad_norm": 0.7717010378837585, |
|
"learning_rate": 3.837269905176991e-05, |
|
"loss": 0.5502, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.23289674693732634, |
|
"grad_norm": 0.39482927322387695, |
|
"learning_rate": 3.8356412035453584e-05, |
|
"loss": 0.5618, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.2332224766533226, |
|
"grad_norm": 0.8985998630523682, |
|
"learning_rate": 3.834012501913724e-05, |
|
"loss": 0.5247, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.23354820636931886, |
|
"grad_norm": 0.4451032876968384, |
|
"learning_rate": 3.832383800282091e-05, |
|
"loss": 0.565, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.23387393608531512, |
|
"grad_norm": 0.46427956223487854, |
|
"learning_rate": 3.830755098650458e-05, |
|
"loss": 0.5511, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.23419966580131138, |
|
"grad_norm": 1.1371232271194458, |
|
"learning_rate": 3.829126397018825e-05, |
|
"loss": 0.5867, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.23452539551730764, |
|
"grad_norm": 0.5856015086174011, |
|
"learning_rate": 3.827497695387191e-05, |
|
"loss": 0.5425, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.2348511252333039, |
|
"grad_norm": 0.5723338723182678, |
|
"learning_rate": 3.825868993755558e-05, |
|
"loss": 0.5828, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.23517685494930016, |
|
"grad_norm": 0.6274189352989197, |
|
"learning_rate": 3.824240292123925e-05, |
|
"loss": 0.4961, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.23550258466529644, |
|
"grad_norm": 0.5841485857963562, |
|
"learning_rate": 3.822611590492292e-05, |
|
"loss": 0.5639, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.2358283143812927, |
|
"grad_norm": 0.9061130285263062, |
|
"learning_rate": 3.820982888860658e-05, |
|
"loss": 0.5126, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.23615404409728896, |
|
"grad_norm": 0.9499684572219849, |
|
"learning_rate": 3.819354187229025e-05, |
|
"loss": 0.5684, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.23647977381328522, |
|
"grad_norm": 0.7132393717765808, |
|
"learning_rate": 3.8177254855973914e-05, |
|
"loss": 0.5287, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.23680550352928148, |
|
"grad_norm": 0.8645475506782532, |
|
"learning_rate": 3.8160967839657587e-05, |
|
"loss": 0.564, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.23713123324527774, |
|
"grad_norm": 0.8675580024719238, |
|
"learning_rate": 3.814468082334125e-05, |
|
"loss": 0.5435, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.237456962961274, |
|
"grad_norm": 0.7194923162460327, |
|
"learning_rate": 3.812839380702492e-05, |
|
"loss": 0.5843, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.23778269267727026, |
|
"grad_norm": 0.782618522644043, |
|
"learning_rate": 3.8112106790708584e-05, |
|
"loss": 0.5609, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.23810842239326652, |
|
"grad_norm": 0.6671516299247742, |
|
"learning_rate": 3.809581977439225e-05, |
|
"loss": 0.4925, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.23843415210926278, |
|
"grad_norm": 0.8488081097602844, |
|
"learning_rate": 3.807953275807592e-05, |
|
"loss": 0.5536, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.23875988182525903, |
|
"grad_norm": 0.7259848117828369, |
|
"learning_rate": 3.806324574175959e-05, |
|
"loss": 0.5372, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.2390856115412553, |
|
"grad_norm": 0.5849174857139587, |
|
"learning_rate": 3.8046958725443253e-05, |
|
"loss": 0.5602, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.23941134125725155, |
|
"grad_norm": 0.36567142605781555, |
|
"learning_rate": 3.803067170912692e-05, |
|
"loss": 0.5976, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.2397370709732478, |
|
"grad_norm": 0.8540560007095337, |
|
"learning_rate": 3.801438469281059e-05, |
|
"loss": 0.576, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.24006280068924407, |
|
"grad_norm": 0.7733421921730042, |
|
"learning_rate": 3.799809767649426e-05, |
|
"loss": 0.5446, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.24038853040524033, |
|
"grad_norm": 0.6541240811347961, |
|
"learning_rate": 3.7981810660177916e-05, |
|
"loss": 0.5302, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.2407142601212366, |
|
"grad_norm": 0.6777580976486206, |
|
"learning_rate": 3.796552364386159e-05, |
|
"loss": 0.5742, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.24103998983723285, |
|
"grad_norm": 1.1045103073120117, |
|
"learning_rate": 3.7949236627545255e-05, |
|
"loss": 0.5391, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.2413657195532291, |
|
"grad_norm": 1.223781943321228, |
|
"learning_rate": 3.793294961122893e-05, |
|
"loss": 0.5754, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.2416914492692254, |
|
"grad_norm": 0.7645404934883118, |
|
"learning_rate": 3.7916662594912586e-05, |
|
"loss": 0.5424, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.24201717898522165, |
|
"grad_norm": 0.8637171983718872, |
|
"learning_rate": 3.790037557859626e-05, |
|
"loss": 0.5577, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.2423429087012179, |
|
"grad_norm": 0.633642315864563, |
|
"learning_rate": 3.7884088562279924e-05, |
|
"loss": 0.5513, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.24266863841721417, |
|
"grad_norm": 0.48609936237335205, |
|
"learning_rate": 3.786780154596359e-05, |
|
"loss": 0.6002, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.24299436813321043, |
|
"grad_norm": 0.3668748140335083, |
|
"learning_rate": 3.7851514529647256e-05, |
|
"loss": 0.5947, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.2433200978492067, |
|
"grad_norm": 0.735894501209259, |
|
"learning_rate": 3.783522751333092e-05, |
|
"loss": 0.5862, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.24364582756520295, |
|
"grad_norm": 0.8264063000679016, |
|
"learning_rate": 3.7818940497014594e-05, |
|
"loss": 0.5749, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.2439715572811992, |
|
"grad_norm": 0.482183575630188, |
|
"learning_rate": 3.780265348069826e-05, |
|
"loss": 0.5553, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.24429728699719547, |
|
"grad_norm": 0.6649850606918335, |
|
"learning_rate": 3.7786366464381925e-05, |
|
"loss": 0.6042, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.24462301671319173, |
|
"grad_norm": 0.5215208530426025, |
|
"learning_rate": 3.777007944806559e-05, |
|
"loss": 0.5134, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.24494874642918799, |
|
"grad_norm": 0.6028915643692017, |
|
"learning_rate": 3.775379243174926e-05, |
|
"loss": 0.5, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.24527447614518424, |
|
"grad_norm": 0.5038050413131714, |
|
"learning_rate": 3.773750541543293e-05, |
|
"loss": 0.6081, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.2456002058611805, |
|
"grad_norm": 0.568586528301239, |
|
"learning_rate": 3.7721218399116595e-05, |
|
"loss": 0.5484, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.24592593557717676, |
|
"grad_norm": 0.4442402720451355, |
|
"learning_rate": 3.770493138280026e-05, |
|
"loss": 0.5983, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.24625166529317302, |
|
"grad_norm": 0.775284469127655, |
|
"learning_rate": 3.7688644366483927e-05, |
|
"loss": 0.549, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.24657739500916928, |
|
"grad_norm": 0.7132833003997803, |
|
"learning_rate": 3.76723573501676e-05, |
|
"loss": 0.5317, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.24690312472516554, |
|
"grad_norm": 0.7935360074043274, |
|
"learning_rate": 3.7656070333851265e-05, |
|
"loss": 0.5389, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.2472288544411618, |
|
"grad_norm": 0.5749487280845642, |
|
"learning_rate": 3.7639783317534924e-05, |
|
"loss": 0.5918, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.2475545841571581, |
|
"grad_norm": 0.6536827087402344, |
|
"learning_rate": 3.7623496301218596e-05, |
|
"loss": 0.5245, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.24788031387315435, |
|
"grad_norm": 0.7014347314834595, |
|
"learning_rate": 3.760720928490226e-05, |
|
"loss": 0.5661, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.2482060435891506, |
|
"grad_norm": 0.8436623811721802, |
|
"learning_rate": 3.7590922268585934e-05, |
|
"loss": 0.5714, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.24853177330514686, |
|
"grad_norm": 0.6371897459030151, |
|
"learning_rate": 3.7574635252269593e-05, |
|
"loss": 0.5767, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.24885750302114312, |
|
"grad_norm": 0.7796430587768555, |
|
"learning_rate": 3.7558348235953266e-05, |
|
"loss": 0.5308, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.24918323273713938, |
|
"grad_norm": 0.6565324664115906, |
|
"learning_rate": 3.754206121963693e-05, |
|
"loss": 0.5377, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.24950896245313564, |
|
"grad_norm": 0.6670543551445007, |
|
"learning_rate": 3.75257742033206e-05, |
|
"loss": 0.6095, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.2498346921691319, |
|
"grad_norm": 0.8650514483451843, |
|
"learning_rate": 3.750948718700426e-05, |
|
"loss": 0.5586, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.25016042188512816, |
|
"grad_norm": 0.42015933990478516, |
|
"learning_rate": 3.749320017068793e-05, |
|
"loss": 0.5274, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.2504861516011244, |
|
"grad_norm": 0.5667533278465271, |
|
"learning_rate": 3.74769131543716e-05, |
|
"loss": 0.5628, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.2508118813171207, |
|
"grad_norm": 0.6887187361717224, |
|
"learning_rate": 3.746062613805527e-05, |
|
"loss": 0.5663, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.25113761103311694, |
|
"grad_norm": 0.4367005527019501, |
|
"learning_rate": 3.744433912173893e-05, |
|
"loss": 0.5368, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.2514633407491132, |
|
"grad_norm": 0.3392166197299957, |
|
"learning_rate": 3.74280521054226e-05, |
|
"loss": 0.5353, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.25178907046510945, |
|
"grad_norm": 0.5449352860450745, |
|
"learning_rate": 3.7411765089106264e-05, |
|
"loss": 0.5611, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.2521148001811057, |
|
"grad_norm": 0.6924061179161072, |
|
"learning_rate": 3.739547807278994e-05, |
|
"loss": 0.5918, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.252440529897102, |
|
"grad_norm": 0.8356592655181885, |
|
"learning_rate": 3.73791910564736e-05, |
|
"loss": 0.5713, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.25276625961309823, |
|
"grad_norm": 0.9207838177680969, |
|
"learning_rate": 3.736290404015727e-05, |
|
"loss": 0.5078, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.2530919893290945, |
|
"grad_norm": 0.6466575860977173, |
|
"learning_rate": 3.7346617023840934e-05, |
|
"loss": 0.5274, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.25341771904509075, |
|
"grad_norm": 0.5351524353027344, |
|
"learning_rate": 3.7330330007524606e-05, |
|
"loss": 0.5411, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.253743448761087, |
|
"grad_norm": 0.7786761522293091, |
|
"learning_rate": 3.731404299120827e-05, |
|
"loss": 0.4859, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.25406917847708327, |
|
"grad_norm": 0.6750699281692505, |
|
"learning_rate": 3.729775597489194e-05, |
|
"loss": 0.5689, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.2543949081930795, |
|
"grad_norm": 0.7088775038719177, |
|
"learning_rate": 3.7281468958575604e-05, |
|
"loss": 0.5325, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.2547206379090758, |
|
"grad_norm": 0.8920672535896301, |
|
"learning_rate": 3.726518194225927e-05, |
|
"loss": 0.5284, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.25504636762507205, |
|
"grad_norm": 0.6582838296890259, |
|
"learning_rate": 3.724889492594294e-05, |
|
"loss": 0.511, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.2553720973410683, |
|
"grad_norm": 0.6662094593048096, |
|
"learning_rate": 3.723260790962661e-05, |
|
"loss": 0.5618, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.25569782705706456, |
|
"grad_norm": 0.4346591830253601, |
|
"learning_rate": 3.721632089331027e-05, |
|
"loss": 0.54, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.2560235567730608, |
|
"grad_norm": 0.7967207431793213, |
|
"learning_rate": 3.720003387699394e-05, |
|
"loss": 0.5884, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.25634928648905714, |
|
"grad_norm": 0.4879821538925171, |
|
"learning_rate": 3.7183746860677605e-05, |
|
"loss": 0.5557, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.2566750162050534, |
|
"grad_norm": 0.5626016855239868, |
|
"learning_rate": 3.716745984436128e-05, |
|
"loss": 0.498, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.25700074592104966, |
|
"grad_norm": 0.5859974026679993, |
|
"learning_rate": 3.7151172828044936e-05, |
|
"loss": 0.5218, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.2573264756370459, |
|
"grad_norm": 0.7462596893310547, |
|
"learning_rate": 3.713488581172861e-05, |
|
"loss": 0.5093, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.2576522053530422, |
|
"grad_norm": 0.9555974006652832, |
|
"learning_rate": 3.7118598795412274e-05, |
|
"loss": 0.5348, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.25797793506903843, |
|
"grad_norm": 0.7466504573822021, |
|
"learning_rate": 3.710231177909595e-05, |
|
"loss": 0.5383, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.2583036647850347, |
|
"grad_norm": 0.8801865577697754, |
|
"learning_rate": 3.7086024762779606e-05, |
|
"loss": 0.4767, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.25862939450103095, |
|
"grad_norm": 0.48174184560775757, |
|
"learning_rate": 3.706973774646328e-05, |
|
"loss": 0.5528, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.2589551242170272, |
|
"grad_norm": 0.7198649048805237, |
|
"learning_rate": 3.7053450730146944e-05, |
|
"loss": 0.5953, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.25928085393302347, |
|
"grad_norm": 0.4515075385570526, |
|
"learning_rate": 3.703716371383061e-05, |
|
"loss": 0.5505, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.25960658364901973, |
|
"grad_norm": 0.706524670124054, |
|
"learning_rate": 3.7020876697514276e-05, |
|
"loss": 0.6011, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.259932313365016, |
|
"grad_norm": 0.6895307302474976, |
|
"learning_rate": 3.700458968119794e-05, |
|
"loss": 0.5188, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.26025804308101225, |
|
"grad_norm": 0.7927341461181641, |
|
"learning_rate": 3.6988302664881614e-05, |
|
"loss": 0.5739, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.2605837727970085, |
|
"grad_norm": 0.8496550917625427, |
|
"learning_rate": 3.697201564856528e-05, |
|
"loss": 0.5152, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.26090950251300477, |
|
"grad_norm": 0.47138693928718567, |
|
"learning_rate": 3.6955728632248945e-05, |
|
"loss": 0.5475, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.261235232229001, |
|
"grad_norm": 0.8020485639572144, |
|
"learning_rate": 3.693944161593261e-05, |
|
"loss": 0.5489, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.2615609619449973, |
|
"grad_norm": 0.6385429501533508, |
|
"learning_rate": 3.692315459961628e-05, |
|
"loss": 0.5457, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.26188669166099354, |
|
"grad_norm": 0.6027743220329285, |
|
"learning_rate": 3.690686758329995e-05, |
|
"loss": 0.5412, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.2622124213769898, |
|
"grad_norm": 0.6040454506874084, |
|
"learning_rate": 3.6890580566983615e-05, |
|
"loss": 0.5348, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.26253815109298606, |
|
"grad_norm": 0.6697177290916443, |
|
"learning_rate": 3.687429355066728e-05, |
|
"loss": 0.509, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.2628638808089823, |
|
"grad_norm": 0.8428653478622437, |
|
"learning_rate": 3.6858006534350946e-05, |
|
"loss": 0.5505, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.2631896105249786, |
|
"grad_norm": 0.9421257972717285, |
|
"learning_rate": 3.684171951803462e-05, |
|
"loss": 0.5587, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.26351534024097484, |
|
"grad_norm": 0.7752894759178162, |
|
"learning_rate": 3.6825432501718285e-05, |
|
"loss": 0.5308, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.2638410699569711, |
|
"grad_norm": 0.9658520817756653, |
|
"learning_rate": 3.6809145485401944e-05, |
|
"loss": 0.5394, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.26416679967296736, |
|
"grad_norm": 0.3100132644176483, |
|
"learning_rate": 3.6792858469085616e-05, |
|
"loss": 0.5616, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.2644925293889636, |
|
"grad_norm": 1.0838834047317505, |
|
"learning_rate": 3.677657145276928e-05, |
|
"loss": 0.5374, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.2648182591049599, |
|
"grad_norm": 0.9311345219612122, |
|
"learning_rate": 3.6760284436452954e-05, |
|
"loss": 0.5353, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.26514398882095613, |
|
"grad_norm": 0.32365360856056213, |
|
"learning_rate": 3.674399742013661e-05, |
|
"loss": 0.5493, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.2654697185369524, |
|
"grad_norm": 0.6390203833580017, |
|
"learning_rate": 3.6727710403820286e-05, |
|
"loss": 0.5205, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.26579544825294865, |
|
"grad_norm": 0.6106113195419312, |
|
"learning_rate": 3.671142338750395e-05, |
|
"loss": 0.5161, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.2661211779689449, |
|
"grad_norm": 0.4415883421897888, |
|
"learning_rate": 3.669513637118762e-05, |
|
"loss": 0.5235, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.26644690768494117, |
|
"grad_norm": 0.8828484416007996, |
|
"learning_rate": 3.667884935487128e-05, |
|
"loss": 0.5214, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.26677263740093743, |
|
"grad_norm": 0.8186760544776917, |
|
"learning_rate": 3.666256233855495e-05, |
|
"loss": 0.5435, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.2670983671169337, |
|
"grad_norm": 0.43989554047584534, |
|
"learning_rate": 3.664627532223862e-05, |
|
"loss": 0.5653, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.26742409683292995, |
|
"grad_norm": 1.083422303199768, |
|
"learning_rate": 3.662998830592229e-05, |
|
"loss": 0.5338, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.2677498265489262, |
|
"grad_norm": 0.40522611141204834, |
|
"learning_rate": 3.661370128960596e-05, |
|
"loss": 0.4892, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.26807555626492247, |
|
"grad_norm": 0.7010061740875244, |
|
"learning_rate": 3.659741427328962e-05, |
|
"loss": 0.5372, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.2684012859809188, |
|
"grad_norm": 0.9971382021903992, |
|
"learning_rate": 3.6581127256973284e-05, |
|
"loss": 0.501, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.26872701569691504, |
|
"grad_norm": 0.5222276449203491, |
|
"learning_rate": 3.656484024065696e-05, |
|
"loss": 0.5194, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.2690527454129113, |
|
"grad_norm": 0.724824845790863, |
|
"learning_rate": 3.654855322434062e-05, |
|
"loss": 0.499, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.26937847512890756, |
|
"grad_norm": 0.48272421956062317, |
|
"learning_rate": 3.653226620802429e-05, |
|
"loss": 0.486, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.2697042048449038, |
|
"grad_norm": 0.8187432885169983, |
|
"learning_rate": 3.6515979191707954e-05, |
|
"loss": 0.5634, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.2700299345609001, |
|
"grad_norm": 0.46917855739593506, |
|
"learning_rate": 3.6499692175391626e-05, |
|
"loss": 0.5468, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.27035566427689633, |
|
"grad_norm": 0.5338607430458069, |
|
"learning_rate": 3.648340515907529e-05, |
|
"loss": 0.481, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.2706813939928926, |
|
"grad_norm": 0.5420836806297302, |
|
"learning_rate": 3.646711814275896e-05, |
|
"loss": 0.5391, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.27100712370888885, |
|
"grad_norm": 0.5124307870864868, |
|
"learning_rate": 3.6450831126442624e-05, |
|
"loss": 0.5446, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.2713328534248851, |
|
"grad_norm": 0.5944223403930664, |
|
"learning_rate": 3.643454411012629e-05, |
|
"loss": 0.5759, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.27165858314088137, |
|
"grad_norm": 1.1431384086608887, |
|
"learning_rate": 3.641825709380996e-05, |
|
"loss": 0.5416, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.27198431285687763, |
|
"grad_norm": 0.9613766670227051, |
|
"learning_rate": 3.640197007749363e-05, |
|
"loss": 0.521, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.2723100425728739, |
|
"grad_norm": 0.7477935552597046, |
|
"learning_rate": 3.638568306117729e-05, |
|
"loss": 0.558, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.27263577228887015, |
|
"grad_norm": 0.47112804651260376, |
|
"learning_rate": 3.636939604486096e-05, |
|
"loss": 0.5083, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.2729615020048664, |
|
"grad_norm": 0.5914379954338074, |
|
"learning_rate": 3.6353109028544625e-05, |
|
"loss": 0.5776, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.27328723172086267, |
|
"grad_norm": 0.5500662326812744, |
|
"learning_rate": 3.63368220122283e-05, |
|
"loss": 0.5194, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.2736129614368589, |
|
"grad_norm": 0.41591793298721313, |
|
"learning_rate": 3.6320534995911956e-05, |
|
"loss": 0.5266, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.2739386911528552, |
|
"grad_norm": 1.080356478691101, |
|
"learning_rate": 3.630424797959563e-05, |
|
"loss": 0.4964, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.27426442086885144, |
|
"grad_norm": 0.40892690420150757, |
|
"learning_rate": 3.6287960963279294e-05, |
|
"loss": 0.5163, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.2745901505848477, |
|
"grad_norm": 0.7729841470718384, |
|
"learning_rate": 3.627167394696297e-05, |
|
"loss": 0.5336, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.27491588030084396, |
|
"grad_norm": 0.6264617443084717, |
|
"learning_rate": 3.6255386930646626e-05, |
|
"loss": 0.5762, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.2752416100168402, |
|
"grad_norm": 0.8050372004508972, |
|
"learning_rate": 3.623909991433029e-05, |
|
"loss": 0.4509, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.2755673397328365, |
|
"grad_norm": 0.621804416179657, |
|
"learning_rate": 3.6222812898013964e-05, |
|
"loss": 0.5174, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.27589306944883274, |
|
"grad_norm": 0.5717790126800537, |
|
"learning_rate": 3.620652588169763e-05, |
|
"loss": 0.5431, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.276218799164829, |
|
"grad_norm": 0.394345223903656, |
|
"learning_rate": 3.6190238865381295e-05, |
|
"loss": 0.5294, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.27654452888082526, |
|
"grad_norm": 0.8917814493179321, |
|
"learning_rate": 3.617395184906496e-05, |
|
"loss": 0.4955, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.2768702585968215, |
|
"grad_norm": 0.721481442451477, |
|
"learning_rate": 3.6157664832748634e-05, |
|
"loss": 0.5433, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.2771959883128178, |
|
"grad_norm": 0.6476948857307434, |
|
"learning_rate": 3.61413778164323e-05, |
|
"loss": 0.563, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.27752171802881404, |
|
"grad_norm": 0.38036003708839417, |
|
"learning_rate": 3.6125090800115965e-05, |
|
"loss": 0.516, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.2778474477448103, |
|
"grad_norm": 0.6185033917427063, |
|
"learning_rate": 3.610880378379963e-05, |
|
"loss": 0.5178, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.27817317746080655, |
|
"grad_norm": 0.8313725590705872, |
|
"learning_rate": 3.60925167674833e-05, |
|
"loss": 0.5296, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.2784989071768028, |
|
"grad_norm": 0.5369439721107483, |
|
"learning_rate": 3.607622975116697e-05, |
|
"loss": 0.5803, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.27882463689279907, |
|
"grad_norm": 0.7777513265609741, |
|
"learning_rate": 3.6059942734850635e-05, |
|
"loss": 0.4875, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.27915036660879533, |
|
"grad_norm": 0.5527925491333008, |
|
"learning_rate": 3.60436557185343e-05, |
|
"loss": 0.5141, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.2794760963247916, |
|
"grad_norm": 0.8335199356079102, |
|
"learning_rate": 3.6027368702217966e-05, |
|
"loss": 0.4851, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.27980182604078785, |
|
"grad_norm": 0.7015230059623718, |
|
"learning_rate": 3.601108168590163e-05, |
|
"loss": 0.5395, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.2801275557567841, |
|
"grad_norm": 0.7245033979415894, |
|
"learning_rate": 3.5994794669585305e-05, |
|
"loss": 0.5204, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.2804532854727804, |
|
"grad_norm": 0.8472508192062378, |
|
"learning_rate": 3.5978507653268964e-05, |
|
"loss": 0.5087, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.2807790151887767, |
|
"grad_norm": 0.7517431974411011, |
|
"learning_rate": 3.5962220636952636e-05, |
|
"loss": 0.5176, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.28110474490477294, |
|
"grad_norm": 0.5864343643188477, |
|
"learning_rate": 3.59459336206363e-05, |
|
"loss": 0.5828, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.2814304746207692, |
|
"grad_norm": 0.8981267809867859, |
|
"learning_rate": 3.5929646604319974e-05, |
|
"loss": 0.5309, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.28175620433676546, |
|
"grad_norm": 0.8167164325714111, |
|
"learning_rate": 3.591335958800364e-05, |
|
"loss": 0.5513, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.2820819340527617, |
|
"grad_norm": 0.7764830589294434, |
|
"learning_rate": 3.58970725716873e-05, |
|
"loss": 0.5249, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.282407663768758, |
|
"grad_norm": 0.7545201182365417, |
|
"learning_rate": 3.588078555537097e-05, |
|
"loss": 0.5293, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.28273339348475424, |
|
"grad_norm": 0.6954336166381836, |
|
"learning_rate": 3.586449853905464e-05, |
|
"loss": 0.5532, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.2830591232007505, |
|
"grad_norm": 0.6742025017738342, |
|
"learning_rate": 3.584821152273831e-05, |
|
"loss": 0.5356, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.28338485291674675, |
|
"grad_norm": 0.731679379940033, |
|
"learning_rate": 3.583192450642197e-05, |
|
"loss": 0.5128, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.283710582632743, |
|
"grad_norm": 0.7906468510627747, |
|
"learning_rate": 3.581563749010564e-05, |
|
"loss": 0.5359, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.2840363123487393, |
|
"grad_norm": 0.36753523349761963, |
|
"learning_rate": 3.579935047378931e-05, |
|
"loss": 0.5366, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.28436204206473553, |
|
"grad_norm": 0.6043976545333862, |
|
"learning_rate": 3.578306345747298e-05, |
|
"loss": 0.4995, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.2846877717807318, |
|
"grad_norm": 0.7573038339614868, |
|
"learning_rate": 3.576677644115664e-05, |
|
"loss": 0.5093, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.28501350149672805, |
|
"grad_norm": 0.25290992856025696, |
|
"learning_rate": 3.5750489424840304e-05, |
|
"loss": 0.4948, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.2853392312127243, |
|
"grad_norm": 0.6551434397697449, |
|
"learning_rate": 3.5734202408523977e-05, |
|
"loss": 0.5116, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.28566496092872057, |
|
"grad_norm": 0.6715214252471924, |
|
"learning_rate": 3.571791539220764e-05, |
|
"loss": 0.6104, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.2859906906447168, |
|
"grad_norm": 0.7275449633598328, |
|
"learning_rate": 3.570162837589131e-05, |
|
"loss": 0.506, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.2863164203607131, |
|
"grad_norm": 0.2885235846042633, |
|
"learning_rate": 3.5685341359574974e-05, |
|
"loss": 0.4684, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.28664215007670935, |
|
"grad_norm": 0.9342713356018066, |
|
"learning_rate": 3.5669054343258646e-05, |
|
"loss": 0.5293, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.2869678797927056, |
|
"grad_norm": 1.0423755645751953, |
|
"learning_rate": 3.565276732694231e-05, |
|
"loss": 0.5466, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.28729360950870186, |
|
"grad_norm": 1.0259456634521484, |
|
"learning_rate": 3.563648031062598e-05, |
|
"loss": 0.4885, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.2876193392246981, |
|
"grad_norm": 0.8733958601951599, |
|
"learning_rate": 3.5620193294309643e-05, |
|
"loss": 0.5353, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.2879450689406944, |
|
"grad_norm": 0.33869871497154236, |
|
"learning_rate": 3.560390627799331e-05, |
|
"loss": 0.5465, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.28827079865669064, |
|
"grad_norm": 0.5838894844055176, |
|
"learning_rate": 3.558761926167698e-05, |
|
"loss": 0.555, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.2885965283726869, |
|
"grad_norm": 0.8616543412208557, |
|
"learning_rate": 3.557133224536065e-05, |
|
"loss": 0.5173, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.28892225808868316, |
|
"grad_norm": 0.8486323356628418, |
|
"learning_rate": 3.555504522904431e-05, |
|
"loss": 0.5258, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.2892479878046794, |
|
"grad_norm": 0.6569567918777466, |
|
"learning_rate": 3.553875821272798e-05, |
|
"loss": 0.5097, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.2895737175206757, |
|
"grad_norm": 0.6821163296699524, |
|
"learning_rate": 3.5522471196411645e-05, |
|
"loss": 0.5428, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.28989944723667194, |
|
"grad_norm": 0.6147534251213074, |
|
"learning_rate": 3.550618418009532e-05, |
|
"loss": 0.5544, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.2902251769526682, |
|
"grad_norm": 0.42478904128074646, |
|
"learning_rate": 3.5489897163778976e-05, |
|
"loss": 0.5376, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.29055090666866445, |
|
"grad_norm": 0.5254961252212524, |
|
"learning_rate": 3.547361014746265e-05, |
|
"loss": 0.4964, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.2908766363846607, |
|
"grad_norm": 0.6934669017791748, |
|
"learning_rate": 3.5457323131146314e-05, |
|
"loss": 0.4835, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.291202366100657, |
|
"grad_norm": 0.4250465929508209, |
|
"learning_rate": 3.544103611482999e-05, |
|
"loss": 0.4954, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.29152809581665323, |
|
"grad_norm": 0.6067728996276855, |
|
"learning_rate": 3.5424749098513646e-05, |
|
"loss": 0.4926, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.2918538255326495, |
|
"grad_norm": 0.5424463748931885, |
|
"learning_rate": 3.540846208219731e-05, |
|
"loss": 0.5627, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.2921795552486458, |
|
"grad_norm": 0.5810889005661011, |
|
"learning_rate": 3.5392175065880984e-05, |
|
"loss": 0.4316, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.29250528496464206, |
|
"grad_norm": 0.4583912491798401, |
|
"learning_rate": 3.537588804956465e-05, |
|
"loss": 0.4987, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.2928310146806383, |
|
"grad_norm": 0.4320780634880066, |
|
"learning_rate": 3.5359601033248315e-05, |
|
"loss": 0.5204, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.2931567443966346, |
|
"grad_norm": 0.6955101490020752, |
|
"learning_rate": 3.534331401693198e-05, |
|
"loss": 0.5179, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.29348247411263084, |
|
"grad_norm": 0.512250542640686, |
|
"learning_rate": 3.5327027000615654e-05, |
|
"loss": 0.4909, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.2938082038286271, |
|
"grad_norm": 0.7975231409072876, |
|
"learning_rate": 3.531073998429932e-05, |
|
"loss": 0.4845, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.29413393354462336, |
|
"grad_norm": 0.25338149070739746, |
|
"learning_rate": 3.5294452967982985e-05, |
|
"loss": 0.4963, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.2944596632606196, |
|
"grad_norm": 0.43115437030792236, |
|
"learning_rate": 3.527816595166665e-05, |
|
"loss": 0.5203, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.2947853929766159, |
|
"grad_norm": 0.830754280090332, |
|
"learning_rate": 3.5261878935350317e-05, |
|
"loss": 0.4916, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.29511112269261214, |
|
"grad_norm": 0.8370751738548279, |
|
"learning_rate": 3.524559191903399e-05, |
|
"loss": 0.547, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.2954368524086084, |
|
"grad_norm": 0.7122400403022766, |
|
"learning_rate": 3.5229304902717655e-05, |
|
"loss": 0.5126, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.29576258212460466, |
|
"grad_norm": 0.4084763824939728, |
|
"learning_rate": 3.521301788640132e-05, |
|
"loss": 0.4971, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.2960883118406009, |
|
"grad_norm": 0.8079352974891663, |
|
"learning_rate": 3.5196730870084986e-05, |
|
"loss": 0.4992, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.2964140415565972, |
|
"grad_norm": 0.25352516770362854, |
|
"learning_rate": 3.518044385376865e-05, |
|
"loss": 0.5333, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.29673977127259343, |
|
"grad_norm": 0.5390329957008362, |
|
"learning_rate": 3.5164156837452324e-05, |
|
"loss": 0.5007, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.2970655009885897, |
|
"grad_norm": 0.6617804765701294, |
|
"learning_rate": 3.514786982113599e-05, |
|
"loss": 0.548, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.29739123070458595, |
|
"grad_norm": 0.7202132940292358, |
|
"learning_rate": 3.5131582804819656e-05, |
|
"loss": 0.5417, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.2977169604205822, |
|
"grad_norm": 0.28012895584106445, |
|
"learning_rate": 3.511529578850332e-05, |
|
"loss": 0.4883, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.29804269013657847, |
|
"grad_norm": 0.3527827560901642, |
|
"learning_rate": 3.5099008772186994e-05, |
|
"loss": 0.523, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.29836841985257473, |
|
"grad_norm": 0.7193790078163147, |
|
"learning_rate": 3.508272175587066e-05, |
|
"loss": 0.5148, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.298694149568571, |
|
"grad_norm": 0.9702345728874207, |
|
"learning_rate": 3.506643473955432e-05, |
|
"loss": 0.4781, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.29901987928456725, |
|
"grad_norm": 0.7323670983314514, |
|
"learning_rate": 3.505014772323799e-05, |
|
"loss": 0.5394, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.2993456090005635, |
|
"grad_norm": 0.6757960915565491, |
|
"learning_rate": 3.503386070692166e-05, |
|
"loss": 0.4984, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.29967133871655977, |
|
"grad_norm": 0.7119109630584717, |
|
"learning_rate": 3.501757369060533e-05, |
|
"loss": 0.5502, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.299997068432556, |
|
"grad_norm": 0.6820542216300964, |
|
"learning_rate": 3.500128667428899e-05, |
|
"loss": 0.5498, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.3003227981485523, |
|
"grad_norm": 0.784050703048706, |
|
"learning_rate": 3.498499965797266e-05, |
|
"loss": 0.5445, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.30064852786454854, |
|
"grad_norm": 0.6549366116523743, |
|
"learning_rate": 3.496871264165633e-05, |
|
"loss": 0.5326, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.3009742575805448, |
|
"grad_norm": 0.4872061014175415, |
|
"learning_rate": 3.495242562533999e-05, |
|
"loss": 0.5093, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.30129998729654106, |
|
"grad_norm": 0.3646996319293976, |
|
"learning_rate": 3.493613860902366e-05, |
|
"loss": 0.5476, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.3016257170125373, |
|
"grad_norm": 0.5709706544876099, |
|
"learning_rate": 3.4919851592707324e-05, |
|
"loss": 0.4513, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.3019514467285336, |
|
"grad_norm": 0.6031984090805054, |
|
"learning_rate": 3.4903564576390996e-05, |
|
"loss": 0.5044, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.30227717644452984, |
|
"grad_norm": 0.8381587862968445, |
|
"learning_rate": 3.488727756007466e-05, |
|
"loss": 0.5128, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.3026029061605261, |
|
"grad_norm": 1.0859401226043701, |
|
"learning_rate": 3.487099054375833e-05, |
|
"loss": 0.5328, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.30292863587652236, |
|
"grad_norm": 0.34642109274864197, |
|
"learning_rate": 3.4854703527441994e-05, |
|
"loss": 0.4852, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.3032543655925186, |
|
"grad_norm": 0.6529460549354553, |
|
"learning_rate": 3.483841651112566e-05, |
|
"loss": 0.5032, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.3035800953085149, |
|
"grad_norm": 0.7026881575584412, |
|
"learning_rate": 3.482212949480933e-05, |
|
"loss": 0.6338, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.30390582502451113, |
|
"grad_norm": 0.49741417169570923, |
|
"learning_rate": 3.4805842478493e-05, |
|
"loss": 0.5231, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.30423155474050745, |
|
"grad_norm": 0.6611301898956299, |
|
"learning_rate": 3.478955546217666e-05, |
|
"loss": 0.5189, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.3045572844565037, |
|
"grad_norm": 0.6907228827476501, |
|
"learning_rate": 3.477326844586033e-05, |
|
"loss": 0.5256, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.30488301417249997, |
|
"grad_norm": 0.5975654721260071, |
|
"learning_rate": 3.4756981429544e-05, |
|
"loss": 0.522, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.3052087438884962, |
|
"grad_norm": 0.6043006777763367, |
|
"learning_rate": 3.474069441322767e-05, |
|
"loss": 0.5018, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.3055344736044925, |
|
"grad_norm": 0.5697898864746094, |
|
"learning_rate": 3.4724407396911326e-05, |
|
"loss": 0.5009, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.30586020332048874, |
|
"grad_norm": 0.40364518761634827, |
|
"learning_rate": 3.4708120380595e-05, |
|
"loss": 0.4642, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.306185933036485, |
|
"grad_norm": 0.940877377986908, |
|
"learning_rate": 3.4691833364278664e-05, |
|
"loss": 0.5136, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.30651166275248126, |
|
"grad_norm": 0.7497209310531616, |
|
"learning_rate": 3.467554634796234e-05, |
|
"loss": 0.5261, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.3068373924684775, |
|
"grad_norm": 0.8120318651199341, |
|
"learning_rate": 3.4659259331645996e-05, |
|
"loss": 0.4756, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.3071631221844738, |
|
"grad_norm": 0.6802115440368652, |
|
"learning_rate": 3.464297231532967e-05, |
|
"loss": 0.5257, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.30748885190047004, |
|
"grad_norm": 0.43083488941192627, |
|
"learning_rate": 3.4626685299013334e-05, |
|
"loss": 0.5365, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.3078145816164663, |
|
"grad_norm": 0.6194273233413696, |
|
"learning_rate": 3.4610398282697e-05, |
|
"loss": 0.5157, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.30814031133246256, |
|
"grad_norm": 0.5603410601615906, |
|
"learning_rate": 3.4594111266380666e-05, |
|
"loss": 0.51, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.3084660410484588, |
|
"grad_norm": 1.0651506185531616, |
|
"learning_rate": 3.457782425006433e-05, |
|
"loss": 0.4759, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.3087917707644551, |
|
"grad_norm": 0.7674971222877502, |
|
"learning_rate": 3.4561537233748004e-05, |
|
"loss": 0.467, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.30911750048045133, |
|
"grad_norm": 0.9666951298713684, |
|
"learning_rate": 3.454525021743167e-05, |
|
"loss": 0.5524, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.3094432301964476, |
|
"grad_norm": 0.6148163080215454, |
|
"learning_rate": 3.4528963201115335e-05, |
|
"loss": 0.5345, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.30976895991244385, |
|
"grad_norm": 0.7641096711158752, |
|
"learning_rate": 3.4512676184799e-05, |
|
"loss": 0.4872, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.3100946896284401, |
|
"grad_norm": 0.6152538657188416, |
|
"learning_rate": 3.449638916848267e-05, |
|
"loss": 0.4832, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.31042041934443637, |
|
"grad_norm": 0.7761083841323853, |
|
"learning_rate": 3.448010215216634e-05, |
|
"loss": 0.4761, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.31074614906043263, |
|
"grad_norm": 0.6005348563194275, |
|
"learning_rate": 3.4463815135850005e-05, |
|
"loss": 0.4585, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.3110718787764289, |
|
"grad_norm": 0.7649496793746948, |
|
"learning_rate": 3.444752811953367e-05, |
|
"loss": 0.5283, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.31139760849242515, |
|
"grad_norm": 0.9503573179244995, |
|
"learning_rate": 3.4431241103217336e-05, |
|
"loss": 0.5032, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.3117233382084214, |
|
"grad_norm": 0.8403215408325195, |
|
"learning_rate": 3.441495408690101e-05, |
|
"loss": 0.5172, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.31204906792441767, |
|
"grad_norm": 0.5137957334518433, |
|
"learning_rate": 3.4398667070584675e-05, |
|
"loss": 0.5551, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.3123747976404139, |
|
"grad_norm": 0.6618998646736145, |
|
"learning_rate": 3.438238005426834e-05, |
|
"loss": 0.5237, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.3127005273564102, |
|
"grad_norm": 0.3272695541381836, |
|
"learning_rate": 3.4366093037952006e-05, |
|
"loss": 0.4556, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.31302625707240644, |
|
"grad_norm": 0.7416215538978577, |
|
"learning_rate": 3.434980602163567e-05, |
|
"loss": 0.5039, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.3133519867884027, |
|
"grad_norm": 0.9183087944984436, |
|
"learning_rate": 3.4333519005319344e-05, |
|
"loss": 0.5408, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.31367771650439896, |
|
"grad_norm": 0.3782617151737213, |
|
"learning_rate": 3.431723198900301e-05, |
|
"loss": 0.5113, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.3140034462203952, |
|
"grad_norm": 0.6314922571182251, |
|
"learning_rate": 3.4300944972686676e-05, |
|
"loss": 0.4955, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.3143291759363915, |
|
"grad_norm": 0.3009500801563263, |
|
"learning_rate": 3.428465795637034e-05, |
|
"loss": 0.5114, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.31465490565238774, |
|
"grad_norm": 0.8378229737281799, |
|
"learning_rate": 3.4268370940054014e-05, |
|
"loss": 0.5287, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.314980635368384, |
|
"grad_norm": 0.7249593138694763, |
|
"learning_rate": 3.425208392373768e-05, |
|
"loss": 0.5209, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.31530636508438026, |
|
"grad_norm": 0.45489412546157837, |
|
"learning_rate": 3.423579690742134e-05, |
|
"loss": 0.5745, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.3156320948003765, |
|
"grad_norm": 0.6379255056381226, |
|
"learning_rate": 3.421950989110501e-05, |
|
"loss": 0.5199, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.31595782451637283, |
|
"grad_norm": 0.8550392389297485, |
|
"learning_rate": 3.420322287478868e-05, |
|
"loss": 0.5374, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.3162835542323691, |
|
"grad_norm": 0.5571677684783936, |
|
"learning_rate": 3.418693585847235e-05, |
|
"loss": 0.5057, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.31660928394836535, |
|
"grad_norm": 0.48302140831947327, |
|
"learning_rate": 3.417064884215601e-05, |
|
"loss": 0.5496, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.3169350136643616, |
|
"grad_norm": 0.7864711284637451, |
|
"learning_rate": 3.415436182583968e-05, |
|
"loss": 0.5132, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.31726074338035787, |
|
"grad_norm": 0.5517250299453735, |
|
"learning_rate": 3.413807480952335e-05, |
|
"loss": 0.4826, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.3175864730963541, |
|
"grad_norm": 0.7834230065345764, |
|
"learning_rate": 3.412178779320701e-05, |
|
"loss": 0.5186, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.3179122028123504, |
|
"grad_norm": 0.938097357749939, |
|
"learning_rate": 3.410550077689068e-05, |
|
"loss": 0.4817, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.31823793252834665, |
|
"grad_norm": 0.25078582763671875, |
|
"learning_rate": 3.4089213760574344e-05, |
|
"loss": 0.4996, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.3185636622443429, |
|
"grad_norm": 0.7896013259887695, |
|
"learning_rate": 3.4072926744258016e-05, |
|
"loss": 0.5163, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.31888939196033916, |
|
"grad_norm": 0.6857266426086426, |
|
"learning_rate": 3.405663972794168e-05, |
|
"loss": 0.4952, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.3192151216763354, |
|
"grad_norm": 0.5710707306861877, |
|
"learning_rate": 3.404035271162535e-05, |
|
"loss": 0.5273, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.3195408513923317, |
|
"grad_norm": 0.5274339914321899, |
|
"learning_rate": 3.4024065695309014e-05, |
|
"loss": 0.5385, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.31986658110832794, |
|
"grad_norm": 0.27135804295539856, |
|
"learning_rate": 3.400777867899268e-05, |
|
"loss": 0.5042, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.3201923108243242, |
|
"grad_norm": 0.6852828860282898, |
|
"learning_rate": 3.399149166267635e-05, |
|
"loss": 0.5214, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.32051804054032046, |
|
"grad_norm": 0.5614081621170044, |
|
"learning_rate": 3.397520464636002e-05, |
|
"loss": 0.5023, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.3208437702563167, |
|
"grad_norm": 0.7719017863273621, |
|
"learning_rate": 3.395891763004368e-05, |
|
"loss": 0.4919, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.321169499972313, |
|
"grad_norm": 0.8100476264953613, |
|
"learning_rate": 3.394263061372735e-05, |
|
"loss": 0.4607, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.32149522968830924, |
|
"grad_norm": 0.6814531087875366, |
|
"learning_rate": 3.392634359741102e-05, |
|
"loss": 0.5457, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.3218209594043055, |
|
"grad_norm": 1.0356829166412354, |
|
"learning_rate": 3.391005658109469e-05, |
|
"loss": 0.4844, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.32214668912030175, |
|
"grad_norm": 0.8719603419303894, |
|
"learning_rate": 3.3893769564778346e-05, |
|
"loss": 0.5182, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.322472418836298, |
|
"grad_norm": 0.6145396828651428, |
|
"learning_rate": 3.387748254846202e-05, |
|
"loss": 0.4732, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.3227981485522943, |
|
"grad_norm": 1.005679726600647, |
|
"learning_rate": 3.3861195532145684e-05, |
|
"loss": 0.5182, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.32312387826829053, |
|
"grad_norm": 0.29751360416412354, |
|
"learning_rate": 3.384490851582936e-05, |
|
"loss": 0.4823, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.3234496079842868, |
|
"grad_norm": 0.7968891263008118, |
|
"learning_rate": 3.3828621499513016e-05, |
|
"loss": 0.5235, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.32377533770028305, |
|
"grad_norm": 0.7049364447593689, |
|
"learning_rate": 3.381233448319669e-05, |
|
"loss": 0.5392, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.3241010674162793, |
|
"grad_norm": 0.6265050172805786, |
|
"learning_rate": 3.3796047466880354e-05, |
|
"loss": 0.5119, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.32442679713227557, |
|
"grad_norm": 0.6732152104377747, |
|
"learning_rate": 3.377976045056402e-05, |
|
"loss": 0.4837, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.3247525268482718, |
|
"grad_norm": 0.25657424330711365, |
|
"learning_rate": 3.3763473434247686e-05, |
|
"loss": 0.5199, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.3250782565642681, |
|
"grad_norm": 0.4994146227836609, |
|
"learning_rate": 3.374718641793135e-05, |
|
"loss": 0.4894, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.32540398628026435, |
|
"grad_norm": 0.7468940615653992, |
|
"learning_rate": 3.3730899401615024e-05, |
|
"loss": 0.5409, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.3257297159962606, |
|
"grad_norm": 0.17829063534736633, |
|
"learning_rate": 3.371461238529869e-05, |
|
"loss": 0.5111, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.32605544571225686, |
|
"grad_norm": 0.6492403745651245, |
|
"learning_rate": 3.369832536898236e-05, |
|
"loss": 0.5085, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 0.3263811754282531, |
|
"grad_norm": 0.41203296184539795, |
|
"learning_rate": 3.368203835266602e-05, |
|
"loss": 0.4674, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.3267069051442494, |
|
"grad_norm": 0.6258901953697205, |
|
"learning_rate": 3.366575133634969e-05, |
|
"loss": 0.4797, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 0.32703263486024564, |
|
"grad_norm": 0.5243533849716187, |
|
"learning_rate": 3.364946432003336e-05, |
|
"loss": 0.4851, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.3273583645762419, |
|
"grad_norm": 0.7344015836715698, |
|
"learning_rate": 3.3633177303717025e-05, |
|
"loss": 0.4964, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.32768409429223816, |
|
"grad_norm": 1.1914827823638916, |
|
"learning_rate": 3.361689028740069e-05, |
|
"loss": 0.4923, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.3280098240082345, |
|
"grad_norm": 0.7036446928977966, |
|
"learning_rate": 3.3600603271084356e-05, |
|
"loss": 0.5234, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 0.32833555372423073, |
|
"grad_norm": 0.8239650726318359, |
|
"learning_rate": 3.358431625476803e-05, |
|
"loss": 0.4715, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.328661283440227, |
|
"grad_norm": 0.6158246397972107, |
|
"learning_rate": 3.3568029238451695e-05, |
|
"loss": 0.488, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 0.32898701315622325, |
|
"grad_norm": 0.708604633808136, |
|
"learning_rate": 3.355174222213536e-05, |
|
"loss": 0.4674, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.3293127428722195, |
|
"grad_norm": 0.5420898199081421, |
|
"learning_rate": 3.3535455205819026e-05, |
|
"loss": 0.4741, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 0.32963847258821577, |
|
"grad_norm": 0.49769943952560425, |
|
"learning_rate": 3.351916818950269e-05, |
|
"loss": 0.4638, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.32996420230421203, |
|
"grad_norm": 0.7099531888961792, |
|
"learning_rate": 3.3502881173186364e-05, |
|
"loss": 0.5236, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 0.3302899320202083, |
|
"grad_norm": 0.712815523147583, |
|
"learning_rate": 3.348659415687003e-05, |
|
"loss": 0.5268, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.33061566173620455, |
|
"grad_norm": 0.8762120008468628, |
|
"learning_rate": 3.3470307140553696e-05, |
|
"loss": 0.5045, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.3309413914522008, |
|
"grad_norm": 0.7411269545555115, |
|
"learning_rate": 3.345402012423736e-05, |
|
"loss": 0.5017, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.33126712116819707, |
|
"grad_norm": 0.7993664145469666, |
|
"learning_rate": 3.343773310792103e-05, |
|
"loss": 0.4866, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 0.3315928508841933, |
|
"grad_norm": 0.9997897148132324, |
|
"learning_rate": 3.34214460916047e-05, |
|
"loss": 0.5033, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.3319185806001896, |
|
"grad_norm": 0.3995771110057831, |
|
"learning_rate": 3.340515907528836e-05, |
|
"loss": 0.5037, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 0.33224431031618584, |
|
"grad_norm": 0.4990951418876648, |
|
"learning_rate": 3.338887205897203e-05, |
|
"loss": 0.5353, |
|
"step": 102000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 307003, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.515680604094464e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|