| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997450352701209, |
| "eval_steps": 500, |
| "global_step": 3529, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.002832941443100371, |
| "grad_norm": 174.56146240234375, |
| "learning_rate": 9.433962264150944e-07, |
| "loss": 17.5632, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.005665882886200742, |
| "grad_norm": 99.76522064208984, |
| "learning_rate": 1.8867924528301889e-06, |
| "loss": 16.5212, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.008498824329301113, |
| "grad_norm": 61.843570709228516, |
| "learning_rate": 2.830188679245283e-06, |
| "loss": 14.66, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011331765772401484, |
| "grad_norm": 58.26314926147461, |
| "learning_rate": 3.7735849056603777e-06, |
| "loss": 12.8877, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.014164707215501856, |
| "grad_norm": 64.67306518554688, |
| "learning_rate": 4.716981132075472e-06, |
| "loss": 12.6102, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.016997648658602225, |
| "grad_norm": 40.95338439941406, |
| "learning_rate": 5.660377358490566e-06, |
| "loss": 10.3169, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.019830590101702596, |
| "grad_norm": 59.99547576904297, |
| "learning_rate": 6.60377358490566e-06, |
| "loss": 10.4793, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.022663531544802967, |
| "grad_norm": 69.4089584350586, |
| "learning_rate": 7.5471698113207555e-06, |
| "loss": 10.2298, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02549647298790334, |
| "grad_norm": 38.2027702331543, |
| "learning_rate": 8.49056603773585e-06, |
| "loss": 8.5679, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.028329414431003713, |
| "grad_norm": 41.05867004394531, |
| "learning_rate": 9.433962264150944e-06, |
| "loss": 8.6834, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.031162355874104083, |
| "grad_norm": 45.03547286987305, |
| "learning_rate": 9.999966306552455e-06, |
| "loss": 7.8071, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.03399529731720445, |
| "grad_norm": 43.03623580932617, |
| "learning_rate": 9.999587260482597e-06, |
| "loss": 8.2945, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.036828238760304825, |
| "grad_norm": 37.273048400878906, |
| "learning_rate": 9.998787083568112e-06, |
| "loss": 8.0273, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03966118020340519, |
| "grad_norm": 55.94173049926758, |
| "learning_rate": 9.997565843210401e-06, |
| "loss": 7.1597, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04249412164650557, |
| "grad_norm": 50.30799102783203, |
| "learning_rate": 9.995923642278351e-06, |
| "loss": 7.2908, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.045327063089605935, |
| "grad_norm": 53.113243103027344, |
| "learning_rate": 9.993860619099673e-06, |
| "loss": 5.9006, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04816000453270631, |
| "grad_norm": 51.57769012451172, |
| "learning_rate": 9.991376947449254e-06, |
| "loss": 8.8304, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05099294597580668, |
| "grad_norm": 38.17790985107422, |
| "learning_rate": 9.988472836534509e-06, |
| "loss": 8.6644, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05382588741890705, |
| "grad_norm": 34.28203582763672, |
| "learning_rate": 9.985148530977767e-06, |
| "loss": 6.6272, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.056658828862007425, |
| "grad_norm": 34.18405532836914, |
| "learning_rate": 9.981404310795667e-06, |
| "loss": 8.2372, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05949177030510779, |
| "grad_norm": 40.83757400512695, |
| "learning_rate": 9.97724049137556e-06, |
| "loss": 5.3928, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.06232471174820817, |
| "grad_norm": 36.30077362060547, |
| "learning_rate": 9.972657423448961e-06, |
| "loss": 6.5196, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.06515765319130853, |
| "grad_norm": 38.03015899658203, |
| "learning_rate": 9.96765549306199e-06, |
| "loss": 7.1923, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0679905946344089, |
| "grad_norm": 42.84525680541992, |
| "learning_rate": 9.962235121542858e-06, |
| "loss": 6.9849, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.07082353607750928, |
| "grad_norm": 34.071800231933594, |
| "learning_rate": 9.956396765466382e-06, |
| "loss": 7.0978, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07365647752060965, |
| "grad_norm": 37.807029724121094, |
| "learning_rate": 9.950140916615526e-06, |
| "loss": 7.5077, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07648941896371002, |
| "grad_norm": 36.55296325683594, |
| "learning_rate": 9.943468101939968e-06, |
| "loss": 6.6867, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07932236040681039, |
| "grad_norm": 31.735977172851562, |
| "learning_rate": 9.936378883511722e-06, |
| "loss": 8.5626, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.08215530184991077, |
| "grad_norm": 51.97509002685547, |
| "learning_rate": 9.92887385847779e-06, |
| "loss": 7.1104, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.08498824329301113, |
| "grad_norm": 56.030025482177734, |
| "learning_rate": 9.920953659009863e-06, |
| "loss": 6.6099, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0878211847361115, |
| "grad_norm": 37.08720016479492, |
| "learning_rate": 9.912618952251071e-06, |
| "loss": 4.9933, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.09065412617921187, |
| "grad_norm": 34.61451721191406, |
| "learning_rate": 9.903870440259787e-06, |
| "loss": 5.7727, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.09348706762231225, |
| "grad_norm": 35.92675018310547, |
| "learning_rate": 9.89470885995049e-06, |
| "loss": 4.8536, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.09632000906541262, |
| "grad_norm": 31.899490356445312, |
| "learning_rate": 9.885134983031694e-06, |
| "loss": 5.6988, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.09915295050851299, |
| "grad_norm": 37.71702194213867, |
| "learning_rate": 9.875149615940943e-06, |
| "loss": 8.0547, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.10198589195161337, |
| "grad_norm": 32.81459426879883, |
| "learning_rate": 9.864753599776883e-06, |
| "loss": 5.7466, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.10481883339471373, |
| "grad_norm": 34.12638854980469, |
| "learning_rate": 9.853947810228416e-06, |
| "loss": 6.3535, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1076517748378141, |
| "grad_norm": 34.04792022705078, |
| "learning_rate": 9.842733157500932e-06, |
| "loss": 5.7424, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.11048471628091447, |
| "grad_norm": 33.2330322265625, |
| "learning_rate": 9.831110586239643e-06, |
| "loss": 5.4935, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.11331765772401485, |
| "grad_norm": 33.370567321777344, |
| "learning_rate": 9.819081075450014e-06, |
| "loss": 5.5257, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11615059916711522, |
| "grad_norm": 34.091304779052734, |
| "learning_rate": 9.806645638415302e-06, |
| "loss": 6.1631, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.11898354061021559, |
| "grad_norm": 28.293777465820312, |
| "learning_rate": 9.79380532261119e-06, |
| "loss": 6.2594, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.12181648205331595, |
| "grad_norm": 31.169191360473633, |
| "learning_rate": 9.780561209617569e-06, |
| "loss": 5.428, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.12464942349641633, |
| "grad_norm": 31.67852783203125, |
| "learning_rate": 9.766914415027426e-06, |
| "loss": 6.3704, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.1274823649395167, |
| "grad_norm": 24.92995262145996, |
| "learning_rate": 9.752866088352882e-06, |
| "loss": 6.3413, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.13031530638261707, |
| "grad_norm": 30.267122268676758, |
| "learning_rate": 9.738417412928348e-06, |
| "loss": 6.1918, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.13314824782571744, |
| "grad_norm": 34.71146011352539, |
| "learning_rate": 9.72356960581087e-06, |
| "loss": 5.2388, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1359811892688178, |
| "grad_norm": 32.87137985229492, |
| "learning_rate": 9.7083239176776e-06, |
| "loss": 4.2622, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.13881413071191817, |
| "grad_norm": 37.83769226074219, |
| "learning_rate": 9.692681632720448e-06, |
| "loss": 4.1838, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.14164707215501857, |
| "grad_norm": 29.84713363647461, |
| "learning_rate": 9.676644068537915e-06, |
| "loss": 6.1015, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14448001359811893, |
| "grad_norm": 37.30936050415039, |
| "learning_rate": 9.660212576024102e-06, |
| "loss": 6.1121, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1473129550412193, |
| "grad_norm": 43.746490478515625, |
| "learning_rate": 9.64338853925493e-06, |
| "loss": 6.0385, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.15014589648431967, |
| "grad_norm": 32.99515151977539, |
| "learning_rate": 9.62617337537154e-06, |
| "loss": 6.1911, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.15297883792742004, |
| "grad_norm": 46.957340240478516, |
| "learning_rate": 9.608568534460938e-06, |
| "loss": 5.822, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.1558117793705204, |
| "grad_norm": 33.011844635009766, |
| "learning_rate": 9.590575499433837e-06, |
| "loss": 7.1735, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.15864472081362077, |
| "grad_norm": 31.042083740234375, |
| "learning_rate": 9.572195785899756e-06, |
| "loss": 6.8695, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.16147766225672117, |
| "grad_norm": 26.289737701416016, |
| "learning_rate": 9.553430942039352e-06, |
| "loss": 6.9135, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.16431060369982153, |
| "grad_norm": 29.071701049804688, |
| "learning_rate": 9.534282548474008e-06, |
| "loss": 5.075, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.1671435451429219, |
| "grad_norm": 46.65534973144531, |
| "learning_rate": 9.514752218132703e-06, |
| "loss": 7.0842, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.16997648658602227, |
| "grad_norm": 31.195234298706055, |
| "learning_rate": 9.494841596116138e-06, |
| "loss": 4.9555, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.17280942802912264, |
| "grad_norm": 26.25963020324707, |
| "learning_rate": 9.474552359558167e-06, |
| "loss": 8.0829, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.175642369472223, |
| "grad_norm": 25.132131576538086, |
| "learning_rate": 9.453886217484536e-06, |
| "loss": 4.0549, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.17847531091532337, |
| "grad_norm": 47.06711196899414, |
| "learning_rate": 9.432844910668914e-06, |
| "loss": 6.1125, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.18130825235842374, |
| "grad_norm": 41.20506286621094, |
| "learning_rate": 9.41143021148627e-06, |
| "loss": 6.7009, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.18414119380152413, |
| "grad_norm": 27.037729263305664, |
| "learning_rate": 9.389643923763573e-06, |
| "loss": 6.8328, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.1869741352446245, |
| "grad_norm": 44.91098403930664, |
| "learning_rate": 9.367487882627866e-06, |
| "loss": 5.0284, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.18980707668772487, |
| "grad_norm": 32.71237564086914, |
| "learning_rate": 9.344963954351662e-06, |
| "loss": 6.0377, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.19264001813082524, |
| "grad_norm": 24.608020782470703, |
| "learning_rate": 9.32207403619577e-06, |
| "loss": 3.9539, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.1954729595739256, |
| "grad_norm": 45.37845230102539, |
| "learning_rate": 9.298820056249459e-06, |
| "loss": 7.6906, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.19830590101702597, |
| "grad_norm": 26.40629768371582, |
| "learning_rate": 9.275203973268064e-06, |
| "loss": 5.7302, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.20113884246012634, |
| "grad_norm": 25.433490753173828, |
| "learning_rate": 9.251227776507989e-06, |
| "loss": 5.6252, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.20397178390322673, |
| "grad_norm": 25.276575088500977, |
| "learning_rate": 9.226893485559146e-06, |
| "loss": 5.8884, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2068047253463271, |
| "grad_norm": 45.13107681274414, |
| "learning_rate": 9.202203150174836e-06, |
| "loss": 9.215, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.20963766678942747, |
| "grad_norm": 26.52821922302246, |
| "learning_rate": 9.177158850099099e-06, |
| "loss": 5.7232, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.21247060823252784, |
| "grad_norm": 43.29339599609375, |
| "learning_rate": 9.151762694891522e-06, |
| "loss": 6.7846, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2153035496756282, |
| "grad_norm": 29.308732986450195, |
| "learning_rate": 9.12601682374955e-06, |
| "loss": 5.8371, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.21813649111872857, |
| "grad_norm": 25.373172760009766, |
| "learning_rate": 9.099923405328293e-06, |
| "loss": 3.9846, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.22096943256182894, |
| "grad_norm": 33.12062454223633, |
| "learning_rate": 9.073484637557852e-06, |
| "loss": 4.8174, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.2238023740049293, |
| "grad_norm": 24.834850311279297, |
| "learning_rate": 9.046702747458186e-06, |
| "loss": 5.8073, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.2266353154480297, |
| "grad_norm": 23.760942459106445, |
| "learning_rate": 9.019579990951514e-06, |
| "loss": 5.6668, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22946825689113007, |
| "grad_norm": 25.230995178222656, |
| "learning_rate": 8.992118652672302e-06, |
| "loss": 5.6386, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.23230119833423044, |
| "grad_norm": 27.033655166625977, |
| "learning_rate": 8.964321045774808e-06, |
| "loss": 5.1316, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.2351341397773308, |
| "grad_norm": 36.77193832397461, |
| "learning_rate": 8.936189511738254e-06, |
| "loss": 5.0568, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.23796708122043117, |
| "grad_norm": 21.841785430908203, |
| "learning_rate": 8.907726420169583e-06, |
| "loss": 5.5521, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.24080002266353154, |
| "grad_norm": 46.69823455810547, |
| "learning_rate": 8.878934168603865e-06, |
| "loss": 6.7058, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2436329641066319, |
| "grad_norm": 23.228717803955078, |
| "learning_rate": 8.849815182302345e-06, |
| "loss": 7.8944, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.2464659055497323, |
| "grad_norm": 33.86655807495117, |
| "learning_rate": 8.820371914048153e-06, |
| "loss": 4.8468, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.24929884699283267, |
| "grad_norm": 25.241182327270508, |
| "learning_rate": 8.790606843939705e-06, |
| "loss": 3.709, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.25213178843593304, |
| "grad_norm": 25.6811580657959, |
| "learning_rate": 8.760522479181784e-06, |
| "loss": 4.844, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.2549647298790334, |
| "grad_norm": 43.929115295410156, |
| "learning_rate": 8.730121353874365e-06, |
| "loss": 6.7687, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.25779767132213377, |
| "grad_norm": 25.351106643676758, |
| "learning_rate": 8.69940602879915e-06, |
| "loss": 3.7733, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.26063061276523414, |
| "grad_norm": 45.543373107910156, |
| "learning_rate": 8.66837909120387e-06, |
| "loss": 6.5226, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2634635542083345, |
| "grad_norm": 35.3692626953125, |
| "learning_rate": 8.637043154584351e-06, |
| "loss": 7.782, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.2662964956514349, |
| "grad_norm": 23.175140380859375, |
| "learning_rate": 8.60540085846437e-06, |
| "loss": 3.7581, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.26912943709453524, |
| "grad_norm": 22.461284637451172, |
| "learning_rate": 8.573454868173325e-06, |
| "loss": 3.8114, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2719623785376356, |
| "grad_norm": 30.49061393737793, |
| "learning_rate": 8.541207874621718e-06, |
| "loss": 5.6752, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.274795319980736, |
| "grad_norm": 43.03390121459961, |
| "learning_rate": 8.508662594074496e-06, |
| "loss": 5.8459, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.27762826142383634, |
| "grad_norm": 144.74916076660156, |
| "learning_rate": 8.475821767922254e-06, |
| "loss": 6.1604, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.28046120286693677, |
| "grad_norm": 26.303081512451172, |
| "learning_rate": 8.442688162450315e-06, |
| "loss": 4.7632, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.28329414431003713, |
| "grad_norm": 28.066007614135742, |
| "learning_rate": 8.409264568605714e-06, |
| "loss": 8.0062, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2861270857531375, |
| "grad_norm": 23.32230567932129, |
| "learning_rate": 8.375553801762119e-06, |
| "loss": 3.9505, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.28896002719623787, |
| "grad_norm": 53.46368408203125, |
| "learning_rate": 8.34155870148267e-06, |
| "loss": 4.7727, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.29179296863933823, |
| "grad_norm": 28.557096481323242, |
| "learning_rate": 8.307282131280805e-06, |
| "loss": 3.876, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2946259100824386, |
| "grad_norm": 26.72674560546875, |
| "learning_rate": 8.272726978379049e-06, |
| "loss": 3.6362, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.29745885152553897, |
| "grad_norm": 33.64091110229492, |
| "learning_rate": 8.23789615346582e-06, |
| "loss": 4.8435, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.30029179296863934, |
| "grad_norm": 25.513519287109375, |
| "learning_rate": 8.202792590450246e-06, |
| "loss": 4.8615, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3031247344117397, |
| "grad_norm": 26.183082580566406, |
| "learning_rate": 8.167419246215042e-06, |
| "loss": 3.7897, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.30595767585484007, |
| "grad_norm": 37.91279983520508, |
| "learning_rate": 8.131779100367438e-06, |
| "loss": 3.8092, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.30879061729794044, |
| "grad_norm": 20.643037796020508, |
| "learning_rate": 8.09587515498819e-06, |
| "loss": 5.8217, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.3116235587410408, |
| "grad_norm": 24.009424209594727, |
| "learning_rate": 8.059710434378717e-06, |
| "loss": 4.6594, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3144565001841412, |
| "grad_norm": 26.472389221191406, |
| "learning_rate": 8.02328798480635e-06, |
| "loss": 6.5989, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.31728944162724154, |
| "grad_norm": 42.69245529174805, |
| "learning_rate": 7.986610874247736e-06, |
| "loss": 6.7969, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3201223830703419, |
| "grad_norm": 20.50579833984375, |
| "learning_rate": 7.949682192130407e-06, |
| "loss": 7.532, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.32295532451344233, |
| "grad_norm": 33.428890228271484, |
| "learning_rate": 7.912505049072559e-06, |
| "loss": 5.5098, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3257882659565427, |
| "grad_norm": 33.04521560668945, |
| "learning_rate": 7.875082576621024e-06, |
| "loss": 5.7852, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.32862120739964307, |
| "grad_norm": 29.72992706298828, |
| "learning_rate": 7.837417926987496e-06, |
| "loss": 3.8586, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.33145414884274343, |
| "grad_norm": 22.467132568359375, |
| "learning_rate": 7.799514272783014e-06, |
| "loss": 5.6287, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3342870902858438, |
| "grad_norm": 25.866819381713867, |
| "learning_rate": 7.761374806750712e-06, |
| "loss": 3.7462, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.33712003172894417, |
| "grad_norm": 21.801698684692383, |
| "learning_rate": 7.723002741496892e-06, |
| "loss": 5.6068, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.33995297317204454, |
| "grad_norm": 41.93526840209961, |
| "learning_rate": 7.684401309220416e-06, |
| "loss": 5.8573, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3427859146151449, |
| "grad_norm": 23.245235443115234, |
| "learning_rate": 7.645573761440444e-06, |
| "loss": 4.5851, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.34561885605824527, |
| "grad_norm": 24.502330780029297, |
| "learning_rate": 7.606523368722554e-06, |
| "loss": 6.4644, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.34845179750134564, |
| "grad_norm": 16.042354583740234, |
| "learning_rate": 7.567253420403249e-06, |
| "loss": 5.6877, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.351284738944446, |
| "grad_norm": 26.405628204345703, |
| "learning_rate": 7.527767224312883e-06, |
| "loss": 4.764, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.3541176803875464, |
| "grad_norm": 40.40938186645508, |
| "learning_rate": 7.488068106497035e-06, |
| "loss": 5.8002, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.35695062183064674, |
| "grad_norm": 25.338321685791016, |
| "learning_rate": 7.448159410936348e-06, |
| "loss": 5.5113, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.3597835632737471, |
| "grad_norm": 39.411128997802734, |
| "learning_rate": 7.4080444992648534e-06, |
| "loss": 5.5444, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.3626165047168475, |
| "grad_norm": 22.218137741088867, |
| "learning_rate": 7.3677267504868055e-06, |
| "loss": 4.4882, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.3654494461599479, |
| "grad_norm": 43.15862274169922, |
| "learning_rate": 7.327209560692063e-06, |
| "loss": 6.6107, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.36828238760304827, |
| "grad_norm": 20.51604652404785, |
| "learning_rate": 7.2864963427700284e-06, |
| "loss": 5.6351, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.37111532904614863, |
| "grad_norm": 21.392065048217773, |
| "learning_rate": 7.2455905261221585e-06, |
| "loss": 5.7755, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.373948270489249, |
| "grad_norm": 28.160072326660156, |
| "learning_rate": 7.204495556373106e-06, |
| "loss": 6.5779, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.37678121193234937, |
| "grad_norm": 41.15205764770508, |
| "learning_rate": 7.163214895080479e-06, |
| "loss": 6.4435, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.37961415337544974, |
| "grad_norm": 26.100757598876953, |
| "learning_rate": 7.121752019443266e-06, |
| "loss": 6.5864, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3824470948185501, |
| "grad_norm": 41.462791442871094, |
| "learning_rate": 7.080110422008937e-06, |
| "loss": 5.6488, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.38528003626165047, |
| "grad_norm": 22.369388580322266, |
| "learning_rate": 7.038293610379255e-06, |
| "loss": 4.4922, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.38811297770475084, |
| "grad_norm": 19.927444458007812, |
| "learning_rate": 6.996305106914824e-06, |
| "loss": 4.5791, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3909459191478512, |
| "grad_norm": 25.871030807495117, |
| "learning_rate": 6.954148448438389e-06, |
| "loss": 4.5578, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.3937788605909516, |
| "grad_norm": 18.420751571655273, |
| "learning_rate": 6.911827185936914e-06, |
| "loss": 4.6252, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.39661180203405194, |
| "grad_norm": 27.263010025024414, |
| "learning_rate": 6.869344884262473e-06, |
| "loss": 5.5235, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3994447434771523, |
| "grad_norm": 24.479764938354492, |
| "learning_rate": 6.8267051218319766e-06, |
| "loss": 5.6514, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.4022776849202527, |
| "grad_norm": 23.21695899963379, |
| "learning_rate": 6.7839114903257404e-06, |
| "loss": 7.5326, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.40511062636335304, |
| "grad_norm": 21.287368774414062, |
| "learning_rate": 6.74096759438496e-06, |
| "loss": 3.895, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.40794356780645347, |
| "grad_norm": 25.839454650878906, |
| "learning_rate": 6.697877051308067e-06, |
| "loss": 6.3928, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.41077650924955383, |
| "grad_norm": 22.896682739257812, |
| "learning_rate": 6.654643490746042e-06, |
| "loss": 4.5232, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4136094506926542, |
| "grad_norm": 25.252422332763672, |
| "learning_rate": 6.611270554396676e-06, |
| "loss": 6.5998, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.41644239213575457, |
| "grad_norm": 24.610836029052734, |
| "learning_rate": 6.567761895697816e-06, |
| "loss": 4.6121, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.41927533357885494, |
| "grad_norm": 19.092580795288086, |
| "learning_rate": 6.524121179519625e-06, |
| "loss": 3.6029, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.4221082750219553, |
| "grad_norm": 22.915136337280273, |
| "learning_rate": 6.480352081855884e-06, |
| "loss": 3.6352, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.42494121646505567, |
| "grad_norm": 29.044233322143555, |
| "learning_rate": 6.436458289514342e-06, |
| "loss": 4.6979, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.42777415790815604, |
| "grad_norm": 39.80937194824219, |
| "learning_rate": 6.392443499806175e-06, |
| "loss": 4.6673, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.4306070993512564, |
| "grad_norm": 22.760765075683594, |
| "learning_rate": 6.348311420234542e-06, |
| "loss": 4.6801, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.4334400407943568, |
| "grad_norm": 21.216337203979492, |
| "learning_rate": 6.304065768182295e-06, |
| "loss": 5.7451, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.43627298223745714, |
| "grad_norm": 20.622943878173828, |
| "learning_rate": 6.259710270598848e-06, |
| "loss": 5.6216, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4391059236805575, |
| "grad_norm": 40.299949645996094, |
| "learning_rate": 6.215248663686251e-06, |
| "loss": 6.5508, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.4419388651236579, |
| "grad_norm": 28.81671905517578, |
| "learning_rate": 6.170684692584469e-06, |
| "loss": 3.5039, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.44477180656675824, |
| "grad_norm": 43.010169982910156, |
| "learning_rate": 6.126022111055929e-06, |
| "loss": 6.4925, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.4476047480098586, |
| "grad_norm": 23.351240158081055, |
| "learning_rate": 6.081264681169317e-06, |
| "loss": 3.4456, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.45043768945295903, |
| "grad_norm": 40.19292449951172, |
| "learning_rate": 6.0364161729826905e-06, |
| "loss": 4.4953, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.4532706308960594, |
| "grad_norm": 25.595369338989258, |
| "learning_rate": 5.991480364225924e-06, |
| "loss": 6.2619, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.45610357233915977, |
| "grad_norm": 32.5233268737793, |
| "learning_rate": 5.946461039982485e-06, |
| "loss": 5.5702, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.45893651378226014, |
| "grad_norm": 25.565658569335938, |
| "learning_rate": 5.901361992370614e-06, |
| "loss": 3.5389, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.4617694552253605, |
| "grad_norm": 21.443763732910156, |
| "learning_rate": 5.856187020223901e-06, |
| "loss": 4.6532, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.46460239666846087, |
| "grad_norm": 26.775903701782227, |
| "learning_rate": 5.8109399287712935e-06, |
| "loss": 5.7745, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.46743533811156124, |
| "grad_norm": 20.02845001220703, |
| "learning_rate": 5.765624529316573e-06, |
| "loss": 5.506, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.4702682795546616, |
| "grad_norm": 22.177770614624023, |
| "learning_rate": 5.7202446389173225e-06, |
| "loss": 3.5255, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.473101220997762, |
| "grad_norm": 27.885957717895508, |
| "learning_rate": 5.674804080063392e-06, |
| "loss": 3.5088, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.47593416244086234, |
| "grad_norm": 33.34544372558594, |
| "learning_rate": 5.62930668035493e-06, |
| "loss": 4.4746, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.4787671038839627, |
| "grad_norm": 24.865848541259766, |
| "learning_rate": 5.5837562721799644e-06, |
| "loss": 6.4182, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4816000453270631, |
| "grad_norm": 20.06027603149414, |
| "learning_rate": 5.538156692391592e-06, |
| "loss": 3.499, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.48443298677016344, |
| "grad_norm": 28.240829467773438, |
| "learning_rate": 5.4925117819847925e-06, |
| "loss": 5.4651, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.4872659282132638, |
| "grad_norm": 39.07200241088867, |
| "learning_rate": 5.44682538577288e-06, |
| "loss": 4.7134, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.4900988696563642, |
| "grad_norm": 31.383825302124023, |
| "learning_rate": 5.4011013520636466e-06, |
| "loss": 4.4705, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.4929318110994646, |
| "grad_norm": 40.832984924316406, |
| "learning_rate": 5.355343532335215e-06, |
| "loss": 7.2469, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.49576475254256497, |
| "grad_norm": 20.33405303955078, |
| "learning_rate": 5.309555780911604e-06, |
| "loss": 5.4482, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.49859769398566534, |
| "grad_norm": 22.8585262298584, |
| "learning_rate": 5.263741954638072e-06, |
| "loss": 4.4573, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5014306354287656, |
| "grad_norm": 42.46244430541992, |
| "learning_rate": 5.217905912556248e-06, |
| "loss": 5.5277, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.5042635768718661, |
| "grad_norm": 21.30562973022461, |
| "learning_rate": 5.172051515579065e-06, |
| "loss": 5.4764, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5070965183149664, |
| "grad_norm": 18.9359130859375, |
| "learning_rate": 5.126182626165547e-06, |
| "loss": 6.4232, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5099294597580668, |
| "grad_norm": 33.8026123046875, |
| "learning_rate": 5.080303107995461e-06, |
| "loss": 6.6042, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.5127624012011671, |
| "grad_norm": 40.52323913574219, |
| "learning_rate": 5.034416825643868e-06, |
| "loss": 5.5848, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.5155953426442675, |
| "grad_norm": 27.342744827270508, |
| "learning_rate": 4.988527644255591e-06, |
| "loss": 5.2504, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.518428284087368, |
| "grad_norm": 19.118297576904297, |
| "learning_rate": 4.942639429219661e-06, |
| "loss": 4.5668, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5212612255304683, |
| "grad_norm": 41.146236419677734, |
| "learning_rate": 4.896756045843698e-06, |
| "loss": 6.0831, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.5240941669735687, |
| "grad_norm": 19.685937881469727, |
| "learning_rate": 4.85088135902834e-06, |
| "loss": 5.5025, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.526927108416669, |
| "grad_norm": 22.97096061706543, |
| "learning_rate": 4.805019232941689e-06, |
| "loss": 4.4157, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5297600498597694, |
| "grad_norm": 27.627784729003906, |
| "learning_rate": 4.7591735306938144e-06, |
| "loss": 4.3861, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.5325929913028697, |
| "grad_norm": 25.308032989501953, |
| "learning_rate": 4.713348114011357e-06, |
| "loss": 7.2963, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.5354259327459702, |
| "grad_norm": 19.11351203918457, |
| "learning_rate": 4.667546842912239e-06, |
| "loss": 4.2907, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.5382588741890705, |
| "grad_norm": 28.81739044189453, |
| "learning_rate": 4.6217735753805235e-06, |
| "loss": 4.5385, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5410918156321709, |
| "grad_norm": 20.510547637939453, |
| "learning_rate": 4.576032167041452e-06, |
| "loss": 7.2043, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.5439247570752712, |
| "grad_norm": 26.19765281677246, |
| "learning_rate": 4.530326470836659e-06, |
| "loss": 4.3494, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.5467576985183716, |
| "grad_norm": 25.779802322387695, |
| "learning_rate": 4.484660336699638e-06, |
| "loss": 5.3226, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.549590639961472, |
| "grad_norm": 26.97022247314453, |
| "learning_rate": 4.439037611231448e-06, |
| "loss": 6.5069, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.5524235814045724, |
| "grad_norm": 26.32407569885254, |
| "learning_rate": 4.393462137376696e-06, |
| "loss": 3.545, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5552565228476727, |
| "grad_norm": 30.962535858154297, |
| "learning_rate": 4.347937754099841e-06, |
| "loss": 4.4292, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.5580894642907731, |
| "grad_norm": 38.1851921081543, |
| "learning_rate": 4.302468296061823e-06, |
| "loss": 4.3079, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.5609224057338735, |
| "grad_norm": 21.038278579711914, |
| "learning_rate": 4.257057593297055e-06, |
| "loss": 4.5294, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.5637553471769738, |
| "grad_norm": 20.618942260742188, |
| "learning_rate": 4.211709470890815e-06, |
| "loss": 7.2449, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5665882886200743, |
| "grad_norm": 21.230995178222656, |
| "learning_rate": 4.166427748657034e-06, |
| "loss": 4.3681, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5694212300631746, |
| "grad_norm": 20.577428817749023, |
| "learning_rate": 4.121216240816559e-06, |
| "loss": 5.3925, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.572254171506275, |
| "grad_norm": 21.1496524810791, |
| "learning_rate": 4.076078755675852e-06, |
| "loss": 5.0495, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.5750871129493753, |
| "grad_norm": 26.215744018554688, |
| "learning_rate": 4.0310190953062155e-06, |
| "loss": 5.5832, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.5779200543924757, |
| "grad_norm": 33.668174743652344, |
| "learning_rate": 3.986041055223526e-06, |
| "loss": 5.1639, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.580752995835576, |
| "grad_norm": 28.786453247070312, |
| "learning_rate": 3.9411484240685315e-06, |
| "loss": 3.3797, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.5835859372786765, |
| "grad_norm": 24.81963348388672, |
| "learning_rate": 3.8963449832877164e-06, |
| "loss": 6.3189, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.5864188787217768, |
| "grad_norm": 25.143753051757812, |
| "learning_rate": 3.851634506814782e-06, |
| "loss": 6.463, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.5892518201648772, |
| "grad_norm": 39.29959487915039, |
| "learning_rate": 3.8070207607527587e-06, |
| "loss": 7.5255, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.5920847616079775, |
| "grad_norm": 19.030284881591797, |
| "learning_rate": 3.7625075030567683e-06, |
| "loss": 4.2513, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.5949177030510779, |
| "grad_norm": 24.105989456176758, |
| "learning_rate": 3.718098483217484e-06, |
| "loss": 3.3586, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5977506444941783, |
| "grad_norm": 38.95778274536133, |
| "learning_rate": 3.673797441945304e-06, |
| "loss": 4.2773, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.6005835859372787, |
| "grad_norm": 42.26526641845703, |
| "learning_rate": 3.629608110855248e-06, |
| "loss": 5.2586, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.6034165273803791, |
| "grad_norm": 21.60348892211914, |
| "learning_rate": 3.585534212152643e-06, |
| "loss": 4.4408, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.6062494688234794, |
| "grad_norm": 39.41062545776367, |
| "learning_rate": 3.5415794583195846e-06, |
| "loss": 4.5132, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.6090824102665798, |
| "grad_norm": 32.25893783569336, |
| "learning_rate": 3.497747551802221e-06, |
| "loss": 8.4284, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.6119153517096801, |
| "grad_norm": 34.556373596191406, |
| "learning_rate": 3.4540421846988916e-06, |
| "loss": 6.3801, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.6147482931527806, |
| "grad_norm": 27.99374771118164, |
| "learning_rate": 3.4104670384491234e-06, |
| "loss": 5.5573, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.6175812345958809, |
| "grad_norm": 23.997901916503906, |
| "learning_rate": 3.367025783523534e-06, |
| "loss": 4.2779, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.6204141760389813, |
| "grad_norm": 41.76970291137695, |
| "learning_rate": 3.3237220791146597e-06, |
| "loss": 5.241, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.6232471174820816, |
| "grad_norm": 27.922670364379883, |
| "learning_rate": 3.2805595728287255e-06, |
| "loss": 4.2649, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.626080058925182, |
| "grad_norm": 33.54890060424805, |
| "learning_rate": 3.2375419003783957e-06, |
| "loss": 6.0635, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.6289130003682823, |
| "grad_norm": 21.987178802490234, |
| "learning_rate": 3.1946726852765325e-06, |
| "loss": 5.1542, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.6317459418113828, |
| "grad_norm": 35.2348518371582, |
| "learning_rate": 3.1519555385309685e-06, |
| "loss": 4.2332, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.6345788832544831, |
| "grad_norm": 39.060691833496094, |
| "learning_rate": 3.1093940583403447e-06, |
| "loss": 8.0693, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.6374118246975835, |
| "grad_norm": 20.76451873779297, |
| "learning_rate": 3.066991829791024e-06, |
| "loss": 5.3108, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.6402447661406838, |
| "grad_norm": 40.92884826660156, |
| "learning_rate": 3.024752424555105e-06, |
| "loss": 4.2548, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.6430777075837842, |
| "grad_norm": 24.043121337890625, |
| "learning_rate": 2.982679400589569e-06, |
| "loss": 5.3648, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.6459106490268847, |
| "grad_norm": 22.929412841796875, |
| "learning_rate": 2.9407763018365854e-06, |
| "loss": 4.2817, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.648743590469985, |
| "grad_norm": 36.0571174621582, |
| "learning_rate": 2.899046657924992e-06, |
| "loss": 7.9167, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.6515765319130854, |
| "grad_norm": 23.849647521972656, |
| "learning_rate": 2.8574939838729844e-06, |
| "loss": 4.44, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6544094733561857, |
| "grad_norm": 42.65750503540039, |
| "learning_rate": 2.8161217797920304e-06, |
| "loss": 5.6655, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.6572424147992861, |
| "grad_norm": 23.45660400390625, |
| "learning_rate": 2.774933530592054e-06, |
| "loss": 5.4841, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.6600753562423864, |
| "grad_norm": 21.22451400756836, |
| "learning_rate": 2.733932705687883e-06, |
| "loss": 3.3468, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.6629082976854869, |
| "grad_norm": 37.178993225097656, |
| "learning_rate": 2.693122758707013e-06, |
| "loss": 5.1606, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.6657412391285872, |
| "grad_norm": 24.34912109375, |
| "learning_rate": 2.652507127198689e-06, |
| "loss": 7.2961, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.6685741805716876, |
| "grad_norm": 40.61592483520508, |
| "learning_rate": 2.612089232344371e-06, |
| "loss": 6.3695, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.6714071220147879, |
| "grad_norm": 20.37811279296875, |
| "learning_rate": 2.571872478669528e-06, |
| "loss": 3.3039, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.6742400634578883, |
| "grad_norm": 25.745912551879883, |
| "learning_rate": 2.5318602537568904e-06, |
| "loss": 4.2973, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.6770730049009887, |
| "grad_norm": 22.395126342773438, |
| "learning_rate": 2.4920559279610886e-06, |
| "loss": 4.1162, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.6799059463440891, |
| "grad_norm": 34.32621383666992, |
| "learning_rate": 2.452462854124758e-06, |
| "loss": 4.1658, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6827388877871894, |
| "grad_norm": 39.03499984741211, |
| "learning_rate": 2.413084367296127e-06, |
| "loss": 6.3083, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.6855718292302898, |
| "grad_norm": 47.788394927978516, |
| "learning_rate": 2.373923784448089e-06, |
| "loss": 4.2861, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.6884047706733902, |
| "grad_norm": 26.90192413330078, |
| "learning_rate": 2.3349844041988044e-06, |
| "loss": 4.3008, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.6912377121164905, |
| "grad_norm": 22.178869247436523, |
| "learning_rate": 2.296269506533846e-06, |
| "loss": 5.2767, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.694070653559591, |
| "grad_norm": 21.529335021972656, |
| "learning_rate": 2.2577823525299205e-06, |
| "loss": 7.1097, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.6969035950026913, |
| "grad_norm": 20.215675354003906, |
| "learning_rate": 2.2195261840801757e-06, |
| "loss": 7.1815, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.6997365364457917, |
| "grad_norm": 21.300861358642578, |
| "learning_rate": 2.18150422362112e-06, |
| "loss": 6.9142, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.702569477888892, |
| "grad_norm": 30.098453521728516, |
| "learning_rate": 2.1437196738611958e-06, |
| "loss": 4.4774, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.7054024193319924, |
| "grad_norm": 25.317970275878906, |
| "learning_rate": 2.1061757175110024e-06, |
| "loss": 4.4772, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.7082353607750927, |
| "grad_norm": 30.881681442260742, |
| "learning_rate": 2.0688755170152e-06, |
| "loss": 4.2296, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.7110683022181932, |
| "grad_norm": 23.95901107788086, |
| "learning_rate": 2.031822214286134e-06, |
| "loss": 5.0405, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.7139012436612935, |
| "grad_norm": 41.624210357666016, |
| "learning_rate": 1.9950189304391855e-06, |
| "loss": 6.3358, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.7167341851043939, |
| "grad_norm": 34.76797866821289, |
| "learning_rate": 1.958468765529853e-06, |
| "loss": 5.061, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.7195671265474942, |
| "grad_norm": 20.406444549560547, |
| "learning_rate": 1.9221747982926493e-06, |
| "loss": 5.1701, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.7224000679905946, |
| "grad_norm": 24.22311782836914, |
| "learning_rate": 1.8861400858817508e-06, |
| "loss": 4.2621, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.725233009433695, |
| "grad_norm": 37.65345001220703, |
| "learning_rate": 1.8503676636134882e-06, |
| "loss": 6.1661, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.7280659508767954, |
| "grad_norm": 20.813777923583984, |
| "learning_rate": 1.81486054471068e-06, |
| "loss": 5.3045, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.7308988923198958, |
| "grad_norm": 39.82976150512695, |
| "learning_rate": 1.7796217200488114e-06, |
| "loss": 6.4348, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.7337318337629961, |
| "grad_norm": 25.495925903320312, |
| "learning_rate": 1.7446541579041048e-06, |
| "loss": 4.2349, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.7365647752060965, |
| "grad_norm": 38.05914306640625, |
| "learning_rate": 1.7099608037034953e-06, |
| "loss": 5.2485, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7393977166491968, |
| "grad_norm": 22.876413345336914, |
| "learning_rate": 1.6755445797765286e-06, |
| "loss": 4.263, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.7422306580922973, |
| "grad_norm": 22.402753829956055, |
| "learning_rate": 1.6414083851091973e-06, |
| "loss": 4.3153, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.7450635995353976, |
| "grad_norm": 20.86781883239746, |
| "learning_rate": 1.6075550950997592e-06, |
| "loss": 4.4095, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.747896540978498, |
| "grad_norm": 39.51744842529297, |
| "learning_rate": 1.5739875613165283e-06, |
| "loss": 6.2356, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.7507294824215983, |
| "grad_norm": 26.651187896728516, |
| "learning_rate": 1.5407086112576813e-06, |
| "loss": 4.1033, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.7535624238646987, |
| "grad_norm": 46.947757720947266, |
| "learning_rate": 1.5077210481130815e-06, |
| "loss": 8.1815, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.756395365307799, |
| "grad_norm": 41.29295349121094, |
| "learning_rate": 1.475027650528168e-06, |
| "loss": 6.1637, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.7592283067508995, |
| "grad_norm": 39.40729522705078, |
| "learning_rate": 1.442631172369896e-06, |
| "loss": 7.1273, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.7620612481939998, |
| "grad_norm": 39.58256912231445, |
| "learning_rate": 1.4105343424947654e-06, |
| "loss": 5.187, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.7648941896371002, |
| "grad_norm": 39.836185455322266, |
| "learning_rate": 1.378739864518971e-06, |
| "loss": 3.8889, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7677271310802005, |
| "grad_norm": 40.20053482055664, |
| "learning_rate": 1.3472504165906614e-06, |
| "loss": 5.3001, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.7705600725233009, |
| "grad_norm": 23.571002960205078, |
| "learning_rate": 1.3160686511643505e-06, |
| "loss": 4.0238, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.7733930139664014, |
| "grad_norm": 23.623443603515625, |
| "learning_rate": 1.2851971947774987e-06, |
| "loss": 5.1091, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.7762259554095017, |
| "grad_norm": 31.367658615112305, |
| "learning_rate": 1.2546386478292604e-06, |
| "loss": 4.1048, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.7790588968526021, |
| "grad_norm": 39.296226501464844, |
| "learning_rate": 1.2243955843614558e-06, |
| "loss": 4.271, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.7818918382957024, |
| "grad_norm": 28.69118881225586, |
| "learning_rate": 1.1944705518417466e-06, |
| "loss": 4.0739, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.7847247797388028, |
| "grad_norm": 32.27414321899414, |
| "learning_rate": 1.1648660709490538e-06, |
| "loss": 5.1998, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.7875577211819031, |
| "grad_norm": 24.473217010498047, |
| "learning_rate": 1.135584635361232e-06, |
| "loss": 4.9601, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.7903906626250036, |
| "grad_norm": 27.856367111206055, |
| "learning_rate": 1.1066287115450242e-06, |
| "loss": 4.9381, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.7932236040681039, |
| "grad_norm": 17.640838623046875, |
| "learning_rate": 1.0780007385483005e-06, |
| "loss": 4.2145, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7960565455112043, |
| "grad_norm": 34.375091552734375, |
| "learning_rate": 1.0497031277946062e-06, |
| "loss": 8.3028, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.7988894869543046, |
| "grad_norm": 23.346403121948242, |
| "learning_rate": 1.0217382628800465e-06, |
| "loss": 6.9337, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.801722428397405, |
| "grad_norm": 25.259016036987305, |
| "learning_rate": 9.94108499372507e-07, |
| "loss": 3.1855, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.8045553698405054, |
| "grad_norm": 33.022727966308594, |
| "learning_rate": 9.668161646132296e-07, |
| "loss": 5.2408, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.8073883112836058, |
| "grad_norm": 30.2951717376709, |
| "learning_rate": 9.398635575207854e-07, |
| "loss": 3.1828, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.8102212527267061, |
| "grad_norm": 51.273616790771484, |
| "learning_rate": 9.132529483974217e-07, |
| "loss": 5.0485, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.8130541941698065, |
| "grad_norm": 39.878597259521484, |
| "learning_rate": 8.869865787378262e-07, |
| "loss": 6.3068, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.8158871356129069, |
| "grad_norm": 21.064966201782227, |
| "learning_rate": 8.61066661040324e-07, |
| "loss": 3.3587, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.8187200770560072, |
| "grad_norm": 22.18380355834961, |
| "learning_rate": 8.354953786205133e-07, |
| "loss": 4.242, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.8215530184991077, |
| "grad_norm": 41.24013137817383, |
| "learning_rate": 8.102748854273468e-07, |
| "loss": 4.1017, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.824385959942208, |
| "grad_norm": 23.30076789855957, |
| "learning_rate": 7.854073058617112e-07, |
| "loss": 5.3308, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.8272189013853084, |
| "grad_norm": 21.42025375366211, |
| "learning_rate": 7.60894734597476e-07, |
| "loss": 8.113, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.8300518428284087, |
| "grad_norm": 18.741073608398438, |
| "learning_rate": 7.367392364050485e-07, |
| "loss": 5.1848, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.8328847842715091, |
| "grad_norm": 23.857194900512695, |
| "learning_rate": 7.129428459774618e-07, |
| "loss": 7.1581, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.8357177257146094, |
| "grad_norm": 28.530094146728516, |
| "learning_rate": 6.895075677589791e-07, |
| "loss": 6.2661, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.8385506671577099, |
| "grad_norm": 40.354949951171875, |
| "learning_rate": 6.664353757762515e-07, |
| "loss": 4.2647, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.8413836086008102, |
| "grad_norm": 21.305288314819336, |
| "learning_rate": 6.437282134720479e-07, |
| "loss": 4.9122, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.8442165500439106, |
| "grad_norm": 40.32603454589844, |
| "learning_rate": 6.21387993541544e-07, |
| "loss": 6.2095, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.8470494914870109, |
| "grad_norm": 39.293067932128906, |
| "learning_rate": 5.994165977712175e-07, |
| "loss": 4.1365, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.8498824329301113, |
| "grad_norm": 25.006118774414062, |
| "learning_rate": 5.778158768803294e-07, |
| "loss": 3.4504, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8527153743732117, |
| "grad_norm": 38.37477111816406, |
| "learning_rate": 5.565876503650442e-07, |
| "loss": 4.2214, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.8555483158163121, |
| "grad_norm": 40.921207427978516, |
| "learning_rate": 5.357337063451601e-07, |
| "loss": 5.1103, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.8583812572594125, |
| "grad_norm": 26.225017547607422, |
| "learning_rate": 5.152558014134906e-07, |
| "loss": 5.9913, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.8612141987025128, |
| "grad_norm": 22.678930282592773, |
| "learning_rate": 4.951556604879049e-07, |
| "loss": 4.3731, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.8640471401456132, |
| "grad_norm": 29.937393188476562, |
| "learning_rate": 4.754349766660299e-07, |
| "loss": 4.2301, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.8668800815887135, |
| "grad_norm": 26.465091705322266, |
| "learning_rate": 4.5609541108263377e-07, |
| "loss": 6.0091, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.869713023031814, |
| "grad_norm": 25.58681297302246, |
| "learning_rate": 4.3713859276971026e-07, |
| "loss": 6.979, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.8725459644749143, |
| "grad_norm": 22.564706802368164, |
| "learning_rate": 4.1856611851925245e-07, |
| "loss": 5.0316, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.8753789059180147, |
| "grad_norm": 40.97758102416992, |
| "learning_rate": 4.003795527487508e-07, |
| "loss": 8.964, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.878211847361115, |
| "grad_norm": 32.80113220214844, |
| "learning_rate": 3.8258042736942446e-07, |
| "loss": 3.1517, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8810447888042154, |
| "grad_norm": 30.950176239013672, |
| "learning_rate": 3.651702416571762e-07, |
| "loss": 4.345, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.8838777302473158, |
| "grad_norm": 21.242090225219727, |
| "learning_rate": 3.481504621263049e-07, |
| "loss": 6.1642, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.8867106716904162, |
| "grad_norm": 21.0790958404541, |
| "learning_rate": 3.315225224059809e-07, |
| "loss": 5.1734, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.8895436131335165, |
| "grad_norm": 41.8050537109375, |
| "learning_rate": 3.1528782311948226e-07, |
| "loss": 5.0608, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.8923765545766169, |
| "grad_norm": 23.527942657470703, |
| "learning_rate": 2.9944773176621756e-07, |
| "loss": 5.9961, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.8952094960197172, |
| "grad_norm": 28.754201889038086, |
| "learning_rate": 2.840035826065368e-07, |
| "loss": 3.8781, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.8980424374628176, |
| "grad_norm": 26.580829620361328, |
| "learning_rate": 2.689566765493451e-07, |
| "loss": 4.1426, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.9008753789059181, |
| "grad_norm": 18.550945281982422, |
| "learning_rate": 2.5430828104251684e-07, |
| "loss": 4.9139, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.9037083203490184, |
| "grad_norm": 20.301895141601562, |
| "learning_rate": 2.4005962996614174e-07, |
| "loss": 3.1654, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.9065412617921188, |
| "grad_norm": 37.804969787597656, |
| "learning_rate": 2.2621192352858702e-07, |
| "loss": 5.0736, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.9093742032352191, |
| "grad_norm": 29.193897247314453, |
| "learning_rate": 2.1276632816540077e-07, |
| "loss": 5.2175, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.9122071446783195, |
| "grad_norm": 23.378320693969727, |
| "learning_rate": 1.9972397644106023e-07, |
| "loss": 5.2508, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.9150400861214198, |
| "grad_norm": 18.92923355102539, |
| "learning_rate": 1.870859669535724e-07, |
| "loss": 5.2554, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.9178730275645203, |
| "grad_norm": 18.03963279724121, |
| "learning_rate": 1.7485336424193366e-07, |
| "loss": 5.1253, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.9207059690076206, |
| "grad_norm": 38.961456298828125, |
| "learning_rate": 1.6302719869646432e-07, |
| "loss": 5.0659, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.923538910450721, |
| "grad_norm": 20.624431610107422, |
| "learning_rate": 1.5160846647201132e-07, |
| "loss": 4.1776, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.9263718518938213, |
| "grad_norm": 21.755279541015625, |
| "learning_rate": 1.4059812940404093e-07, |
| "loss": 3.142, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.9292047933369217, |
| "grad_norm": 20.076051712036133, |
| "learning_rate": 1.2999711492762079e-07, |
| "loss": 5.2161, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.932037734780022, |
| "grad_norm": 25.841142654418945, |
| "learning_rate": 1.198063159992996e-07, |
| "loss": 5.3184, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.9348706762231225, |
| "grad_norm": 35.23577117919922, |
| "learning_rate": 1.1002659102188784e-07, |
| "loss": 3.3098, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.9377036176662228, |
| "grad_norm": 20.789785385131836, |
| "learning_rate": 1.006587637721551e-07, |
| "loss": 3.1742, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.9405365591093232, |
| "grad_norm": 40.675296783447266, |
| "learning_rate": 9.170362333143778e-08, |
| "loss": 7.3385, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.9433695005524236, |
| "grad_norm": 24.71589469909668, |
| "learning_rate": 8.316192401917667e-08, |
| "loss": 5.3478, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.946202441995524, |
| "grad_norm": 38.48093032836914, |
| "learning_rate": 7.503438532937169e-08, |
| "loss": 6.069, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.9490353834386244, |
| "grad_norm": 26.636127471923828, |
| "learning_rate": 6.732169186998372e-08, |
| "loss": 4.1179, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.9518683248817247, |
| "grad_norm": 42.95631790161133, |
| "learning_rate": 6.002449330526294e-08, |
| "loss": 6.9268, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.9547012663248251, |
| "grad_norm": 20.64594268798828, |
| "learning_rate": 5.31434043010276e-08, |
| "loss": 3.1192, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.9575342077679254, |
| "grad_norm": 44.144744873046875, |
| "learning_rate": 4.667900447288931e-08, |
| "loss": 6.0163, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.9603671492110258, |
| "grad_norm": 41.0361442565918, |
| "learning_rate": 4.0631838337427675e-08, |
| "loss": 5.265, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.9632000906541262, |
| "grad_norm": 28.538305282592773, |
| "learning_rate": 3.500241526632753e-08, |
| "loss": 5.07, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.9660330320972266, |
| "grad_norm": 24.23358154296875, |
| "learning_rate": 2.979120944346936e-08, |
| "loss": 4.3623, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.9688659735403269, |
| "grad_norm": 26.74643898010254, |
| "learning_rate": 2.499865982499128e-08, |
| "loss": 3.3373, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.9716989149834273, |
| "grad_norm": 22.635358810424805, |
| "learning_rate": 2.0625170102309687e-08, |
| "loss": 4.2529, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.9745318564265276, |
| "grad_norm": 25.663415908813477, |
| "learning_rate": 1.6671108668119828e-08, |
| "loss": 4.1368, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.977364797869628, |
| "grad_norm": 28.40155029296875, |
| "learning_rate": 1.3136808585361149e-08, |
| "loss": 5.9535, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.9801977393127284, |
| "grad_norm": 21.457969665527344, |
| "learning_rate": 1.0022567559164198e-08, |
| "loss": 6.1661, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.9830306807558288, |
| "grad_norm": 26.77224349975586, |
| "learning_rate": 7.328647911774567e-09, |
| "loss": 4.1479, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.9858636221989292, |
| "grad_norm": 34.7308464050293, |
| "learning_rate": 5.055276560454459e-09, |
| "loss": 3.216, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.9886965636420295, |
| "grad_norm": 27.362573623657227, |
| "learning_rate": 3.202644998370752e-09, |
| "loss": 5.132, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.9915295050851299, |
| "grad_norm": 33.32588195800781, |
| "learning_rate": 1.770909278464017e-09, |
| "loss": 6.1225, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.9943624465282302, |
| "grad_norm": 19.929155349731445, |
| "learning_rate": 7.601900003051388e-10, |
| "loss": 3.1458, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.9971953879713307, |
| "grad_norm": 26.834556579589844, |
| "learning_rate": 1.7057229993344693e-10, |
| "loss": 7.2351, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.9997450352701209, |
| "step": 3529, |
| "total_flos": 1.1540349697243742e+19, |
| "train_loss": 5.616373471816494, |
| "train_runtime": 84659.9545, |
| "train_samples_per_second": 2.502, |
| "train_steps_per_second": 0.042 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3529, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1540349697243742e+19, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|