| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999709884243814, |
| "eval_steps": 1000, |
| "global_step": 17234, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000580231512373437, |
| "grad_norm": 4.4758100509643555, |
| "learning_rate": 6.264501160092807e-06, |
| "loss": 10.4749, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001160463024746874, |
| "grad_norm": 1.6773627996444702, |
| "learning_rate": 1.322505800464037e-05, |
| "loss": 9.159, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.001740694537120311, |
| "grad_norm": 1.5999170541763306, |
| "learning_rate": 2.018561484918793e-05, |
| "loss": 8.8189, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.002320926049493748, |
| "grad_norm": 1.9260104894638062, |
| "learning_rate": 2.7146171693735496e-05, |
| "loss": 8.4574, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.002901157561867185, |
| "grad_norm": 2.173593282699585, |
| "learning_rate": 3.410672853828306e-05, |
| "loss": 8.0835, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.003481389074240622, |
| "grad_norm": 1.5830281972885132, |
| "learning_rate": 4.1067285382830626e-05, |
| "loss": 7.7376, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.004061620586614059, |
| "grad_norm": 2.772728443145752, |
| "learning_rate": 4.802784222737819e-05, |
| "loss": 7.4168, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.004641852098987496, |
| "grad_norm": 1.511775016784668, |
| "learning_rate": 5.498839907192575e-05, |
| "loss": 7.1442, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.005222083611360933, |
| "grad_norm": 1.9058183431625366, |
| "learning_rate": 6.194895591647331e-05, |
| "loss": 6.9324, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.00580231512373437, |
| "grad_norm": 1.6976985931396484, |
| "learning_rate": 6.890951276102087e-05, |
| "loss": 6.8005, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.006382546636107807, |
| "grad_norm": 1.4346176385879517, |
| "learning_rate": 7.587006960556844e-05, |
| "loss": 6.6814, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.006962778148481244, |
| "grad_norm": 1.0364270210266113, |
| "learning_rate": 8.283062645011599e-05, |
| "loss": 6.5547, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.007543009660854681, |
| "grad_norm": 0.6528536677360535, |
| "learning_rate": 8.979118329466357e-05, |
| "loss": 6.4482, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.008123241173228117, |
| "grad_norm": 1.1468390226364136, |
| "learning_rate": 9.675174013921112e-05, |
| "loss": 6.3518, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.008703472685601555, |
| "grad_norm": 0.6249582171440125, |
| "learning_rate": 0.0001037122969837587, |
| "loss": 6.2749, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.009283704197974993, |
| "grad_norm": 0.9577043652534485, |
| "learning_rate": 0.00011067285382830626, |
| "loss": 6.2026, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.009863935710348428, |
| "grad_norm": 1.156731367111206, |
| "learning_rate": 0.00011763341067285381, |
| "loss": 6.1482, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.010444167222721866, |
| "grad_norm": 0.7919487357139587, |
| "learning_rate": 0.0001245939675174014, |
| "loss": 6.0907, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.011024398735095304, |
| "grad_norm": 0.5902596712112427, |
| "learning_rate": 0.00013155452436194894, |
| "loss": 6.0469, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.01160463024746874, |
| "grad_norm": 0.9712298512458801, |
| "learning_rate": 0.00013851508120649652, |
| "loss": 6.0128, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.012184861759842177, |
| "grad_norm": 0.6487208008766174, |
| "learning_rate": 0.00014547563805104407, |
| "loss": 5.949, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.012765093272215615, |
| "grad_norm": 0.6659431457519531, |
| "learning_rate": 0.00015243619489559162, |
| "loss": 5.9004, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01334532478458905, |
| "grad_norm": 0.9973188042640686, |
| "learning_rate": 0.0001593967517401392, |
| "loss": 5.8727, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.013925556296962488, |
| "grad_norm": 0.592413067817688, |
| "learning_rate": 0.00016635730858468675, |
| "loss": 5.8594, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.014505787809335926, |
| "grad_norm": 0.6143619418144226, |
| "learning_rate": 0.00017331786542923433, |
| "loss": 5.8114, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.015086019321709361, |
| "grad_norm": 0.5780689120292664, |
| "learning_rate": 0.00018027842227378188, |
| "loss": 5.7829, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0156662508340828, |
| "grad_norm": 0.41307076811790466, |
| "learning_rate": 0.00018723897911832944, |
| "loss": 5.7197, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.016246482346456235, |
| "grad_norm": 0.6880993247032166, |
| "learning_rate": 0.00019419953596287701, |
| "loss": 5.7168, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.016826713858829674, |
| "grad_norm": 0.4273562431335449, |
| "learning_rate": 0.0002011600928074246, |
| "loss": 5.6639, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.01740694537120311, |
| "grad_norm": 0.5025382041931152, |
| "learning_rate": 0.00020812064965197212, |
| "loss": 5.6305, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.017987176883576546, |
| "grad_norm": 0.7127647995948792, |
| "learning_rate": 0.0002150812064965197, |
| "loss": 5.5991, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.018567408395949985, |
| "grad_norm": 0.6494776010513306, |
| "learning_rate": 0.00022204176334106727, |
| "loss": 5.5961, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.01914763990832342, |
| "grad_norm": 0.43809765577316284, |
| "learning_rate": 0.00022900232018561485, |
| "loss": 5.5242, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.019727871420696857, |
| "grad_norm": 0.5514947175979614, |
| "learning_rate": 0.00023596287703016238, |
| "loss": 5.4885, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.020308102933070296, |
| "grad_norm": 0.7086557745933533, |
| "learning_rate": 0.00024292343387470995, |
| "loss": 5.4558, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.020888334445443732, |
| "grad_norm": 0.44333210587501526, |
| "learning_rate": 0.0002498839907192575, |
| "loss": 5.4249, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.021468565957817168, |
| "grad_norm": 0.5971847772598267, |
| "learning_rate": 0.0002568445475638051, |
| "loss": 5.3896, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.022048797470190607, |
| "grad_norm": 0.5358195900917053, |
| "learning_rate": 0.0002638051044083526, |
| "loss": 5.3647, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.022629028982564043, |
| "grad_norm": 0.4231407046318054, |
| "learning_rate": 0.0002707656612529002, |
| "loss": 5.3325, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.02320926049493748, |
| "grad_norm": 0.48789191246032715, |
| "learning_rate": 0.00027772621809744777, |
| "loss": 5.2922, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.023789492007310918, |
| "grad_norm": 0.46154582500457764, |
| "learning_rate": 0.0002846867749419953, |
| "loss": 5.2881, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.024369723519684354, |
| "grad_norm": 0.44972172379493713, |
| "learning_rate": 0.00029164733178654287, |
| "loss": 5.2397, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.02494995503205779, |
| "grad_norm": 0.505415678024292, |
| "learning_rate": 0.0002986078886310905, |
| "loss": 5.1841, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.02553018654443123, |
| "grad_norm": 0.42717623710632324, |
| "learning_rate": 0.0003055684454756381, |
| "loss": 5.1848, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.026110418056804665, |
| "grad_norm": 0.4216056168079376, |
| "learning_rate": 0.0003125290023201856, |
| "loss": 5.1447, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0266906495691781, |
| "grad_norm": 0.5051509141921997, |
| "learning_rate": 0.00031948955916473313, |
| "loss": 5.1084, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.02727088108155154, |
| "grad_norm": 0.5205376744270325, |
| "learning_rate": 0.0003264501160092807, |
| "loss": 5.0462, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.027851112593924976, |
| "grad_norm": 0.5111084580421448, |
| "learning_rate": 0.0003334106728538283, |
| "loss": 5.0225, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.028431344106298412, |
| "grad_norm": 0.4395337402820587, |
| "learning_rate": 0.00034037122969837584, |
| "loss": 4.991, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.02901157561867185, |
| "grad_norm": 0.2879785895347595, |
| "learning_rate": 0.00034733178654292344, |
| "loss": 4.9628, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.029591807131045287, |
| "grad_norm": 0.3356530964374542, |
| "learning_rate": 0.000354292343387471, |
| "loss": 4.9165, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.030172038643418723, |
| "grad_norm": 0.39410287141799927, |
| "learning_rate": 0.00036125290023201855, |
| "loss": 4.8802, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.030752270155792162, |
| "grad_norm": 0.4210626184940338, |
| "learning_rate": 0.00036821345707656604, |
| "loss": 4.8403, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0313325016681656, |
| "grad_norm": 0.4170067608356476, |
| "learning_rate": 0.00037517401392111365, |
| "loss": 4.8156, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.031912733180539034, |
| "grad_norm": 0.40876781940460205, |
| "learning_rate": 0.0003821345707656612, |
| "loss": 4.7932, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.03249296469291247, |
| "grad_norm": 0.3717671036720276, |
| "learning_rate": 0.0003890951276102088, |
| "loss": 4.7812, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.03307319620528591, |
| "grad_norm": 0.37275081872940063, |
| "learning_rate": 0.00039605568445475636, |
| "loss": 4.7324, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.03365342771765935, |
| "grad_norm": 0.32523536682128906, |
| "learning_rate": 0.0004030162412993039, |
| "loss": 4.6891, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.034233659230032784, |
| "grad_norm": 0.2909957468509674, |
| "learning_rate": 0.0004099767981438515, |
| "loss": 4.6555, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.03481389074240622, |
| "grad_norm": 0.40268951654434204, |
| "learning_rate": 0.00041693735498839906, |
| "loss": 4.622, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.035394122254779656, |
| "grad_norm": 0.433383584022522, |
| "learning_rate": 0.00042389791183294656, |
| "loss": 4.6122, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.03597435376715309, |
| "grad_norm": 0.3096088171005249, |
| "learning_rate": 0.0004308584686774941, |
| "loss": 4.5976, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.036554585279526534, |
| "grad_norm": 0.30540433526039124, |
| "learning_rate": 0.0004378190255220417, |
| "loss": 4.5569, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.03713481679189997, |
| "grad_norm": 0.3136671781539917, |
| "learning_rate": 0.00044477958236658927, |
| "loss": 4.5228, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.037715048304273406, |
| "grad_norm": 0.332621693611145, |
| "learning_rate": 0.0004517401392111369, |
| "loss": 4.4901, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.03829527981664684, |
| "grad_norm": 0.3817736804485321, |
| "learning_rate": 0.0004587006960556844, |
| "loss": 4.475, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.03887551132902028, |
| "grad_norm": 0.458741158246994, |
| "learning_rate": 0.000465661252900232, |
| "loss": 4.4545, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.039455742841393714, |
| "grad_norm": 0.27561265230178833, |
| "learning_rate": 0.0004726218097447796, |
| "loss": 4.4406, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.040035974353767156, |
| "grad_norm": 0.380633145570755, |
| "learning_rate": 0.0004795823665893271, |
| "loss": 4.4027, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.04061620586614059, |
| "grad_norm": 0.3662358820438385, |
| "learning_rate": 0.00048654292343387463, |
| "loss": 4.377, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.04119643737851403, |
| "grad_norm": 0.31104594469070435, |
| "learning_rate": 0.0004935034802784222, |
| "loss": 4.3399, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.041776668890887464, |
| "grad_norm": 0.43897074460983276, |
| "learning_rate": 0.0005004640371229698, |
| "loss": 4.3229, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.0423569004032609, |
| "grad_norm": 0.2685506343841553, |
| "learning_rate": 0.0005074245939675173, |
| "loss": 4.302, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.042937131915634336, |
| "grad_norm": 0.2662206292152405, |
| "learning_rate": 0.0005143851508120649, |
| "loss": 4.2533, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.04351736342800778, |
| "grad_norm": 0.31665244698524475, |
| "learning_rate": 0.0005213457076566126, |
| "loss": 4.2463, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.044097594940381214, |
| "grad_norm": 0.3573771119117737, |
| "learning_rate": 0.0005283062645011601, |
| "loss": 4.2177, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.04467782645275465, |
| "grad_norm": 0.3051789402961731, |
| "learning_rate": 0.0005352668213457077, |
| "loss": 4.2098, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.045258057965128086, |
| "grad_norm": 0.26946839690208435, |
| "learning_rate": 0.0005422273781902551, |
| "loss": 4.1739, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.04583828947750152, |
| "grad_norm": 0.21327945590019226, |
| "learning_rate": 0.0005491879350348028, |
| "loss": 4.151, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.04641852098987496, |
| "grad_norm": 0.28413307666778564, |
| "learning_rate": 0.0005561484918793503, |
| "loss": 4.1455, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.0469987525022484, |
| "grad_norm": 0.2847752869129181, |
| "learning_rate": 0.0005631090487238979, |
| "loss": 4.1166, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.047578984014621836, |
| "grad_norm": 0.25382527709007263, |
| "learning_rate": 0.0005700696055684454, |
| "loss": 4.0986, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.04815921552699527, |
| "grad_norm": 0.2375078797340393, |
| "learning_rate": 0.000577030162412993, |
| "loss": 4.0765, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.04873944703936871, |
| "grad_norm": 0.3032638430595398, |
| "learning_rate": 0.0005839907192575406, |
| "loss": 4.085, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.049319678551742144, |
| "grad_norm": 0.2454582005739212, |
| "learning_rate": 0.0005909512761020882, |
| "loss": 4.0505, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.04989991006411558, |
| "grad_norm": 0.23829826712608337, |
| "learning_rate": 0.0005979118329466356, |
| "loss": 4.0391, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.05048014157648902, |
| "grad_norm": 0.29694074392318726, |
| "learning_rate": 0.0005999997293652579, |
| "loss": 4.0195, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.05106037308886246, |
| "grad_norm": 0.20268426835536957, |
| "learning_rate": 0.0005999984038085133, |
| "loss": 4.0023, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.051640604601235894, |
| "grad_norm": 0.2563273310661316, |
| "learning_rate": 0.000599995973626219, |
| "loss": 3.98, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.05222083611360933, |
| "grad_norm": 0.26515451073646545, |
| "learning_rate": 0.0005999924388273229, |
| "loss": 3.9799, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.052801067625982766, |
| "grad_norm": 0.23011842370033264, |
| "learning_rate": 0.0005999877994248407, |
| "loss": 3.9592, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0533812991383562, |
| "grad_norm": 0.21570523083209991, |
| "learning_rate": 0.0005999820554358552, |
| "loss": 3.9366, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.053961530650729644, |
| "grad_norm": 0.24623119831085205, |
| "learning_rate": 0.0005999752068815162, |
| "loss": 3.923, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.05454176216310308, |
| "grad_norm": 0.26557642221450806, |
| "learning_rate": 0.0005999672537870409, |
| "loss": 3.9114, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.055121993675476516, |
| "grad_norm": 0.23711174726486206, |
| "learning_rate": 0.0005999581961817135, |
| "loss": 3.9021, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.05570222518784995, |
| "grad_norm": 0.2636472284793854, |
| "learning_rate": 0.000599948034098885, |
| "loss": 3.8945, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.05628245670022339, |
| "grad_norm": 0.2139461785554886, |
| "learning_rate": 0.000599936767575973, |
| "loss": 3.8742, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.056862688212596824, |
| "grad_norm": 0.2411975860595703, |
| "learning_rate": 0.0005999243966544624, |
| "loss": 3.8627, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.057442919724970266, |
| "grad_norm": 0.22522902488708496, |
| "learning_rate": 0.000599910921379904, |
| "loss": 3.8439, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.0580231512373437, |
| "grad_norm": 0.2505146861076355, |
| "learning_rate": 0.0005998963418019153, |
| "loss": 3.8376, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.0580231512373437, |
| "eval_loss": 3.7977514266967773, |
| "eval_runtime": 3.2666, |
| "eval_samples_per_second": 1325.524, |
| "eval_steps_per_second": 2.755, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.05860338274971714, |
| "grad_norm": 0.21931585669517517, |
| "learning_rate": 0.0005998806579741798, |
| "loss": 3.8196, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.059183614262090574, |
| "grad_norm": 0.19973556697368622, |
| "learning_rate": 0.0005998638699544469, |
| "loss": 3.813, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.05976384577446401, |
| "grad_norm": 0.21615122258663177, |
| "learning_rate": 0.0005998459778045319, |
| "loss": 3.7993, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.060344077286837446, |
| "grad_norm": 0.18904747068881989, |
| "learning_rate": 0.0005998269815903156, |
| "loss": 3.8122, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.06092430879921089, |
| "grad_norm": 0.20379868149757385, |
| "learning_rate": 0.000599806881381744, |
| "loss": 3.7891, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.061504540311584324, |
| "grad_norm": 0.21616701781749725, |
| "learning_rate": 0.0005997856772528283, |
| "loss": 3.7768, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.06208477182395776, |
| "grad_norm": 0.1838783323764801, |
| "learning_rate": 0.0005997633692816442, |
| "loss": 3.7744, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.0626650033363312, |
| "grad_norm": 0.17894767224788666, |
| "learning_rate": 0.0005997399575503321, |
| "loss": 3.7667, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.06324523484870463, |
| "grad_norm": 0.20992882549762726, |
| "learning_rate": 0.0005997154421450963, |
| "loss": 3.7449, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.06382546636107807, |
| "grad_norm": 0.19586902856826782, |
| "learning_rate": 0.0005996898231562051, |
| "loss": 3.7423, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.0644056978734515, |
| "grad_norm": 0.24105612933635712, |
| "learning_rate": 0.0005996631006779903, |
| "loss": 3.7223, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.06498592938582494, |
| "grad_norm": 0.19526907801628113, |
| "learning_rate": 0.0005996352748088471, |
| "loss": 3.7189, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.06556616089819838, |
| "grad_norm": 0.16144131124019623, |
| "learning_rate": 0.000599606345651233, |
| "loss": 3.7118, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.06614639241057182, |
| "grad_norm": 0.167442187666893, |
| "learning_rate": 0.0005995763133116683, |
| "loss": 3.6986, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.06672662392294526, |
| "grad_norm": 0.23503893613815308, |
| "learning_rate": 0.0005995451779007352, |
| "loss": 3.7049, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0673068554353187, |
| "grad_norm": 0.2096278965473175, |
| "learning_rate": 0.0005995129395330776, |
| "loss": 3.6865, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.06788708694769213, |
| "grad_norm": 0.19825097918510437, |
| "learning_rate": 0.0005994795983274004, |
| "loss": 3.6712, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.06846731846006557, |
| "grad_norm": 0.15405306220054626, |
| "learning_rate": 0.0005994451544064696, |
| "loss": 3.6711, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.069047549972439, |
| "grad_norm": 0.563884437084198, |
| "learning_rate": 0.0005994096078971111, |
| "loss": 3.677, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.06962778148481244, |
| "grad_norm": 0.1655234694480896, |
| "learning_rate": 0.0005993729589302111, |
| "loss": 3.7143, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.07020801299718588, |
| "grad_norm": 0.15598031878471375, |
| "learning_rate": 0.0005993352076407148, |
| "loss": 3.6689, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.07078824450955931, |
| "grad_norm": 0.14992448687553406, |
| "learning_rate": 0.0005992963541676265, |
| "loss": 3.6581, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.07136847602193275, |
| "grad_norm": 0.1618255376815796, |
| "learning_rate": 0.0005992563986540086, |
| "loss": 3.642, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.07194870753430618, |
| "grad_norm": 0.16188852488994598, |
| "learning_rate": 0.0005992153412469816, |
| "loss": 3.6399, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.07252893904667962, |
| "grad_norm": 0.17180649936199188, |
| "learning_rate": 0.0005991731820977231, |
| "loss": 3.6252, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.07310917055905307, |
| "grad_norm": 0.1691058874130249, |
| "learning_rate": 0.0005991299213614678, |
| "loss": 3.6244, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.0736894020714265, |
| "grad_norm": 0.19470703601837158, |
| "learning_rate": 0.0005990855591975059, |
| "loss": 3.6199, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.07426963358379994, |
| "grad_norm": 0.15482653677463531, |
| "learning_rate": 0.0005990400957691835, |
| "loss": 3.6176, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.07484986509617338, |
| "grad_norm": 0.18342998623847961, |
| "learning_rate": 0.000598993531243902, |
| "loss": 3.6082, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.07543009660854681, |
| "grad_norm": 0.17348110675811768, |
| "learning_rate": 0.0005989458657931167, |
| "loss": 3.6063, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.07601032812092025, |
| "grad_norm": 0.1687677949666977, |
| "learning_rate": 0.0005988970995923368, |
| "loss": 3.6015, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.07659055963329368, |
| "grad_norm": 0.19341568648815155, |
| "learning_rate": 0.0005988472328211246, |
| "loss": 3.5912, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.07717079114566712, |
| "grad_norm": 0.15345478057861328, |
| "learning_rate": 0.0005987962656630947, |
| "loss": 3.586, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.07775102265804056, |
| "grad_norm": 0.16126085817813873, |
| "learning_rate": 0.0005987441983059136, |
| "loss": 3.5797, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.07833125417041399, |
| "grad_norm": 0.1716892272233963, |
| "learning_rate": 0.0005986910309412986, |
| "loss": 3.5751, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.07891148568278743, |
| "grad_norm": 0.15669932961463928, |
| "learning_rate": 0.0005986367637650177, |
| "loss": 3.5799, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.07949171719516086, |
| "grad_norm": 0.19878168404102325, |
| "learning_rate": 0.0005985813969768884, |
| "loss": 3.572, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.08007194870753431, |
| "grad_norm": 0.1505119651556015, |
| "learning_rate": 0.0005985249307807767, |
| "loss": 3.567, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.08065218021990775, |
| "grad_norm": 0.1548507809638977, |
| "learning_rate": 0.0005984673653845972, |
| "loss": 3.5427, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.08123241173228118, |
| "grad_norm": 0.15786635875701904, |
| "learning_rate": 0.0005984087010003119, |
| "loss": 3.5637, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.08181264324465462, |
| "grad_norm": 0.15546779334545135, |
| "learning_rate": 0.0005983489378439289, |
| "loss": 3.5475, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.08239287475702806, |
| "grad_norm": 0.17267097532749176, |
| "learning_rate": 0.0005982880761355026, |
| "loss": 3.5519, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.08297310626940149, |
| "grad_norm": 0.2120850831270218, |
| "learning_rate": 0.0005982261160991321, |
| "loss": 3.545, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.08355333778177493, |
| "grad_norm": 0.1541440784931183, |
| "learning_rate": 0.0005981630579629609, |
| "loss": 3.5236, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.08413356929414836, |
| "grad_norm": 0.1610753834247589, |
| "learning_rate": 0.0005980989019591753, |
| "loss": 3.5153, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.0847138008065218, |
| "grad_norm": 0.1872093677520752, |
| "learning_rate": 0.0005980336483240048, |
| "loss": 3.5208, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.08529403231889524, |
| "grad_norm": 0.15793032944202423, |
| "learning_rate": 0.0005979672972977201, |
| "loss": 3.5294, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.08587426383126867, |
| "grad_norm": 0.1738296002149582, |
| "learning_rate": 0.0005978998491246324, |
| "loss": 3.5234, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.08645449534364211, |
| "grad_norm": 0.1644987314939499, |
| "learning_rate": 0.0005978313040530931, |
| "loss": 3.515, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.08703472685601556, |
| "grad_norm": 0.16707918047904968, |
| "learning_rate": 0.0005977616623354923, |
| "loss": 3.5014, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.08761495836838899, |
| "grad_norm": 0.14812146127223969, |
| "learning_rate": 0.0005976909242282581, |
| "loss": 3.4923, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.08819518988076243, |
| "grad_norm": 0.15653282403945923, |
| "learning_rate": 0.0005976190899918555, |
| "loss": 3.4899, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.08877542139313586, |
| "grad_norm": 0.1531265377998352, |
| "learning_rate": 0.0005975461598907858, |
| "loss": 3.4939, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.0893556529055093, |
| "grad_norm": 0.19499650597572327, |
| "learning_rate": 0.0005974721341935854, |
| "loss": 3.4776, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.08993588441788274, |
| "grad_norm": 0.16522051393985748, |
| "learning_rate": 0.0005973970131728245, |
| "loss": 3.4843, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.09051611593025617, |
| "grad_norm": 0.14911240339279175, |
| "learning_rate": 0.0005973207971051066, |
| "loss": 3.4854, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.09109634744262961, |
| "grad_norm": 0.1797751784324646, |
| "learning_rate": 0.0005972434862710673, |
| "loss": 3.4814, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.09167657895500304, |
| "grad_norm": 0.14958298206329346, |
| "learning_rate": 0.0005971650809553729, |
| "loss": 3.4791, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.09225681046737648, |
| "grad_norm": 0.17834265530109406, |
| "learning_rate": 0.0005970855814467205, |
| "loss": 3.4633, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.09283704197974992, |
| "grad_norm": 0.15738125145435333, |
| "learning_rate": 0.0005970049880378353, |
| "loss": 3.4676, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.09341727349212335, |
| "grad_norm": 0.14483994245529175, |
| "learning_rate": 0.0005969233010254707, |
| "loss": 3.4661, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.0939975050044968, |
| "grad_norm": 0.14126789569854736, |
| "learning_rate": 0.0005968405207104068, |
| "loss": 3.4571, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.09457773651687024, |
| "grad_norm": 0.1578633040189743, |
| "learning_rate": 0.0005967566473974495, |
| "loss": 3.4558, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.09515796802924367, |
| "grad_norm": 0.1565486639738083, |
| "learning_rate": 0.000596671681395429, |
| "loss": 3.4604, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.09573819954161711, |
| "grad_norm": 0.13866451382637024, |
| "learning_rate": 0.0005965856230171993, |
| "loss": 3.4552, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.09631843105399054, |
| "grad_norm": 0.2121124267578125, |
| "learning_rate": 0.0005964984725796359, |
| "loss": 3.4541, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.09689866256636398, |
| "grad_norm": 0.17082008719444275, |
| "learning_rate": 0.0005964102304036363, |
| "loss": 3.4382, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.09747889407873742, |
| "grad_norm": 0.20681622624397278, |
| "learning_rate": 0.0005963208968141172, |
| "loss": 3.4372, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.09805912559111085, |
| "grad_norm": 0.1384105086326599, |
| "learning_rate": 0.0005962304721400142, |
| "loss": 3.4484, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.09863935710348429, |
| "grad_norm": 0.16820856928825378, |
| "learning_rate": 0.0005961389567142806, |
| "loss": 3.4302, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.09921958861585772, |
| "grad_norm": 0.16617996990680695, |
| "learning_rate": 0.0005960463508738855, |
| "loss": 3.4328, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.09979982012823116, |
| "grad_norm": 0.16344214975833893, |
| "learning_rate": 0.0005959526549598137, |
| "loss": 3.4326, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1003800516406046, |
| "grad_norm": 0.16235540807247162, |
| "learning_rate": 0.000595857869317063, |
| "loss": 3.4271, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.10096028315297804, |
| "grad_norm": 0.1524738371372223, |
| "learning_rate": 0.0005957619942946442, |
| "loss": 3.424, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.10154051466535148, |
| "grad_norm": 0.18023791909217834, |
| "learning_rate": 0.0005956650302455793, |
| "loss": 3.4266, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.10212074617772492, |
| "grad_norm": 0.17738115787506104, |
| "learning_rate": 0.0005955669775268999, |
| "loss": 3.4046, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.10270097769009835, |
| "grad_norm": 0.13939271867275238, |
| "learning_rate": 0.0005954678364996466, |
| "loss": 3.4177, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.10328120920247179, |
| "grad_norm": 0.18028447031974792, |
| "learning_rate": 0.0005953676075288668, |
| "loss": 3.4113, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.10386144071484522, |
| "grad_norm": 0.15911422669887543, |
| "learning_rate": 0.0005952662909836142, |
| "loss": 3.4191, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.10444167222721866, |
| "grad_norm": 0.15596607327461243, |
| "learning_rate": 0.0005951638872369469, |
| "loss": 3.3993, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.1050219037395921, |
| "grad_norm": 0.15493981540203094, |
| "learning_rate": 0.0005950603966659264, |
| "loss": 3.4043, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.10560213525196553, |
| "grad_norm": 0.1727568507194519, |
| "learning_rate": 0.0005949558196516154, |
| "loss": 3.4028, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.10618236676433897, |
| "grad_norm": 0.1614874303340912, |
| "learning_rate": 0.0005948501565790779, |
| "loss": 3.3998, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.1067625982767124, |
| "grad_norm": 0.13620299100875854, |
| "learning_rate": 0.000594743407837376, |
| "loss": 3.3896, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.10734282978908584, |
| "grad_norm": 0.15391112864017487, |
| "learning_rate": 0.0005946355738195701, |
| "loss": 3.3823, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.10792306130145929, |
| "grad_norm": 0.15937426686286926, |
| "learning_rate": 0.0005945266549227162, |
| "loss": 3.3893, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.10850329281383272, |
| "grad_norm": 0.16253319382667542, |
| "learning_rate": 0.0005944166515478649, |
| "loss": 3.3905, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.10908352432620616, |
| "grad_norm": 0.14502382278442383, |
| "learning_rate": 0.0005943055641000604, |
| "loss": 3.3836, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1096637558385796, |
| "grad_norm": 0.14128324389457703, |
| "learning_rate": 0.0005941933929883384, |
| "loss": 3.3854, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.11024398735095303, |
| "grad_norm": 0.19345618784427643, |
| "learning_rate": 0.0005940801386257244, |
| "loss": 3.3746, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.11082421886332647, |
| "grad_norm": 0.1499020904302597, |
| "learning_rate": 0.000593965801429233, |
| "loss": 3.3729, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.1114044503756999, |
| "grad_norm": 0.14975206553936005, |
| "learning_rate": 0.0005938503818198656, |
| "loss": 3.3676, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.11198468188807334, |
| "grad_norm": 0.13726426661014557, |
| "learning_rate": 0.0005937338802226094, |
| "loss": 3.373, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.11256491340044678, |
| "grad_norm": 0.1749139279127121, |
| "learning_rate": 0.0005936162970664355, |
| "loss": 3.3761, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.11314514491282021, |
| "grad_norm": 0.14197006821632385, |
| "learning_rate": 0.0005934976327842974, |
| "loss": 3.3513, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.11372537642519365, |
| "grad_norm": 0.15288510918617249, |
| "learning_rate": 0.0005933778878131294, |
| "loss": 3.357, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.11430560793756708, |
| "grad_norm": 0.1787514090538025, |
| "learning_rate": 0.000593257062593845, |
| "loss": 3.3642, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.11488583944994053, |
| "grad_norm": 0.13630741834640503, |
| "learning_rate": 0.0005931351575713353, |
| "loss": 3.3614, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.11546607096231397, |
| "grad_norm": 0.16102264821529388, |
| "learning_rate": 0.0005930121731944674, |
| "loss": 3.3523, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.1160463024746874, |
| "grad_norm": 0.16226573288440704, |
| "learning_rate": 0.0005928881099160826, |
| "loss": 3.3595, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.1160463024746874, |
| "eval_loss": 3.3178560733795166, |
| "eval_runtime": 3.2576, |
| "eval_samples_per_second": 1329.214, |
| "eval_steps_per_second": 2.763, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.11662653398706084, |
| "grad_norm": 0.14609858393669128, |
| "learning_rate": 0.0005927629681929951, |
| "loss": 3.3585, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.11720676549943428, |
| "grad_norm": 0.14387281239032745, |
| "learning_rate": 0.0005926367484859896, |
| "loss": 3.3517, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.11778699701180771, |
| "grad_norm": 0.14605766534805298, |
| "learning_rate": 0.0005925094512598202, |
| "loss": 3.3524, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.11836722852418115, |
| "grad_norm": 0.22022885084152222, |
| "learning_rate": 0.000592381076983209, |
| "loss": 3.3356, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.11894746003655458, |
| "grad_norm": 0.1847839504480362, |
| "learning_rate": 0.0005922516261288431, |
| "loss": 3.3441, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.11952769154892802, |
| "grad_norm": 0.13915176689624786, |
| "learning_rate": 0.0005921210991733745, |
| "loss": 3.352, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.12010792306130146, |
| "grad_norm": 0.1398390680551529, |
| "learning_rate": 0.0005919894965974168, |
| "loss": 3.3455, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.12068815457367489, |
| "grad_norm": 0.1368722915649414, |
| "learning_rate": 0.0005918568188855447, |
| "loss": 3.3403, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.12126838608604833, |
| "grad_norm": 0.16239017248153687, |
| "learning_rate": 0.0005917230665262914, |
| "loss": 3.3334, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.12184861759842178, |
| "grad_norm": 0.14380386471748352, |
| "learning_rate": 0.000591588240012147, |
| "loss": 3.3294, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.12242884911079521, |
| "grad_norm": 0.16626037657260895, |
| "learning_rate": 0.0005914523398395569, |
| "loss": 3.3425, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.12300908062316865, |
| "grad_norm": 0.15981921553611755, |
| "learning_rate": 0.0005913153665089197, |
| "loss": 3.3403, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.12358931213554208, |
| "grad_norm": 0.15275150537490845, |
| "learning_rate": 0.0005911773205245857, |
| "loss": 3.3261, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.12416954364791552, |
| "grad_norm": 0.1598198413848877, |
| "learning_rate": 0.0005910382023948546, |
| "loss": 3.3264, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.12474977516028896, |
| "grad_norm": 0.138661190867424, |
| "learning_rate": 0.0005908980126319739, |
| "loss": 3.3216, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.1253300066726624, |
| "grad_norm": 0.15583263337612152, |
| "learning_rate": 0.000590756751752137, |
| "loss": 3.3204, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.12591023818503583, |
| "grad_norm": 0.15883944928646088, |
| "learning_rate": 0.0005906144202754813, |
| "loss": 3.3274, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.12649046969740926, |
| "grad_norm": 0.15031637251377106, |
| "learning_rate": 0.0005904710187260862, |
| "loss": 3.3224, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.1270707012097827, |
| "grad_norm": 0.1994715929031372, |
| "learning_rate": 0.0005903265476319712, |
| "loss": 3.3204, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.12765093272215614, |
| "grad_norm": 0.16986873745918274, |
| "learning_rate": 0.000590181007525094, |
| "loss": 3.327, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.12823116423452957, |
| "grad_norm": 0.147616907954216, |
| "learning_rate": 0.0005900343989413485, |
| "loss": 3.3063, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.128811395746903, |
| "grad_norm": 0.16532088816165924, |
| "learning_rate": 0.0005898867224205629, |
| "loss": 3.3198, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.12939162725927644, |
| "grad_norm": 0.16687408089637756, |
| "learning_rate": 0.0005897379785064977, |
| "loss": 3.3193, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.12997185877164988, |
| "grad_norm": 0.16683116555213928, |
| "learning_rate": 0.0005895881677468434, |
| "loss": 3.3078, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.13055209028402331, |
| "grad_norm": 0.15461483597755432, |
| "learning_rate": 0.000589437290693219, |
| "loss": 3.3126, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.13113232179639675, |
| "grad_norm": 0.1432589441537857, |
| "learning_rate": 0.0005892853479011696, |
| "loss": 3.3004, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.13171255330877019, |
| "grad_norm": 0.1792496293783188, |
| "learning_rate": 0.0005891323399301646, |
| "loss": 3.2946, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.13229278482114365, |
| "grad_norm": 0.15189994871616364, |
| "learning_rate": 0.0005889782673435952, |
| "loss": 3.3013, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.13287301633351709, |
| "grad_norm": 0.15026351809501648, |
| "learning_rate": 0.0005888231307087728, |
| "loss": 3.295, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.13345324784589052, |
| "grad_norm": 0.16199465095996857, |
| "learning_rate": 0.0005886669305969269, |
| "loss": 3.2955, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.13403347935826396, |
| "grad_norm": 0.16704988479614258, |
| "learning_rate": 0.0005885096675832027, |
| "loss": 3.3057, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.1346137108706374, |
| "grad_norm": 0.14401213824748993, |
| "learning_rate": 0.0005883513422466588, |
| "loss": 3.2876, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.13519394238301083, |
| "grad_norm": 0.15336865186691284, |
| "learning_rate": 0.000588191955170266, |
| "loss": 3.2903, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.13577417389538426, |
| "grad_norm": 0.16176366806030273, |
| "learning_rate": 0.0005880315069409039, |
| "loss": 3.2873, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.1363544054077577, |
| "grad_norm": 0.14728406071662903, |
| "learning_rate": 0.00058786999814936, |
| "loss": 3.2862, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.13693463692013114, |
| "grad_norm": 0.14426636695861816, |
| "learning_rate": 0.0005877074293903264, |
| "loss": 3.2786, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.13751486843250457, |
| "grad_norm": 0.15023665130138397, |
| "learning_rate": 0.0005875438012623984, |
| "loss": 3.2888, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.138095099944878, |
| "grad_norm": 0.1882687211036682, |
| "learning_rate": 0.0005873791143680718, |
| "loss": 3.2806, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.13867533145725144, |
| "grad_norm": 0.14847789704799652, |
| "learning_rate": 0.000587213369313741, |
| "loss": 3.2698, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.13925556296962488, |
| "grad_norm": 0.14070352911949158, |
| "learning_rate": 0.0005870465667096969, |
| "loss": 3.2782, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.13983579448199832, |
| "grad_norm": 0.19226056337356567, |
| "learning_rate": 0.0005868787071701238, |
| "loss": 3.2639, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.14041602599437175, |
| "grad_norm": 0.1776312291622162, |
| "learning_rate": 0.0005867097913130982, |
| "loss": 3.2792, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.1409962575067452, |
| "grad_norm": 0.13482613861560822, |
| "learning_rate": 0.0005865398197605863, |
| "loss": 3.2834, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.14157648901911862, |
| "grad_norm": 0.16731715202331543, |
| "learning_rate": 0.0005863687931384408, |
| "loss": 3.2773, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.14215672053149206, |
| "grad_norm": 0.14542406797409058, |
| "learning_rate": 0.0005861967120763997, |
| "loss": 3.2676, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.1427369520438655, |
| "grad_norm": 0.1490476280450821, |
| "learning_rate": 0.0005860235772080836, |
| "loss": 3.2783, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.14331718355623893, |
| "grad_norm": 0.1446717530488968, |
| "learning_rate": 0.0005858493891709932, |
| "loss": 3.283, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.14389741506861237, |
| "grad_norm": 0.1412891447544098, |
| "learning_rate": 0.0005856741486065071, |
| "loss": 3.2652, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.1444776465809858, |
| "grad_norm": 0.14674563705921173, |
| "learning_rate": 0.0005854978561598794, |
| "loss": 3.2613, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.14505787809335924, |
| "grad_norm": 0.14808981120586395, |
| "learning_rate": 0.0005853205124802374, |
| "loss": 3.2742, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.14563810960573267, |
| "grad_norm": 0.14043253660202026, |
| "learning_rate": 0.0005851421182205789, |
| "loss": 3.2685, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.14621834111810614, |
| "grad_norm": 0.1568257212638855, |
| "learning_rate": 0.0005849626740377705, |
| "loss": 3.2711, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.14679857263047957, |
| "grad_norm": 0.13545943796634674, |
| "learning_rate": 0.0005847821805925444, |
| "loss": 3.2573, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.147378804142853, |
| "grad_norm": 0.18863698840141296, |
| "learning_rate": 0.0005846006385494964, |
| "loss": 3.2526, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.14795903565522645, |
| "grad_norm": 0.14628858864307404, |
| "learning_rate": 0.0005844180485770832, |
| "loss": 3.2629, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.14853926716759988, |
| "grad_norm": 0.1624503880739212, |
| "learning_rate": 0.0005842344113476202, |
| "loss": 3.2529, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.14911949867997332, |
| "grad_norm": 0.16218945384025574, |
| "learning_rate": 0.0005840497275372792, |
| "loss": 3.2548, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.14969973019234675, |
| "grad_norm": 0.16516704857349396, |
| "learning_rate": 0.0005838639978260851, |
| "loss": 3.2501, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.1502799617047202, |
| "grad_norm": 0.1366761326789856, |
| "learning_rate": 0.0005836772228979142, |
| "loss": 3.2467, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.15086019321709362, |
| "grad_norm": 0.15526661276817322, |
| "learning_rate": 0.0005834894034404913, |
| "loss": 3.242, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.15144042472946706, |
| "grad_norm": 0.1441916972398758, |
| "learning_rate": 0.0005833005401453874, |
| "loss": 3.2399, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.1520206562418405, |
| "grad_norm": 0.1708252727985382, |
| "learning_rate": 0.0005831106337080169, |
| "loss": 3.2427, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.15260088775421393, |
| "grad_norm": 0.14945155382156372, |
| "learning_rate": 0.0005829196848276351, |
| "loss": 3.2449, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.15318111926658737, |
| "grad_norm": 0.1512700468301773, |
| "learning_rate": 0.000582727694207336, |
| "loss": 3.2438, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.1537613507789608, |
| "grad_norm": 0.15101619064807892, |
| "learning_rate": 0.0005825346625540491, |
| "loss": 3.2396, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.15434158229133424, |
| "grad_norm": 0.13658584654331207, |
| "learning_rate": 0.000582340590578537, |
| "loss": 3.2475, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.15492181380370768, |
| "grad_norm": 0.16723176836967468, |
| "learning_rate": 0.0005821454789953932, |
| "loss": 3.2385, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.1555020453160811, |
| "grad_norm": 0.16236084699630737, |
| "learning_rate": 0.000581949328523039, |
| "loss": 3.2287, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.15608227682845455, |
| "grad_norm": 0.1473713517189026, |
| "learning_rate": 0.0005817521398837209, |
| "loss": 3.2335, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.15666250834082798, |
| "grad_norm": 0.14422966539859772, |
| "learning_rate": 0.0005815539138035082, |
| "loss": 3.2217, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.15724273985320142, |
| "grad_norm": 0.1676100343465805, |
| "learning_rate": 0.00058135465101229, |
| "loss": 3.2329, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.15782297136557485, |
| "grad_norm": 0.14574168622493744, |
| "learning_rate": 0.000581154352243773, |
| "loss": 3.2278, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.1584032028779483, |
| "grad_norm": 0.16981543600559235, |
| "learning_rate": 0.000580953018235478, |
| "loss": 3.229, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.15898343439032173, |
| "grad_norm": 0.13945645093917847, |
| "learning_rate": 0.0005807506497287379, |
| "loss": 3.2297, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.15956366590269516, |
| "grad_norm": 0.17302276194095612, |
| "learning_rate": 0.0005805472474686949, |
| "loss": 3.2227, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.16014389741506863, |
| "grad_norm": 0.15059055387973785, |
| "learning_rate": 0.0005803428122042974, |
| "loss": 3.2288, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.16072412892744206, |
| "grad_norm": 0.14908020198345184, |
| "learning_rate": 0.0005801373446882973, |
| "loss": 3.2293, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.1613043604398155, |
| "grad_norm": 0.1653462052345276, |
| "learning_rate": 0.0005799308456772478, |
| "loss": 3.2189, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.16188459195218893, |
| "grad_norm": 0.14483293890953064, |
| "learning_rate": 0.0005797233159314997, |
| "loss": 3.2239, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.16246482346456237, |
| "grad_norm": 0.15277917683124542, |
| "learning_rate": 0.0005795147562151992, |
| "loss": 3.2155, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.1630450549769358, |
| "grad_norm": 0.13660204410552979, |
| "learning_rate": 0.0005793051672962852, |
| "loss": 3.2183, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.16362528648930924, |
| "grad_norm": 0.15595564246177673, |
| "learning_rate": 0.0005790945499464861, |
| "loss": 3.2163, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.16420551800168268, |
| "grad_norm": 0.14608708024024963, |
| "learning_rate": 0.0005788829049413167, |
| "loss": 3.2222, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.1647857495140561, |
| "grad_norm": 0.14129003882408142, |
| "learning_rate": 0.0005786702330600764, |
| "loss": 3.2115, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.16536598102642955, |
| "grad_norm": 0.13925908505916595, |
| "learning_rate": 0.0005784565350858453, |
| "loss": 3.2115, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.16594621253880298, |
| "grad_norm": 0.15094564855098724, |
| "learning_rate": 0.0005782418118054816, |
| "loss": 3.216, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.16652644405117642, |
| "grad_norm": 0.1384998857975006, |
| "learning_rate": 0.0005780260640096189, |
| "loss": 3.2084, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.16710667556354986, |
| "grad_norm": 0.15442876517772675, |
| "learning_rate": 0.0005778092924926634, |
| "loss": 3.2071, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.1676869070759233, |
| "grad_norm": 0.16494965553283691, |
| "learning_rate": 0.0005775914980527904, |
| "loss": 3.2101, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.16826713858829673, |
| "grad_norm": 0.16855239868164062, |
| "learning_rate": 0.0005773726814919419, |
| "loss": 3.2019, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.16884737010067016, |
| "grad_norm": 0.1579483449459076, |
| "learning_rate": 0.0005771528436158233, |
| "loss": 3.209, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.1694276016130436, |
| "grad_norm": 0.1417829543352127, |
| "learning_rate": 0.0005769319852339008, |
| "loss": 3.2019, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.17000783312541703, |
| "grad_norm": 0.14454993605613708, |
| "learning_rate": 0.0005767101071593979, |
| "loss": 3.2047, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.17058806463779047, |
| "grad_norm": 0.16087666153907776, |
| "learning_rate": 0.0005764872102092931, |
| "loss": 3.2062, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.1711682961501639, |
| "grad_norm": 0.139312744140625, |
| "learning_rate": 0.0005762632952043163, |
| "loss": 3.1988, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.17174852766253734, |
| "grad_norm": 0.15459179878234863, |
| "learning_rate": 0.000576038362968946, |
| "loss": 3.2002, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.17232875917491078, |
| "grad_norm": 0.18820500373840332, |
| "learning_rate": 0.0005758124143314062, |
| "loss": 3.2035, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.17290899068728421, |
| "grad_norm": 0.14626365900039673, |
| "learning_rate": 0.0005755854501236635, |
| "loss": 3.194, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.17348922219965765, |
| "grad_norm": 0.14270606637001038, |
| "learning_rate": 0.0005753574711814238, |
| "loss": 3.1879, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.1740694537120311, |
| "grad_norm": 0.15857936441898346, |
| "learning_rate": 0.0005751284783441297, |
| "loss": 3.207, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.1740694537120311, |
| "eval_loss": 3.158046245574951, |
| "eval_runtime": 3.2654, |
| "eval_samples_per_second": 1326.029, |
| "eval_steps_per_second": 2.756, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17464968522440455, |
| "grad_norm": 0.14403465390205383, |
| "learning_rate": 0.0005748984724549565, |
| "loss": 3.1895, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.17522991673677799, |
| "grad_norm": 0.1392756998538971, |
| "learning_rate": 0.0005746674543608101, |
| "loss": 3.1942, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.17581014824915142, |
| "grad_norm": 0.13957557082176208, |
| "learning_rate": 0.0005744354249123234, |
| "loss": 3.1969, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.17639037976152486, |
| "grad_norm": 0.151198148727417, |
| "learning_rate": 0.0005742023849638531, |
| "loss": 3.1903, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.1769706112738983, |
| "grad_norm": 0.14607684314250946, |
| "learning_rate": 0.0005739683353734766, |
| "loss": 3.2003, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.17755084278627173, |
| "grad_norm": 0.13925622403621674, |
| "learning_rate": 0.0005737332770029891, |
| "loss": 3.1927, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.17813107429864516, |
| "grad_norm": 0.13125456869602203, |
| "learning_rate": 0.0005734972107179001, |
| "loss": 3.1849, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.1787113058110186, |
| "grad_norm": 0.16905735433101654, |
| "learning_rate": 0.0005732601373874306, |
| "loss": 3.187, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.17929153732339204, |
| "grad_norm": 0.13563838601112366, |
| "learning_rate": 0.0005730220578845091, |
| "loss": 3.1853, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.17987176883576547, |
| "grad_norm": 0.15470236539840698, |
| "learning_rate": 0.0005727829730857695, |
| "loss": 3.1906, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.1804520003481389, |
| "grad_norm": 0.160013347864151, |
| "learning_rate": 0.0005725428838715469, |
| "loss": 3.1705, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.18103223186051234, |
| "grad_norm": 0.14684250950813293, |
| "learning_rate": 0.0005723017911258752, |
| "loss": 3.1825, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.18161246337288578, |
| "grad_norm": 0.1529027372598648, |
| "learning_rate": 0.0005720596957364829, |
| "loss": 3.1817, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.18219269488525922, |
| "grad_norm": 0.13860736787319183, |
| "learning_rate": 0.0005718165985947907, |
| "loss": 3.1844, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.18277292639763265, |
| "grad_norm": 0.14795511960983276, |
| "learning_rate": 0.0005715725005959077, |
| "loss": 3.1741, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.1833531579100061, |
| "grad_norm": 0.1455545276403427, |
| "learning_rate": 0.0005713274026386283, |
| "loss": 3.1869, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.18393338942237952, |
| "grad_norm": 0.14845995604991913, |
| "learning_rate": 0.0005710813056254289, |
| "loss": 3.1735, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.18451362093475296, |
| "grad_norm": 0.14949209988117218, |
| "learning_rate": 0.0005708342104624645, |
| "loss": 3.178, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.1850938524471264, |
| "grad_norm": 0.16276435554027557, |
| "learning_rate": 0.0005705861180595653, |
| "loss": 3.1712, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.18567408395949983, |
| "grad_norm": 0.14152179658412933, |
| "learning_rate": 0.0005703370293302335, |
| "loss": 3.1752, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.18625431547187327, |
| "grad_norm": 0.1554255187511444, |
| "learning_rate": 0.00057008694519164, |
| "loss": 3.169, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.1868345469842467, |
| "grad_norm": 0.14890237152576447, |
| "learning_rate": 0.0005698358665646207, |
| "loss": 3.1706, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.18741477849662014, |
| "grad_norm": 0.15197904407978058, |
| "learning_rate": 0.0005695837943736735, |
| "loss": 3.1691, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.1879950100089936, |
| "grad_norm": 0.15369053184986115, |
| "learning_rate": 0.0005693307295469547, |
| "loss": 3.1678, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.18857524152136704, |
| "grad_norm": 0.19938114285469055, |
| "learning_rate": 0.0005690766730162752, |
| "loss": 3.1706, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.18915547303374047, |
| "grad_norm": 0.14962078630924225, |
| "learning_rate": 0.0005688216257170979, |
| "loss": 3.1665, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.1897357045461139, |
| "grad_norm": 0.14826686680316925, |
| "learning_rate": 0.0005685655885885337, |
| "loss": 3.1478, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.19031593605848734, |
| "grad_norm": 0.137392058968544, |
| "learning_rate": 0.0005683085625733382, |
| "loss": 3.1645, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.19089616757086078, |
| "grad_norm": 0.15559589862823486, |
| "learning_rate": 0.000568050548617908, |
| "loss": 3.1674, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.19147639908323422, |
| "grad_norm": 0.17506170272827148, |
| "learning_rate": 0.0005677915476722775, |
| "loss": 3.1606, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.19205663059560765, |
| "grad_norm": 0.1602877825498581, |
| "learning_rate": 0.0005675315606901155, |
| "loss": 3.1586, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.1926368621079811, |
| "grad_norm": 0.13343220949172974, |
| "learning_rate": 0.0005672705886287211, |
| "loss": 3.1553, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.19321709362035452, |
| "grad_norm": 0.15390737354755402, |
| "learning_rate": 0.0005670086324490208, |
| "loss": 3.1687, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.19379732513272796, |
| "grad_norm": 0.13513082265853882, |
| "learning_rate": 0.0005667456931155647, |
| "loss": 3.1543, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.1943775566451014, |
| "grad_norm": 0.1489078551530838, |
| "learning_rate": 0.0005664817715965231, |
| "loss": 3.1623, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.19495778815747483, |
| "grad_norm": 0.14149461686611176, |
| "learning_rate": 0.0005662168688636826, |
| "loss": 3.1487, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.19553801966984827, |
| "grad_norm": 0.150479257106781, |
| "learning_rate": 0.0005659509858924428, |
| "loss": 3.1588, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.1961182511822217, |
| "grad_norm": 0.15041102468967438, |
| "learning_rate": 0.0005656841236618127, |
| "loss": 3.155, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.19669848269459514, |
| "grad_norm": 0.14053913950920105, |
| "learning_rate": 0.0005654162831544068, |
| "loss": 3.1581, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.19727871420696858, |
| "grad_norm": 0.15485486388206482, |
| "learning_rate": 0.0005651474653564421, |
| "loss": 3.1465, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.197858945719342, |
| "grad_norm": 0.1425885111093521, |
| "learning_rate": 0.0005648776712577338, |
| "loss": 3.1535, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.19843917723171545, |
| "grad_norm": 0.1361316442489624, |
| "learning_rate": 0.0005646069018516921, |
| "loss": 3.1466, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.19901940874408888, |
| "grad_norm": 0.15521439909934998, |
| "learning_rate": 0.0005643351581353184, |
| "loss": 3.1415, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.19959964025646232, |
| "grad_norm": 0.14644280076026917, |
| "learning_rate": 0.0005640624411092014, |
| "loss": 3.1411, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.20017987176883575, |
| "grad_norm": 0.14116531610488892, |
| "learning_rate": 0.0005637887517775137, |
| "loss": 3.1542, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.2007601032812092, |
| "grad_norm": 0.1301729828119278, |
| "learning_rate": 0.0005635140911480082, |
| "loss": 3.1448, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.20134033479358263, |
| "grad_norm": 0.16307103633880615, |
| "learning_rate": 0.000563238460232014, |
| "loss": 3.1397, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.2019205663059561, |
| "grad_norm": 0.13141117990016937, |
| "learning_rate": 0.0005629618600444332, |
| "loss": 3.1469, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.20250079781832953, |
| "grad_norm": 0.13741467893123627, |
| "learning_rate": 0.0005626842916037365, |
| "loss": 3.1419, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.20308102933070296, |
| "grad_norm": 0.16112880408763885, |
| "learning_rate": 0.0005624057559319601, |
| "loss": 3.1449, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2036612608430764, |
| "grad_norm": 0.153072327375412, |
| "learning_rate": 0.0005621262540547015, |
| "loss": 3.1365, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.20424149235544983, |
| "grad_norm": 0.1413891613483429, |
| "learning_rate": 0.0005618457870011158, |
| "loss": 3.1307, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.20482172386782327, |
| "grad_norm": 0.15589068830013275, |
| "learning_rate": 0.0005615643558039121, |
| "loss": 3.1418, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.2054019553801967, |
| "grad_norm": 0.12889379262924194, |
| "learning_rate": 0.0005612819614993496, |
| "loss": 3.1366, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.20598218689257014, |
| "grad_norm": 0.14375300705432892, |
| "learning_rate": 0.0005609986051272336, |
| "loss": 3.13, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.20656241840494358, |
| "grad_norm": 0.1587209552526474, |
| "learning_rate": 0.000560714287730912, |
| "loss": 3.1338, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.207142649917317, |
| "grad_norm": 0.15273341536521912, |
| "learning_rate": 0.0005604290103572714, |
| "loss": 3.1393, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.20772288142969045, |
| "grad_norm": 0.13435807824134827, |
| "learning_rate": 0.0005601427740567328, |
| "loss": 3.137, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.20830311294206388, |
| "grad_norm": 0.1391715109348297, |
| "learning_rate": 0.0005598555798832482, |
| "loss": 3.1347, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.20888334445443732, |
| "grad_norm": 0.16318084299564362, |
| "learning_rate": 0.0005595674288942969, |
| "loss": 3.1279, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.20946357596681076, |
| "grad_norm": 0.1386035829782486, |
| "learning_rate": 0.0005592783221508807, |
| "loss": 3.1335, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.2100438074791842, |
| "grad_norm": 0.14639577269554138, |
| "learning_rate": 0.000558988260717521, |
| "loss": 3.142, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.21062403899155763, |
| "grad_norm": 0.13666051626205444, |
| "learning_rate": 0.0005586972456622546, |
| "loss": 3.1287, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.21120427050393106, |
| "grad_norm": 0.14930284023284912, |
| "learning_rate": 0.0005584052780566293, |
| "loss": 3.1283, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.2117845020163045, |
| "grad_norm": 0.13987945020198822, |
| "learning_rate": 0.0005581123589757002, |
| "loss": 3.1329, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.21236473352867793, |
| "grad_norm": 0.1452946811914444, |
| "learning_rate": 0.0005578184894980263, |
| "loss": 3.1294, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.21294496504105137, |
| "grad_norm": 0.15192043781280518, |
| "learning_rate": 0.0005575236707056657, |
| "loss": 3.1206, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.2135251965534248, |
| "grad_norm": 0.16006827354431152, |
| "learning_rate": 0.0005572279036841721, |
| "loss": 3.1273, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.21410542806579824, |
| "grad_norm": 0.18141302466392517, |
| "learning_rate": 0.0005569311895225906, |
| "loss": 3.1245, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.21468565957817168, |
| "grad_norm": 0.14263153076171875, |
| "learning_rate": 0.0005566335293134539, |
| "loss": 3.1211, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.21526589109054511, |
| "grad_norm": 0.1435001790523529, |
| "learning_rate": 0.0005563349241527781, |
| "loss": 3.1258, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.21584612260291858, |
| "grad_norm": 0.15155887603759766, |
| "learning_rate": 0.0005560353751400585, |
| "loss": 3.1233, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.216426354115292, |
| "grad_norm": 0.1545734703540802, |
| "learning_rate": 0.0005557348833782663, |
| "loss": 3.1292, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.21700658562766545, |
| "grad_norm": 0.15549300611019135, |
| "learning_rate": 0.0005554334499738433, |
| "loss": 3.1142, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.21758681714003889, |
| "grad_norm": 0.15990693867206573, |
| "learning_rate": 0.000555131076036699, |
| "loss": 3.125, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.21816704865241232, |
| "grad_norm": 0.16630201041698456, |
| "learning_rate": 0.0005548277626802058, |
| "loss": 3.1216, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.21874728016478576, |
| "grad_norm": 0.1408713161945343, |
| "learning_rate": 0.0005545235110211954, |
| "loss": 3.1111, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.2193275116771592, |
| "grad_norm": 0.1488475650548935, |
| "learning_rate": 0.0005542183221799544, |
| "loss": 3.1253, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.21990774318953263, |
| "grad_norm": 0.14259935915470123, |
| "learning_rate": 0.0005539121972802198, |
| "loss": 3.1179, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.22048797470190606, |
| "grad_norm": 0.14055614173412323, |
| "learning_rate": 0.0005536051374491757, |
| "loss": 3.1113, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.2210682062142795, |
| "grad_norm": 0.1665177196264267, |
| "learning_rate": 0.0005532971438174485, |
| "loss": 3.1197, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.22164843772665294, |
| "grad_norm": 0.15349626541137695, |
| "learning_rate": 0.0005529882175191031, |
| "loss": 3.1086, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.22222866923902637, |
| "grad_norm": 0.14321498572826385, |
| "learning_rate": 0.0005526783596916385, |
| "loss": 3.1161, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.2228089007513998, |
| "grad_norm": 0.14768148958683014, |
| "learning_rate": 0.0005523675714759835, |
| "loss": 3.1164, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.22338913226377324, |
| "grad_norm": 0.1546637862920761, |
| "learning_rate": 0.000552055854016493, |
| "loss": 3.1185, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.22396936377614668, |
| "grad_norm": 0.16114896535873413, |
| "learning_rate": 0.0005517432084609434, |
| "loss": 3.1083, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.22454959528852012, |
| "grad_norm": 0.13796792924404144, |
| "learning_rate": 0.0005514296359605284, |
| "loss": 3.102, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.22512982680089355, |
| "grad_norm": 0.13948635756969452, |
| "learning_rate": 0.0005511151376698546, |
| "loss": 3.1079, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.225710058313267, |
| "grad_norm": 0.13826532661914825, |
| "learning_rate": 0.0005507997147469378, |
| "loss": 3.107, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.22629028982564042, |
| "grad_norm": 0.1437525451183319, |
| "learning_rate": 0.0005504833683531981, |
| "loss": 3.1076, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.22687052133801386, |
| "grad_norm": 0.14256474375724792, |
| "learning_rate": 0.0005501660996534563, |
| "loss": 3.1056, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.2274507528503873, |
| "grad_norm": 0.1531156748533249, |
| "learning_rate": 0.0005498479098159289, |
| "loss": 3.101, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.22803098436276073, |
| "grad_norm": 0.16901366412639618, |
| "learning_rate": 0.0005495288000122242, |
| "loss": 3.0981, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.22861121587513417, |
| "grad_norm": 0.1440243273973465, |
| "learning_rate": 0.0005492087714173378, |
| "loss": 3.1052, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.2291914473875076, |
| "grad_norm": 0.1603139340877533, |
| "learning_rate": 0.0005488878252096487, |
| "loss": 3.105, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.22977167889988107, |
| "grad_norm": 0.1588706523180008, |
| "learning_rate": 0.0005485659625709144, |
| "loss": 3.1107, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.2303519104122545, |
| "grad_norm": 0.1452343761920929, |
| "learning_rate": 0.0005482431846862667, |
| "loss": 3.1074, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.23093214192462794, |
| "grad_norm": 0.15799881517887115, |
| "learning_rate": 0.0005479194927442078, |
| "loss": 3.0985, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.23151237343700137, |
| "grad_norm": 0.12657681107521057, |
| "learning_rate": 0.0005475948879366053, |
| "loss": 3.0958, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.2320926049493748, |
| "grad_norm": 0.13606688380241394, |
| "learning_rate": 0.000547269371458688, |
| "loss": 3.0999, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.2320926049493748, |
| "eval_loss": 3.0630993843078613, |
| "eval_runtime": 3.264, |
| "eval_samples_per_second": 1326.576, |
| "eval_steps_per_second": 2.757, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.23267283646174824, |
| "grad_norm": 0.16136619448661804, |
| "learning_rate": 0.0005469429445090417, |
| "loss": 3.1004, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.23325306797412168, |
| "grad_norm": 0.14767828583717346, |
| "learning_rate": 0.0005466156082896047, |
| "loss": 3.1075, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.23383329948649512, |
| "grad_norm": 0.1492021530866623, |
| "learning_rate": 0.0005462873640056632, |
| "loss": 3.1025, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.23441353099886855, |
| "grad_norm": 0.14654645323753357, |
| "learning_rate": 0.000545958212865847, |
| "loss": 3.0966, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.234993762511242, |
| "grad_norm": 0.15648731589317322, |
| "learning_rate": 0.0005456281560821252, |
| "loss": 3.0937, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.23557399402361542, |
| "grad_norm": 0.13584694266319275, |
| "learning_rate": 0.0005452971948698014, |
| "loss": 3.1052, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.23615422553598886, |
| "grad_norm": 0.13829472661018372, |
| "learning_rate": 0.0005449653304475094, |
| "loss": 3.0933, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.2367344570483623, |
| "grad_norm": 0.16889816522598267, |
| "learning_rate": 0.0005446325640372088, |
| "loss": 3.0949, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.23731468856073573, |
| "grad_norm": 0.12351599335670471, |
| "learning_rate": 0.0005442988968641804, |
| "loss": 3.0914, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.23789492007310917, |
| "grad_norm": 0.14327877759933472, |
| "learning_rate": 0.0005439643301570216, |
| "loss": 3.0814, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.2384751515854826, |
| "grad_norm": 0.15155468881130219, |
| "learning_rate": 0.0005436288651476421, |
| "loss": 3.0849, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.23905538309785604, |
| "grad_norm": 0.14292922616004944, |
| "learning_rate": 0.0005432925030712594, |
| "loss": 3.0887, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.23963561461022947, |
| "grad_norm": 0.14884264767169952, |
| "learning_rate": 0.0005429552451663936, |
| "loss": 3.0911, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.2402158461226029, |
| "grad_norm": 0.1403530389070511, |
| "learning_rate": 0.0005426170926748639, |
| "loss": 3.0926, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.24079607763497635, |
| "grad_norm": 0.14543718099594116, |
| "learning_rate": 0.0005422780468417829, |
| "loss": 3.0897, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.24137630914734978, |
| "grad_norm": 0.12813718616962433, |
| "learning_rate": 0.0005419381089155532, |
| "loss": 3.0902, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.24195654065972322, |
| "grad_norm": 0.13375824689865112, |
| "learning_rate": 0.0005415972801478617, |
| "loss": 3.0915, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.24253677217209665, |
| "grad_norm": 0.14347635209560394, |
| "learning_rate": 0.0005412555617936755, |
| "loss": 3.0892, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.2431170036844701, |
| "grad_norm": 0.14166522026062012, |
| "learning_rate": 0.0005409129551112377, |
| "loss": 3.0808, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.24369723519684355, |
| "grad_norm": 0.13924048840999603, |
| "learning_rate": 0.0005405694613620617, |
| "loss": 3.0854, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.244277466709217, |
| "grad_norm": 0.13338492810726166, |
| "learning_rate": 0.0005402250818109276, |
| "loss": 3.0836, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.24485769822159043, |
| "grad_norm": 0.14531342685222626, |
| "learning_rate": 0.0005398798177258768, |
| "loss": 3.0971, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.24543792973396386, |
| "grad_norm": 0.1432162970304489, |
| "learning_rate": 0.0005395336703782082, |
| "loss": 3.0838, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.2460181612463373, |
| "grad_norm": 0.15475274622440338, |
| "learning_rate": 0.0005391866410424722, |
| "loss": 3.0764, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.24659839275871073, |
| "grad_norm": 0.15521539747714996, |
| "learning_rate": 0.0005388387309964675, |
| "loss": 3.0837, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.24717862427108417, |
| "grad_norm": 0.1430870145559311, |
| "learning_rate": 0.0005384899415212351, |
| "loss": 3.0889, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.2477588557834576, |
| "grad_norm": 0.14807622134685516, |
| "learning_rate": 0.0005381402739010545, |
| "loss": 3.0769, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.24833908729583104, |
| "grad_norm": 0.1509249359369278, |
| "learning_rate": 0.0005377897294234385, |
| "loss": 3.0815, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.24891931880820448, |
| "grad_norm": 0.1451188027858734, |
| "learning_rate": 0.0005374383093791287, |
| "loss": 3.0766, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.2494995503205779, |
| "grad_norm": 0.130240797996521, |
| "learning_rate": 0.0005370860150620901, |
| "loss": 3.0824, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.2500797818329513, |
| "grad_norm": 0.14696471393108368, |
| "learning_rate": 0.0005367328477695077, |
| "loss": 3.0678, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.2506600133453248, |
| "grad_norm": 0.13198255002498627, |
| "learning_rate": 0.0005363788088017803, |
| "loss": 3.0759, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.25124024485769825, |
| "grad_norm": 0.1413690447807312, |
| "learning_rate": 0.0005360238994625166, |
| "loss": 3.0842, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.25182047637007166, |
| "grad_norm": 0.1560727059841156, |
| "learning_rate": 0.0005356681210585297, |
| "loss": 3.074, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.2524007078824451, |
| "grad_norm": 0.13727669417858124, |
| "learning_rate": 0.0005353114748998332, |
| "loss": 3.082, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.2529809393948185, |
| "grad_norm": 0.1479531228542328, |
| "learning_rate": 0.0005349539622996356, |
| "loss": 3.0804, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.253561170907192, |
| "grad_norm": 0.13756506145000458, |
| "learning_rate": 0.0005345955845743358, |
| "loss": 3.0829, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.2541414024195654, |
| "grad_norm": 0.14778585731983185, |
| "learning_rate": 0.0005342363430435177, |
| "loss": 3.0785, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.25472163393193886, |
| "grad_norm": 0.13227440416812897, |
| "learning_rate": 0.0005338762390299467, |
| "loss": 3.0776, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.25530186544431227, |
| "grad_norm": 0.14178766310214996, |
| "learning_rate": 0.0005335152738595634, |
| "loss": 3.0799, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.25588209695668573, |
| "grad_norm": 0.14833244681358337, |
| "learning_rate": 0.0005331534488614794, |
| "loss": 3.0674, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.25646232846905914, |
| "grad_norm": 0.13829241693019867, |
| "learning_rate": 0.0005327907653679721, |
| "loss": 3.0643, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.2570425599814326, |
| "grad_norm": 0.16908784210681915, |
| "learning_rate": 0.0005324272247144802, |
| "loss": 3.0649, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.257622791493806, |
| "grad_norm": 0.14392369985580444, |
| "learning_rate": 0.0005320628282395985, |
| "loss": 3.0761, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.2582030230061795, |
| "grad_norm": 0.16387993097305298, |
| "learning_rate": 0.0005316975772850729, |
| "loss": 3.0666, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.2587832545185529, |
| "grad_norm": 0.13506962358951569, |
| "learning_rate": 0.0005313314731957957, |
| "loss": 3.0672, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.25936348603092635, |
| "grad_norm": 0.1522989273071289, |
| "learning_rate": 0.0005309645173198007, |
| "loss": 3.0607, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.25994371754329976, |
| "grad_norm": 0.13824021816253662, |
| "learning_rate": 0.0005305967110082576, |
| "loss": 3.0627, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.2605239490556732, |
| "grad_norm": 0.13685718178749084, |
| "learning_rate": 0.000530228055615468, |
| "loss": 3.0612, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.26110418056804663, |
| "grad_norm": 0.13309134542942047, |
| "learning_rate": 0.0005298585524988594, |
| "loss": 3.0548, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.2616844120804201, |
| "grad_norm": 0.17121103405952454, |
| "learning_rate": 0.0005294882030189812, |
| "loss": 3.066, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.2622646435927935, |
| "grad_norm": 0.13467055559158325, |
| "learning_rate": 0.000529117008539499, |
| "loss": 3.0606, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.26284487510516696, |
| "grad_norm": 0.12970523536205292, |
| "learning_rate": 0.0005287449704271896, |
| "loss": 3.0553, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.26342510661754037, |
| "grad_norm": 0.1509917676448822, |
| "learning_rate": 0.0005283720900519365, |
| "loss": 3.0571, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.26400533812991384, |
| "grad_norm": 0.1372883915901184, |
| "learning_rate": 0.0005279983687867243, |
| "loss": 3.0635, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.2645855696422873, |
| "grad_norm": 0.1482354998588562, |
| "learning_rate": 0.0005276238080076335, |
| "loss": 3.0619, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.2651658011546607, |
| "grad_norm": 0.13884900510311127, |
| "learning_rate": 0.0005272484090938365, |
| "loss": 3.069, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.26574603266703417, |
| "grad_norm": 0.14500798285007477, |
| "learning_rate": 0.0005268721734275914, |
| "loss": 3.0715, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.2663262641794076, |
| "grad_norm": 0.1357218474149704, |
| "learning_rate": 0.000526495102394237, |
| "loss": 3.0584, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.26690649569178104, |
| "grad_norm": 0.14025723934173584, |
| "learning_rate": 0.0005261171973821887, |
| "loss": 3.0613, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.26748672720415445, |
| "grad_norm": 0.15253092348575592, |
| "learning_rate": 0.0005257384597829322, |
| "loss": 3.0584, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.2680669587165279, |
| "grad_norm": 0.14573270082473755, |
| "learning_rate": 0.0005253588909910191, |
| "loss": 3.0634, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.2686471902289013, |
| "grad_norm": 0.15005233883857727, |
| "learning_rate": 0.0005249784924040614, |
| "loss": 3.0526, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.2692274217412748, |
| "grad_norm": 0.15314225852489471, |
| "learning_rate": 0.0005245972654227265, |
| "loss": 3.0635, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.2698076532536482, |
| "grad_norm": 0.14412705600261688, |
| "learning_rate": 0.0005242152114507321, |
| "loss": 3.055, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.27038788476602166, |
| "grad_norm": 0.15046367049217224, |
| "learning_rate": 0.0005238323318948412, |
| "loss": 3.066, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.27096811627839507, |
| "grad_norm": 0.12618590891361237, |
| "learning_rate": 0.0005234486281648559, |
| "loss": 3.0433, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.27154834779076853, |
| "grad_norm": 0.14097653329372406, |
| "learning_rate": 0.000523064101673614, |
| "loss": 3.0593, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.27212857930314194, |
| "grad_norm": 0.14015048742294312, |
| "learning_rate": 0.0005226787538369821, |
| "loss": 3.057, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.2727088108155154, |
| "grad_norm": 0.1534152328968048, |
| "learning_rate": 0.0005222925860738513, |
| "loss": 3.06, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.2732890423278888, |
| "grad_norm": 0.1350966989994049, |
| "learning_rate": 0.0005219055998061319, |
| "loss": 3.0518, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.2738692738402623, |
| "grad_norm": 0.15589705109596252, |
| "learning_rate": 0.0005215177964587478, |
| "loss": 3.0468, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.2744495053526357, |
| "grad_norm": 0.14144299924373627, |
| "learning_rate": 0.0005211291774596316, |
| "loss": 3.0555, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.27502973686500914, |
| "grad_norm": 0.14553704857826233, |
| "learning_rate": 0.000520739744239719, |
| "loss": 3.0531, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.27560996837738255, |
| "grad_norm": 0.15157508850097656, |
| "learning_rate": 0.0005203494982329441, |
| "loss": 3.0504, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.276190199889756, |
| "grad_norm": 0.14391539990901947, |
| "learning_rate": 0.0005199584408762335, |
| "loss": 3.0512, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.2767704314021294, |
| "grad_norm": 0.1297539621591568, |
| "learning_rate": 0.0005195665736095013, |
| "loss": 3.036, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.2773506629145029, |
| "grad_norm": 0.13723768293857574, |
| "learning_rate": 0.0005191738978756439, |
| "loss": 3.0532, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.2779308944268763, |
| "grad_norm": 0.1422174870967865, |
| "learning_rate": 0.0005187804151205345, |
| "loss": 3.0605, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.27851112593924976, |
| "grad_norm": 0.137346088886261, |
| "learning_rate": 0.0005183861267930177, |
| "loss": 3.0552, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.2790913574516232, |
| "grad_norm": 0.13471810519695282, |
| "learning_rate": 0.0005179910343449046, |
| "loss": 3.0426, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.27967158896399663, |
| "grad_norm": 0.12727439403533936, |
| "learning_rate": 0.0005175951392309669, |
| "loss": 3.0448, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.2802518204763701, |
| "grad_norm": 0.13242101669311523, |
| "learning_rate": 0.0005171984429089318, |
| "loss": 3.0546, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.2808320519887435, |
| "grad_norm": 0.14276637136936188, |
| "learning_rate": 0.0005168009468394769, |
| "loss": 3.0392, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.28141228350111697, |
| "grad_norm": 0.1340208798646927, |
| "learning_rate": 0.0005164026524862242, |
| "loss": 3.0491, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.2819925150134904, |
| "grad_norm": 0.14000356197357178, |
| "learning_rate": 0.0005160035613157354, |
| "loss": 3.0396, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.28257274652586384, |
| "grad_norm": 0.15974439680576324, |
| "learning_rate": 0.0005156036747975059, |
| "loss": 3.0406, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.28315297803823725, |
| "grad_norm": 0.1382746398448944, |
| "learning_rate": 0.0005152029944039597, |
| "loss": 3.0449, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.2837332095506107, |
| "grad_norm": 0.14049001038074493, |
| "learning_rate": 0.000514801521610444, |
| "loss": 3.0463, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.2843134410629841, |
| "grad_norm": 0.13699445128440857, |
| "learning_rate": 0.0005143992578952238, |
| "loss": 3.0393, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.2848936725753576, |
| "grad_norm": 0.1515870988368988, |
| "learning_rate": 0.0005139962047394761, |
| "loss": 3.0399, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.285473904087731, |
| "grad_norm": 0.1437605917453766, |
| "learning_rate": 0.0005135923636272849, |
| "loss": 3.0378, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.28605413560010445, |
| "grad_norm": 0.13769088685512543, |
| "learning_rate": 0.0005131877360456355, |
| "loss": 3.0377, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.28663436711247786, |
| "grad_norm": 0.15194256603717804, |
| "learning_rate": 0.000512782323484409, |
| "loss": 3.0399, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.2872145986248513, |
| "grad_norm": 0.14672812819480896, |
| "learning_rate": 0.0005123761274363769, |
| "loss": 3.04, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.28779483013722473, |
| "grad_norm": 0.13162557780742645, |
| "learning_rate": 0.0005119691493971957, |
| "loss": 3.0317, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.2883750616495982, |
| "grad_norm": 0.13286751508712769, |
| "learning_rate": 0.0005115613908654011, |
| "loss": 3.0486, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.2889552931619716, |
| "grad_norm": 0.13034851849079132, |
| "learning_rate": 0.0005111528533424027, |
| "loss": 3.0399, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.28953552467434507, |
| "grad_norm": 0.1405908614397049, |
| "learning_rate": 0.0005107435383324786, |
| "loss": 3.0372, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.2901157561867185, |
| "grad_norm": 0.16415055096149445, |
| "learning_rate": 0.0005103334473427695, |
| "loss": 3.0333, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.2901157561867185, |
| "eval_loss": 2.9981322288513184, |
| "eval_runtime": 3.2581, |
| "eval_samples_per_second": 1329.001, |
| "eval_steps_per_second": 2.762, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.29069598769909194, |
| "grad_norm": 0.12301915884017944, |
| "learning_rate": 0.0005099225818832731, |
| "loss": 3.0312, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.29127621921146535, |
| "grad_norm": 0.16767041385173798, |
| "learning_rate": 0.0005095109434668395, |
| "loss": 3.0247, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.2918564507238388, |
| "grad_norm": 0.13234609365463257, |
| "learning_rate": 0.0005090985336091642, |
| "loss": 3.0348, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.2924366822362123, |
| "grad_norm": 0.14020933210849762, |
| "learning_rate": 0.0005086853538287835, |
| "loss": 3.0317, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.2930169137485857, |
| "grad_norm": 0.14580604434013367, |
| "learning_rate": 0.0005082714056470687, |
| "loss": 3.0321, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.29359714526095915, |
| "grad_norm": 0.13627541065216064, |
| "learning_rate": 0.0005078566905882205, |
| "loss": 3.0318, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.29417737677333256, |
| "grad_norm": 0.12629657983779907, |
| "learning_rate": 0.0005074412101792631, |
| "loss": 3.0284, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.294757608285706, |
| "grad_norm": 0.13409367203712463, |
| "learning_rate": 0.0005070249659500387, |
| "loss": 3.0381, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.2953378397980794, |
| "grad_norm": 0.1341470181941986, |
| "learning_rate": 0.0005066079594332023, |
| "loss": 3.0229, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.2959180713104529, |
| "grad_norm": 0.1630919873714447, |
| "learning_rate": 0.0005061901921642156, |
| "loss": 3.0315, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.2964983028228263, |
| "grad_norm": 0.12825888395309448, |
| "learning_rate": 0.0005057716656813416, |
| "loss": 3.0249, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.29707853433519976, |
| "grad_norm": 0.1613105833530426, |
| "learning_rate": 0.0005053523815256384, |
| "loss": 3.0238, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.29765876584757317, |
| "grad_norm": 0.14038483798503876, |
| "learning_rate": 0.0005049323412409542, |
| "loss": 3.0294, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.29823899735994663, |
| "grad_norm": 0.16509568691253662, |
| "learning_rate": 0.0005045115463739215, |
| "loss": 3.0356, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.29881922887232004, |
| "grad_norm": 0.14289237558841705, |
| "learning_rate": 0.0005040899984739509, |
| "loss": 3.0228, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.2993994603846935, |
| "grad_norm": 0.14584140479564667, |
| "learning_rate": 0.000503667699093226, |
| "loss": 3.0294, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.2999796918970669, |
| "grad_norm": 0.12970221042633057, |
| "learning_rate": 0.0005032446497866973, |
| "loss": 3.0321, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.3005599234094404, |
| "grad_norm": 0.13744401931762695, |
| "learning_rate": 0.0005028208521120769, |
| "loss": 3.0236, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.3011401549218138, |
| "grad_norm": 0.1317235380411148, |
| "learning_rate": 0.0005023963076298321, |
| "loss": 3.0254, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.30172038643418725, |
| "grad_norm": 0.14213494956493378, |
| "learning_rate": 0.0005019710179031801, |
| "loss": 3.0275, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.30230061794656066, |
| "grad_norm": 0.13712069392204285, |
| "learning_rate": 0.0005015449844980823, |
| "loss": 3.0249, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.3028808494589341, |
| "grad_norm": 0.14411009848117828, |
| "learning_rate": 0.0005011182089832381, |
| "loss": 3.0215, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.30346108097130753, |
| "grad_norm": 0.12583871185779572, |
| "learning_rate": 0.0005006906929300799, |
| "loss": 3.0275, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.304041312483681, |
| "grad_norm": 0.14499635994434357, |
| "learning_rate": 0.0005002624379127666, |
| "loss": 3.0258, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.3046215439960544, |
| "grad_norm": 0.14918765425682068, |
| "learning_rate": 0.0004998334455081779, |
| "loss": 3.0209, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.30520177550842786, |
| "grad_norm": 0.13245496153831482, |
| "learning_rate": 0.0004994037172959089, |
| "loss": 3.0212, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.3057820070208013, |
| "grad_norm": 0.12850724160671234, |
| "learning_rate": 0.0004989732548582638, |
| "loss": 3.0258, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.30636223853317474, |
| "grad_norm": 0.1346123367547989, |
| "learning_rate": 0.0004985420597802503, |
| "loss": 3.0138, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.3069424700455482, |
| "grad_norm": 0.14746621251106262, |
| "learning_rate": 0.0004981101336495741, |
| "loss": 3.0202, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.3075227015579216, |
| "grad_norm": 0.140406534075737, |
| "learning_rate": 0.0004976774780566324, |
| "loss": 3.0276, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.30810293307029507, |
| "grad_norm": 0.133416548371315, |
| "learning_rate": 0.0004972440945945083, |
| "loss": 3.0228, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.3086831645826685, |
| "grad_norm": 0.140433207154274, |
| "learning_rate": 0.0004968099848589651, |
| "loss": 3.0219, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.30926339609504194, |
| "grad_norm": 0.14963370561599731, |
| "learning_rate": 0.0004963751504484403, |
| "loss": 3.0119, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.30984362760741535, |
| "grad_norm": 0.12273452430963516, |
| "learning_rate": 0.0004959395929640401, |
| "loss": 3.0136, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.3104238591197888, |
| "grad_norm": 0.14232607185840607, |
| "learning_rate": 0.0004955033140095322, |
| "loss": 3.0088, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.3110040906321622, |
| "grad_norm": 0.15276071429252625, |
| "learning_rate": 0.0004950663151913419, |
| "loss": 3.0189, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.3115843221445357, |
| "grad_norm": 0.14110638201236725, |
| "learning_rate": 0.0004946285981185446, |
| "loss": 3.0273, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.3121645536569091, |
| "grad_norm": 0.12971307337284088, |
| "learning_rate": 0.0004941901644028601, |
| "loss": 3.0181, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.31274478516928256, |
| "grad_norm": 0.12775759398937225, |
| "learning_rate": 0.0004937510156586474, |
| "loss": 3.0108, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.31332501668165597, |
| "grad_norm": 0.15120139718055725, |
| "learning_rate": 0.0004933111535028983, |
| "loss": 3.0142, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.31390524819402943, |
| "grad_norm": 0.14965811371803284, |
| "learning_rate": 0.0004928705795552312, |
| "loss": 3.0137, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.31448547970640284, |
| "grad_norm": 0.1459018588066101, |
| "learning_rate": 0.0004924292954378856, |
| "loss": 3.0146, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.3150657112187763, |
| "grad_norm": 0.1286230981349945, |
| "learning_rate": 0.0004919873027757159, |
| "loss": 3.0162, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.3156459427311497, |
| "grad_norm": 0.13560357689857483, |
| "learning_rate": 0.0004915446031961854, |
| "loss": 3.0129, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.3162261742435232, |
| "grad_norm": 0.1419978141784668, |
| "learning_rate": 0.0004911011983293601, |
| "loss": 3.0115, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.3168064057558966, |
| "grad_norm": 0.12910611927509308, |
| "learning_rate": 0.0004906570898079032, |
| "loss": 3.0151, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.31738663726827004, |
| "grad_norm": 0.15491628646850586, |
| "learning_rate": 0.0004902122792670692, |
| "loss": 3.0118, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.31796686878064345, |
| "grad_norm": 0.12448934465646744, |
| "learning_rate": 0.0004897667683446967, |
| "loss": 3.0119, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.3185471002930169, |
| "grad_norm": 0.1288510411977768, |
| "learning_rate": 0.0004893205586812036, |
| "loss": 3.0078, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.3191273318053903, |
| "grad_norm": 0.12903016805648804, |
| "learning_rate": 0.000488873651919581, |
| "loss": 3.0085, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.3197075633177638, |
| "grad_norm": 0.14042973518371582, |
| "learning_rate": 0.0004884260497053859, |
| "loss": 3.0093, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.32028779483013725, |
| "grad_norm": 0.13995361328125, |
| "learning_rate": 0.0004879777536867369, |
| "loss": 3.0009, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.32086802634251066, |
| "grad_norm": 0.13979199528694153, |
| "learning_rate": 0.00048752876551430677, |
| "loss": 3.0089, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.3214482578548841, |
| "grad_norm": 0.130417600274086, |
| "learning_rate": 0.0004870790868413171, |
| "loss": 3.0087, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.32202848936725753, |
| "grad_norm": 0.13676275312900543, |
| "learning_rate": 0.00048662871932353164, |
| "loss": 3.0092, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.322608720879631, |
| "grad_norm": 0.12869158387184143, |
| "learning_rate": 0.00048617766461925104, |
| "loss": 3.0074, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.3231889523920044, |
| "grad_norm": 0.13846737146377563, |
| "learning_rate": 0.0004857259243893058, |
| "loss": 3.0079, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.32376918390437787, |
| "grad_norm": 0.1349971890449524, |
| "learning_rate": 0.0004852735002970509, |
| "loss": 2.9915, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.3243494154167513, |
| "grad_norm": 0.13398951292037964, |
| "learning_rate": 0.000484820394008359, |
| "loss": 2.9982, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.32492964692912474, |
| "grad_norm": 0.13627557456493378, |
| "learning_rate": 0.0004843666071916152, |
| "loss": 3.0019, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.32550987844149815, |
| "grad_norm": 0.13470283150672913, |
| "learning_rate": 0.00048391214151771, |
| "loss": 3.0015, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.3260901099538716, |
| "grad_norm": 0.14207038283348083, |
| "learning_rate": 0.0004834569986600336, |
| "loss": 3.0051, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.326670341466245, |
| "grad_norm": 0.13324964046478271, |
| "learning_rate": 0.00048300118029446967, |
| "loss": 2.9956, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.3272505729786185, |
| "grad_norm": 0.15288645029067993, |
| "learning_rate": 0.0004825446880993892, |
| "loss": 3.0087, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.3278308044909919, |
| "grad_norm": 0.13744772970676422, |
| "learning_rate": 0.00048208752375564424, |
| "loss": 3.0049, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.32841103600336535, |
| "grad_norm": 0.13114534318447113, |
| "learning_rate": 0.00048162968894656193, |
| "loss": 2.9993, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.32899126751573876, |
| "grad_norm": 0.1254429966211319, |
| "learning_rate": 0.00048117118535793773, |
| "loss": 2.9937, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.3295714990281122, |
| "grad_norm": 0.15155521035194397, |
| "learning_rate": 0.00048071201467803017, |
| "loss": 3.0017, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.33015173054048563, |
| "grad_norm": 0.1420249044895172, |
| "learning_rate": 0.00048025217859755365, |
| "loss": 3.017, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.3307319620528591, |
| "grad_norm": 0.14615775644779205, |
| "learning_rate": 0.0004797916788096728, |
| "loss": 3.0052, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.3313121935652325, |
| "grad_norm": 0.12851493060588837, |
| "learning_rate": 0.00047933051700999605, |
| "loss": 3.0041, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.33189242507760597, |
| "grad_norm": 0.13371190428733826, |
| "learning_rate": 0.00047886869489656956, |
| "loss": 2.9879, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.3324726565899794, |
| "grad_norm": 0.13223771750926971, |
| "learning_rate": 0.0004784062141698707, |
| "loss": 2.993, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.33305288810235284, |
| "grad_norm": 0.13460920751094818, |
| "learning_rate": 0.00047794307653280184, |
| "loss": 2.9928, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.3336331196147263, |
| "grad_norm": 0.12678171694278717, |
| "learning_rate": 0.0004774792836906844, |
| "loss": 3.0053, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.3342133511270997, |
| "grad_norm": 0.14595790207386017, |
| "learning_rate": 0.0004770148373512522, |
| "loss": 2.9974, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.3347935826394732, |
| "grad_norm": 0.1505734771490097, |
| "learning_rate": 0.00047654973922464525, |
| "loss": 3.0053, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.3353738141518466, |
| "grad_norm": 0.13636811077594757, |
| "learning_rate": 0.00047608399102340367, |
| "loss": 2.9984, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.33595404566422005, |
| "grad_norm": 0.14487333595752716, |
| "learning_rate": 0.000475617594462461, |
| "loss": 3.0013, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.33653427717659345, |
| "grad_norm": 0.13392585515975952, |
| "learning_rate": 0.00047515055125913825, |
| "loss": 2.9897, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.3371145086889669, |
| "grad_norm": 0.1241224929690361, |
| "learning_rate": 0.0004746828631331376, |
| "loss": 2.9918, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.3376947402013403, |
| "grad_norm": 0.1381169706583023, |
| "learning_rate": 0.00047421453180653553, |
| "loss": 2.9874, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.3382749717137138, |
| "grad_norm": 0.12413561344146729, |
| "learning_rate": 0.00047374555900377716, |
| "loss": 2.9928, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.3388552032260872, |
| "grad_norm": 0.13286706805229187, |
| "learning_rate": 0.0004732759464516694, |
| "loss": 2.9907, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.33943543473846066, |
| "grad_norm": 0.1558184027671814, |
| "learning_rate": 0.0004728056958793749, |
| "loss": 3.0036, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.34001566625083407, |
| "grad_norm": 0.13220670819282532, |
| "learning_rate": 0.0004723348090184056, |
| "loss": 2.9945, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.34059589776320753, |
| "grad_norm": 0.13015997409820557, |
| "learning_rate": 0.00047186328760261603, |
| "loss": 3.0005, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.34117612927558094, |
| "grad_norm": 0.146441251039505, |
| "learning_rate": 0.0004713911333681976, |
| "loss": 2.9984, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.3417563607879544, |
| "grad_norm": 0.12352869659662247, |
| "learning_rate": 0.0004709183480536718, |
| "loss": 2.9946, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.3423365923003278, |
| "grad_norm": 0.12516902387142181, |
| "learning_rate": 0.0004704449333998834, |
| "loss": 2.9918, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.3429168238127013, |
| "grad_norm": 0.14155182242393494, |
| "learning_rate": 0.00046997089114999494, |
| "loss": 2.9937, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.3434970553250747, |
| "grad_norm": 0.12636148929595947, |
| "learning_rate": 0.0004694962230494796, |
| "loss": 2.9869, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.34407728683744815, |
| "grad_norm": 0.14390048384666443, |
| "learning_rate": 0.000469020930846115, |
| "loss": 2.9759, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.34465751834982156, |
| "grad_norm": 0.14705798029899597, |
| "learning_rate": 0.0004685450162899768, |
| "loss": 2.9876, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.345237749862195, |
| "grad_norm": 0.13937653601169586, |
| "learning_rate": 0.00046806848113343234, |
| "loss": 2.9872, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.34581798137456843, |
| "grad_norm": 0.13351042568683624, |
| "learning_rate": 0.00046759132713113403, |
| "loss": 2.986, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.3463982128869419, |
| "grad_norm": 0.133000910282135, |
| "learning_rate": 0.0004671135560400127, |
| "loss": 2.9886, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.3469784443993153, |
| "grad_norm": 0.1261400580406189, |
| "learning_rate": 0.0004666351696192718, |
| "loss": 2.9811, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.34755867591168876, |
| "grad_norm": 0.13575439155101776, |
| "learning_rate": 0.00046615616963038007, |
| "loss": 2.9796, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.3481389074240622, |
| "grad_norm": 0.13202066719532013, |
| "learning_rate": 0.0004656765578370657, |
| "loss": 2.9958, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.3481389074240622, |
| "eval_loss": 2.949599027633667, |
| "eval_runtime": 3.2655, |
| "eval_samples_per_second": 1325.986, |
| "eval_steps_per_second": 2.756, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.34871913893643564, |
| "grad_norm": 0.14002783596515656, |
| "learning_rate": 0.0004651963360053096, |
| "loss": 2.9811, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.3492993704488091, |
| "grad_norm": 0.1519598364830017, |
| "learning_rate": 0.00046471550590333874, |
| "loss": 2.9884, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.3498796019611825, |
| "grad_norm": 0.1435564160346985, |
| "learning_rate": 0.00046423406930162, |
| "loss": 2.9831, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.35045983347355597, |
| "grad_norm": 0.1241581067442894, |
| "learning_rate": 0.0004637520279728534, |
| "loss": 2.9801, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.3510400649859294, |
| "grad_norm": 0.124722421169281, |
| "learning_rate": 0.00046326938369196566, |
| "loss": 2.9872, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.35162029649830284, |
| "grad_norm": 0.12400694936513901, |
| "learning_rate": 0.0004627861382361034, |
| "loss": 2.9863, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.35220052801067625, |
| "grad_norm": 0.14388398826122284, |
| "learning_rate": 0.0004623022933846272, |
| "loss": 2.973, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.3527807595230497, |
| "grad_norm": 0.14111004769802094, |
| "learning_rate": 0.0004618178509191045, |
| "loss": 2.9902, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.3533609910354231, |
| "grad_norm": 0.1257510930299759, |
| "learning_rate": 0.000461332812623303, |
| "loss": 2.9877, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.3539412225477966, |
| "grad_norm": 0.1282566338777542, |
| "learning_rate": 0.00046084718028318466, |
| "loss": 2.9832, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.35452145406017, |
| "grad_norm": 0.14325213432312012, |
| "learning_rate": 0.00046036095568689864, |
| "loss": 2.9782, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.35510168557254346, |
| "grad_norm": 0.1563083529472351, |
| "learning_rate": 0.0004598741406247748, |
| "loss": 2.9793, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.35568191708491687, |
| "grad_norm": 0.1327456384897232, |
| "learning_rate": 0.0004593867368893172, |
| "loss": 2.9843, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.35626214859729033, |
| "grad_norm": 0.13930997252464294, |
| "learning_rate": 0.0004588987462751975, |
| "loss": 2.976, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.35684238010966374, |
| "grad_norm": 0.1295255720615387, |
| "learning_rate": 0.00045841017057924807, |
| "loss": 2.9801, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.3574226116220372, |
| "grad_norm": 0.1404607594013214, |
| "learning_rate": 0.00045792101160045613, |
| "loss": 2.9788, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.3580028431344106, |
| "grad_norm": 0.12297389656305313, |
| "learning_rate": 0.0004574312711399561, |
| "loss": 2.9853, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.3585830746467841, |
| "grad_norm": 0.15521986782550812, |
| "learning_rate": 0.0004569409510010236, |
| "loss": 2.9825, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.3591633061591575, |
| "grad_norm": 0.12915629148483276, |
| "learning_rate": 0.00045645005298906887, |
| "loss": 2.984, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.35974353767153094, |
| "grad_norm": 0.12852182984352112, |
| "learning_rate": 0.00045595857891162964, |
| "loss": 2.9703, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.36032376918390435, |
| "grad_norm": 0.1300152987241745, |
| "learning_rate": 0.00045546653057836517, |
| "loss": 2.971, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.3609040006962778, |
| "grad_norm": 0.13348935544490814, |
| "learning_rate": 0.00045497390980104885, |
| "loss": 2.9762, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.3614842322086513, |
| "grad_norm": 0.13476519286632538, |
| "learning_rate": 0.00045448071839356203, |
| "loss": 2.9756, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.3620644637210247, |
| "grad_norm": 0.13884297013282776, |
| "learning_rate": 0.000453986958171887, |
| "loss": 2.9829, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.36264469523339815, |
| "grad_norm": 0.12928573787212372, |
| "learning_rate": 0.00045349263095410087, |
| "loss": 2.9752, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.36322492674577156, |
| "grad_norm": 0.13350141048431396, |
| "learning_rate": 0.000452997738560368, |
| "loss": 2.9748, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.363805158258145, |
| "grad_norm": 0.13747799396514893, |
| "learning_rate": 0.00045250228281293423, |
| "loss": 2.9705, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.36438538977051843, |
| "grad_norm": 0.1344989687204361, |
| "learning_rate": 0.00045200626553611943, |
| "loss": 2.9801, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.3649656212828919, |
| "grad_norm": 0.1321888118982315, |
| "learning_rate": 0.00045150968855631104, |
| "loss": 2.9781, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.3655458527952653, |
| "grad_norm": 0.12561041116714478, |
| "learning_rate": 0.0004510125537019577, |
| "loss": 2.973, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.36612608430763877, |
| "grad_norm": 0.13948814570903778, |
| "learning_rate": 0.00045051486280356194, |
| "loss": 2.9731, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.3667063158200122, |
| "grad_norm": 0.12595129013061523, |
| "learning_rate": 0.0004500166176936739, |
| "loss": 2.9659, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.36728654733238564, |
| "grad_norm": 0.12941335141658783, |
| "learning_rate": 0.00044951782020688415, |
| "loss": 2.973, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.36786677884475905, |
| "grad_norm": 0.14215658605098724, |
| "learning_rate": 0.00044901847217981736, |
| "loss": 2.975, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.3684470103571325, |
| "grad_norm": 0.12309448421001434, |
| "learning_rate": 0.00044851857545112525, |
| "loss": 2.9749, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.3690272418695059, |
| "grad_norm": 0.12824192643165588, |
| "learning_rate": 0.00044801813186147986, |
| "loss": 2.9672, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.3696074733818794, |
| "grad_norm": 0.12063992768526077, |
| "learning_rate": 0.00044751714325356697, |
| "loss": 2.9708, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.3701877048942528, |
| "grad_norm": 0.12898465991020203, |
| "learning_rate": 0.0004470156114720792, |
| "loss": 2.9699, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.37076793640662625, |
| "grad_norm": 0.1321457326412201, |
| "learning_rate": 0.00044651353836370897, |
| "loss": 2.9661, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.37134816791899966, |
| "grad_norm": 0.13804246485233307, |
| "learning_rate": 0.0004460109257771422, |
| "loss": 2.9783, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.3719283994313731, |
| "grad_norm": 0.12447643280029297, |
| "learning_rate": 0.00044550777556305094, |
| "loss": 2.9691, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.37250863094374653, |
| "grad_norm": 0.1610770970582962, |
| "learning_rate": 0.00044500408957408706, |
| "loss": 2.972, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.37308886245612, |
| "grad_norm": 0.1278504580259323, |
| "learning_rate": 0.00044449986966487527, |
| "loss": 2.9694, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.3736690939684934, |
| "grad_norm": 0.13527578115463257, |
| "learning_rate": 0.0004439951176920059, |
| "loss": 2.9707, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.37424932548086687, |
| "grad_norm": 0.14050637185573578, |
| "learning_rate": 0.0004434898355140287, |
| "loss": 2.9712, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.3748295569932403, |
| "grad_norm": 0.1513315588235855, |
| "learning_rate": 0.00044298402499144554, |
| "loss": 2.9705, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.37540978850561374, |
| "grad_norm": 0.1299854964017868, |
| "learning_rate": 0.00044247768798670367, |
| "loss": 2.9662, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.3759900200179872, |
| "grad_norm": 0.1321675330400467, |
| "learning_rate": 0.00044197082636418907, |
| "loss": 2.9675, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.3765702515303606, |
| "grad_norm": 0.1453583687543869, |
| "learning_rate": 0.00044146344199021934, |
| "loss": 2.9639, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.3771504830427341, |
| "grad_norm": 0.13450521230697632, |
| "learning_rate": 0.00044095553673303685, |
| "loss": 2.9661, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.3777307145551075, |
| "grad_norm": 0.13579097390174866, |
| "learning_rate": 0.00044044711246280215, |
| "loss": 2.9608, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.37831094606748095, |
| "grad_norm": 0.1469910442829132, |
| "learning_rate": 0.00043993817105158627, |
| "loss": 2.9686, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.37889117757985435, |
| "grad_norm": 0.1311839371919632, |
| "learning_rate": 0.00043942871437336527, |
| "loss": 2.9636, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.3794714090922278, |
| "grad_norm": 0.15060357749462128, |
| "learning_rate": 0.0004389187443040116, |
| "loss": 2.9613, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.3800516406046012, |
| "grad_norm": 0.13408997654914856, |
| "learning_rate": 0.00043840826272128873, |
| "loss": 2.9626, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.3806318721169747, |
| "grad_norm": 0.1458410769701004, |
| "learning_rate": 0.0004378972715048434, |
| "loss": 2.9604, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.3812121036293481, |
| "grad_norm": 0.13342171907424927, |
| "learning_rate": 0.0004373857725361984, |
| "loss": 2.9602, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.38179233514172156, |
| "grad_norm": 0.12624911963939667, |
| "learning_rate": 0.00043687376769874686, |
| "loss": 2.9703, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.38237256665409497, |
| "grad_norm": 0.13120518624782562, |
| "learning_rate": 0.0004363612588777442, |
| "loss": 2.9601, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.38295279816646843, |
| "grad_norm": 0.1357596516609192, |
| "learning_rate": 0.00043584824796030145, |
| "loss": 2.9561, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.38353302967884184, |
| "grad_norm": 0.1270647495985031, |
| "learning_rate": 0.00043533473683537863, |
| "loss": 2.9522, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.3841132611912153, |
| "grad_norm": 0.1325126439332962, |
| "learning_rate": 0.0004348207273937776, |
| "loss": 2.9603, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.3846934927035887, |
| "grad_norm": 0.13015331327915192, |
| "learning_rate": 0.0004343062215281347, |
| "loss": 2.955, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.3852737242159622, |
| "grad_norm": 0.12867479026317596, |
| "learning_rate": 0.00043379122113291465, |
| "loss": 2.9692, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.3858539557283356, |
| "grad_norm": 0.14423881471157074, |
| "learning_rate": 0.00043327572810440283, |
| "loss": 2.9539, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.38643418724070905, |
| "grad_norm": 0.13097575306892395, |
| "learning_rate": 0.00043275974434069846, |
| "loss": 2.9576, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.38701441875308246, |
| "grad_norm": 0.129910409450531, |
| "learning_rate": 0.0004322432717417079, |
| "loss": 2.9617, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.3875946502654559, |
| "grad_norm": 0.13308489322662354, |
| "learning_rate": 0.00043172631220913735, |
| "loss": 2.9514, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.38817488177782933, |
| "grad_norm": 0.12263292074203491, |
| "learning_rate": 0.00043120886764648605, |
| "loss": 2.9557, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.3887551132902028, |
| "grad_norm": 0.1288110911846161, |
| "learning_rate": 0.0004306909399590389, |
| "loss": 2.9558, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.38933534480257626, |
| "grad_norm": 0.12322728335857391, |
| "learning_rate": 0.00043017253105386005, |
| "loss": 2.9551, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.38991557631494966, |
| "grad_norm": 0.1551227867603302, |
| "learning_rate": 0.0004296536428397853, |
| "loss": 2.9583, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.3904958078273231, |
| "grad_norm": 0.12883497774600983, |
| "learning_rate": 0.00042913427722741546, |
| "loss": 2.9495, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.39107603933969654, |
| "grad_norm": 0.12460558116436005, |
| "learning_rate": 0.00042861443612910913, |
| "loss": 2.9597, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.39165627085207, |
| "grad_norm": 0.122388556599617, |
| "learning_rate": 0.00042809412145897576, |
| "loss": 2.9557, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.3922365023644434, |
| "grad_norm": 0.12150498479604721, |
| "learning_rate": 0.00042757333513286834, |
| "loss": 2.9489, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.39281673387681687, |
| "grad_norm": 0.15273340046405792, |
| "learning_rate": 0.00042705207906837666, |
| "loss": 2.9503, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.3933969653891903, |
| "grad_norm": 0.13954737782478333, |
| "learning_rate": 0.00042653035518482025, |
| "loss": 2.9481, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.39397719690156374, |
| "grad_norm": 0.15386004745960236, |
| "learning_rate": 0.0004260081654032411, |
| "loss": 2.9596, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.39455742841393715, |
| "grad_norm": 0.1319696307182312, |
| "learning_rate": 0.0004254855116463966, |
| "loss": 2.9526, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3951376599263106, |
| "grad_norm": 0.14486876130104065, |
| "learning_rate": 0.00042496239583875286, |
| "loss": 2.9501, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.395717891438684, |
| "grad_norm": 0.12461838871240616, |
| "learning_rate": 0.0004244388199064768, |
| "loss": 2.9519, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.3962981229510575, |
| "grad_norm": 0.14132647216320038, |
| "learning_rate": 0.00042391478577743006, |
| "loss": 2.9533, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.3968783544634309, |
| "grad_norm": 0.12907026708126068, |
| "learning_rate": 0.00042339029538116104, |
| "loss": 2.9451, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.39745858597580436, |
| "grad_norm": 0.13801275193691254, |
| "learning_rate": 0.0004228653506488984, |
| "loss": 2.9382, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.39803881748817777, |
| "grad_norm": 0.11962810158729553, |
| "learning_rate": 0.00042233995351354366, |
| "loss": 2.9501, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.39861904900055123, |
| "grad_norm": 0.12804014980793, |
| "learning_rate": 0.00042181410590966413, |
| "loss": 2.9556, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.39919928051292464, |
| "grad_norm": 0.1232592836022377, |
| "learning_rate": 0.0004212878097734857, |
| "loss": 2.9493, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.3997795120252981, |
| "grad_norm": 0.12467402964830399, |
| "learning_rate": 0.0004207610670428859, |
| "loss": 2.9518, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.4003597435376715, |
| "grad_norm": 0.13029509782791138, |
| "learning_rate": 0.0004202338796573866, |
| "loss": 2.9476, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.40093997505004497, |
| "grad_norm": 0.13504283130168915, |
| "learning_rate": 0.0004197062495581471, |
| "loss": 2.9457, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.4015202065624184, |
| "grad_norm": 0.12205976992845535, |
| "learning_rate": 0.00041917817868795666, |
| "loss": 2.9418, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.40210043807479184, |
| "grad_norm": 0.14173905551433563, |
| "learning_rate": 0.0004186496689912275, |
| "loss": 2.9401, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.40268066958716525, |
| "grad_norm": 0.131003275513649, |
| "learning_rate": 0.00041812072241398764, |
| "loss": 2.9416, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.4032609010995387, |
| "grad_norm": 0.1430942267179489, |
| "learning_rate": 0.00041759134090387396, |
| "loss": 2.9526, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.4038411326119122, |
| "grad_norm": 0.11908053606748581, |
| "learning_rate": 0.00041706152641012435, |
| "loss": 2.9457, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.4044213641242856, |
| "grad_norm": 0.12189971655607224, |
| "learning_rate": 0.0004165312808835716, |
| "loss": 2.9497, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.40500159563665905, |
| "grad_norm": 0.1238475888967514, |
| "learning_rate": 0.00041600060627663515, |
| "loss": 2.9426, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.40558182714903246, |
| "grad_norm": 0.13269031047821045, |
| "learning_rate": 0.00041546950454331437, |
| "loss": 2.9441, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.4061620586614059, |
| "grad_norm": 0.14216388761997223, |
| "learning_rate": 0.0004149379776391817, |
| "loss": 2.9443, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.4061620586614059, |
| "eval_loss": 2.910210609436035, |
| "eval_runtime": 3.2597, |
| "eval_samples_per_second": 1328.339, |
| "eval_steps_per_second": 2.761, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.40674229017377933, |
| "grad_norm": 0.13298869132995605, |
| "learning_rate": 0.0004144060275213747, |
| "loss": 2.946, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.4073225216861528, |
| "grad_norm": 0.14648084342479706, |
| "learning_rate": 0.00041387365614858955, |
| "loss": 2.9468, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.4079027531985262, |
| "grad_norm": 0.13918638229370117, |
| "learning_rate": 0.00041334086548107336, |
| "loss": 2.9561, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.40848298471089967, |
| "grad_norm": 0.1421622335910797, |
| "learning_rate": 0.00041280765748061727, |
| "loss": 2.9437, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.4090632162232731, |
| "grad_norm": 0.1364564597606659, |
| "learning_rate": 0.0004122740341105488, |
| "loss": 2.9354, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.40964344773564654, |
| "grad_norm": 0.1310495287179947, |
| "learning_rate": 0.00041173999733572523, |
| "loss": 2.9471, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.41022367924801995, |
| "grad_norm": 0.14024296402931213, |
| "learning_rate": 0.000411205549122526, |
| "loss": 2.9372, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.4108039107603934, |
| "grad_norm": 0.1430574357509613, |
| "learning_rate": 0.0004106706914388452, |
| "loss": 2.9468, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.4113841422727668, |
| "grad_norm": 0.12103896588087082, |
| "learning_rate": 0.00041013542625408504, |
| "loss": 2.9463, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.4119643737851403, |
| "grad_norm": 0.12720054388046265, |
| "learning_rate": 0.00040959975553914787, |
| "loss": 2.9427, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.4125446052975137, |
| "grad_norm": 0.14135150611400604, |
| "learning_rate": 0.0004090636812664295, |
| "loss": 2.9407, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.41312483680988715, |
| "grad_norm": 0.14666588604450226, |
| "learning_rate": 0.0004085272054098115, |
| "loss": 2.9435, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.41370506832226056, |
| "grad_norm": 0.13804596662521362, |
| "learning_rate": 0.0004079903299446541, |
| "loss": 2.9365, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.414285299834634, |
| "grad_norm": 0.1470736414194107, |
| "learning_rate": 0.00040745305684778907, |
| "loss": 2.9278, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.41486553134700743, |
| "grad_norm": 0.12926244735717773, |
| "learning_rate": 0.00040691538809751234, |
| "loss": 2.9354, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.4154457628593809, |
| "grad_norm": 0.1294509321451187, |
| "learning_rate": 0.00040637732567357635, |
| "loss": 2.9466, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.4160259943717543, |
| "grad_norm": 0.12196213006973267, |
| "learning_rate": 0.0004058388715571835, |
| "loss": 2.9322, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.41660622588412777, |
| "grad_norm": 0.15902066230773926, |
| "learning_rate": 0.00040530002773097825, |
| "loss": 2.9448, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.41718645739650123, |
| "grad_norm": 0.11859998106956482, |
| "learning_rate": 0.0004047607961790399, |
| "loss": 2.9428, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.41776668890887464, |
| "grad_norm": 0.13470393419265747, |
| "learning_rate": 0.00040422117888687555, |
| "loss": 2.942, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.4183469204212481, |
| "grad_norm": 0.1288190484046936, |
| "learning_rate": 0.0004036811778414125, |
| "loss": 2.9362, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.4189271519336215, |
| "grad_norm": 0.12759481370449066, |
| "learning_rate": 0.0004031407950309915, |
| "loss": 2.9447, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.419507383445995, |
| "grad_norm": 0.13468439877033234, |
| "learning_rate": 0.0004026000324453584, |
| "loss": 2.9313, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.4200876149583684, |
| "grad_norm": 0.12287794053554535, |
| "learning_rate": 0.0004020588920756577, |
| "loss": 2.9369, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.42066784647074185, |
| "grad_norm": 0.12006892263889313, |
| "learning_rate": 0.00040151737591442497, |
| "loss": 2.9329, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.42124807798311525, |
| "grad_norm": 0.13062633574008942, |
| "learning_rate": 0.00040097548595557935, |
| "loss": 2.9474, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.4218283094954887, |
| "grad_norm": 0.12141095846891403, |
| "learning_rate": 0.00040043322419441667, |
| "loss": 2.9386, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.4224085410078621, |
| "grad_norm": 0.13452979922294617, |
| "learning_rate": 0.0003998905926276014, |
| "loss": 2.9203, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.4229887725202356, |
| "grad_norm": 0.13672851026058197, |
| "learning_rate": 0.0003993475932531598, |
| "loss": 2.9353, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.423569004032609, |
| "grad_norm": 0.1266540139913559, |
| "learning_rate": 0.0003988042280704724, |
| "loss": 2.929, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.42414923554498246, |
| "grad_norm": 0.1192171648144722, |
| "learning_rate": 0.0003982604990802668, |
| "loss": 2.9314, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.42472946705735587, |
| "grad_norm": 0.11528236418962479, |
| "learning_rate": 0.0003977164082846101, |
| "loss": 2.9349, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.42530969856972933, |
| "grad_norm": 0.12837885320186615, |
| "learning_rate": 0.00039717195768690155, |
| "loss": 2.9211, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.42588993008210274, |
| "grad_norm": 0.1254536211490631, |
| "learning_rate": 0.0003966271492918654, |
| "loss": 2.9311, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.4264701615944762, |
| "grad_norm": 0.12365511804819107, |
| "learning_rate": 0.0003960819851055432, |
| "loss": 2.9411, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.4270503931068496, |
| "grad_norm": 0.14178220927715302, |
| "learning_rate": 0.00039553646713528644, |
| "loss": 2.9322, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.4276306246192231, |
| "grad_norm": 0.13220851123332977, |
| "learning_rate": 0.0003949905973897496, |
| "loss": 2.9397, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.4282108561315965, |
| "grad_norm": 0.12264362722635269, |
| "learning_rate": 0.00039444437787888224, |
| "loss": 2.9355, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.42879108764396995, |
| "grad_norm": 0.12907512485980988, |
| "learning_rate": 0.00039389781061392184, |
| "loss": 2.9259, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.42937131915634336, |
| "grad_norm": 0.1319524645805359, |
| "learning_rate": 0.00039335089760738625, |
| "loss": 2.9284, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.4299515506687168, |
| "grad_norm": 0.1404864490032196, |
| "learning_rate": 0.0003928036408730664, |
| "loss": 2.932, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.43053178218109023, |
| "grad_norm": 0.12499509751796722, |
| "learning_rate": 0.00039225604242601914, |
| "loss": 2.9313, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.4311120136934637, |
| "grad_norm": 0.13161097466945648, |
| "learning_rate": 0.0003917081042825591, |
| "loss": 2.9261, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.43169224520583716, |
| "grad_norm": 0.13262121379375458, |
| "learning_rate": 0.000391159828460252, |
| "loss": 2.9302, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.43227247671821056, |
| "grad_norm": 0.13169781863689423, |
| "learning_rate": 0.0003906112169779069, |
| "loss": 2.9247, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.432852708230584, |
| "grad_norm": 0.1297696828842163, |
| "learning_rate": 0.00039006227185556865, |
| "loss": 2.9422, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.43343293974295743, |
| "grad_norm": 0.1292199194431305, |
| "learning_rate": 0.00038951299511451077, |
| "loss": 2.9232, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.4340131712553309, |
| "grad_norm": 0.13055439293384552, |
| "learning_rate": 0.0003889633887772278, |
| "loss": 2.9246, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.4345934027677043, |
| "grad_norm": 0.1166820153594017, |
| "learning_rate": 0.0003884134548674278, |
| "loss": 2.9361, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.43517363428007777, |
| "grad_norm": 0.12382174283266068, |
| "learning_rate": 0.00038786319541002487, |
| "loss": 2.9221, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.4357538657924512, |
| "grad_norm": 0.12510880827903748, |
| "learning_rate": 0.0003873126124311323, |
| "loss": 2.9289, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.43633409730482464, |
| "grad_norm": 0.13196755945682526, |
| "learning_rate": 0.000386761707958054, |
| "loss": 2.9203, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.43691432881719805, |
| "grad_norm": 0.13719266653060913, |
| "learning_rate": 0.00038621048401927817, |
| "loss": 2.9319, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.4374945603295715, |
| "grad_norm": 0.13211804628372192, |
| "learning_rate": 0.000385658942644469, |
| "loss": 2.9326, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.4380747918419449, |
| "grad_norm": 0.12999597191810608, |
| "learning_rate": 0.0003851070858644596, |
| "loss": 2.9239, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.4386550233543184, |
| "grad_norm": 0.13165125250816345, |
| "learning_rate": 0.0003845549157112445, |
| "loss": 2.9312, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.4392352548666918, |
| "grad_norm": 0.13743376731872559, |
| "learning_rate": 0.00038400243421797206, |
| "loss": 2.9254, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.43981548637906526, |
| "grad_norm": 0.12621231377124786, |
| "learning_rate": 0.00038344964341893684, |
| "loss": 2.9203, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.44039571789143866, |
| "grad_norm": 0.12167075276374817, |
| "learning_rate": 0.00038289654534957266, |
| "loss": 2.9281, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.44097594940381213, |
| "grad_norm": 0.13523493707180023, |
| "learning_rate": 0.0003823431420464444, |
| "loss": 2.916, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.44155618091618554, |
| "grad_norm": 0.11718156933784485, |
| "learning_rate": 0.0003817894355472413, |
| "loss": 2.9145, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.442136412428559, |
| "grad_norm": 0.13470205664634705, |
| "learning_rate": 0.0003812354278907683, |
| "loss": 2.9173, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.4427166439409324, |
| "grad_norm": 0.1286102533340454, |
| "learning_rate": 0.00038068112111693984, |
| "loss": 2.9249, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.44329687545330587, |
| "grad_norm": 0.13669750094413757, |
| "learning_rate": 0.00038012651726677146, |
| "loss": 2.9239, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.4438771069656793, |
| "grad_norm": 0.14638318121433258, |
| "learning_rate": 0.0003795716183823728, |
| "loss": 2.9306, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.44445733847805274, |
| "grad_norm": 0.13569045066833496, |
| "learning_rate": 0.00037901642650693944, |
| "loss": 2.9168, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.4450375699904262, |
| "grad_norm": 0.1257532387971878, |
| "learning_rate": 0.00037846094368474613, |
| "loss": 2.9242, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.4456178015027996, |
| "grad_norm": 0.11852803826332092, |
| "learning_rate": 0.0003779051719611389, |
| "loss": 2.9209, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.4461980330151731, |
| "grad_norm": 0.12594154477119446, |
| "learning_rate": 0.0003773491133825273, |
| "loss": 2.929, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.4467782645275465, |
| "grad_norm": 0.12566526234149933, |
| "learning_rate": 0.00037679276999637746, |
| "loss": 2.9119, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.44735849603991995, |
| "grad_norm": 0.13207079470157623, |
| "learning_rate": 0.0003762361438512038, |
| "loss": 2.917, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.44793872755229336, |
| "grad_norm": 0.13788865506649017, |
| "learning_rate": 0.00037567923699656226, |
| "loss": 2.92, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.4485189590646668, |
| "grad_norm": 0.13110986351966858, |
| "learning_rate": 0.00037512205148304204, |
| "loss": 2.9249, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.44909919057704023, |
| "grad_norm": 0.1643168181180954, |
| "learning_rate": 0.00037456458936225873, |
| "loss": 2.9232, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.4496794220894137, |
| "grad_norm": 0.14076946675777435, |
| "learning_rate": 0.00037400685268684623, |
| "loss": 2.9252, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.4502596536017871, |
| "grad_norm": 0.1238834485411644, |
| "learning_rate": 0.0003734488435104494, |
| "loss": 2.9093, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.45083988511416057, |
| "grad_norm": 0.11924099922180176, |
| "learning_rate": 0.00037289056388771643, |
| "loss": 2.9324, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.451420116626534, |
| "grad_norm": 0.13720078766345978, |
| "learning_rate": 0.0003723320158742914, |
| "loss": 2.9154, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.45200034813890744, |
| "grad_norm": 0.12532520294189453, |
| "learning_rate": 0.00037177320152680663, |
| "loss": 2.9228, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.45258057965128085, |
| "grad_norm": 0.129350483417511, |
| "learning_rate": 0.0003712141229028751, |
| "loss": 2.9071, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.4531608111636543, |
| "grad_norm": 0.12484076619148254, |
| "learning_rate": 0.0003706547820610828, |
| "loss": 2.9107, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.4537410426760277, |
| "grad_norm": 0.12527912855148315, |
| "learning_rate": 0.0003700951810609815, |
| "loss": 2.9166, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.4543212741884012, |
| "grad_norm": 0.1453130692243576, |
| "learning_rate": 0.0003695353219630803, |
| "loss": 2.9195, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.4549015057007746, |
| "grad_norm": 0.1291913241147995, |
| "learning_rate": 0.0003689752068288395, |
| "loss": 2.9124, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.45548173721314805, |
| "grad_norm": 0.12470022588968277, |
| "learning_rate": 0.0003684148377206615, |
| "loss": 2.9241, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.45606196872552146, |
| "grad_norm": 0.1276790350675583, |
| "learning_rate": 0.00036785421670188395, |
| "loss": 2.9178, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.4566422002378949, |
| "grad_norm": 0.15164950489997864, |
| "learning_rate": 0.0003672933458367724, |
| "loss": 2.9072, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.45722243175026833, |
| "grad_norm": 0.14891022443771362, |
| "learning_rate": 0.00036673222719051194, |
| "loss": 2.9235, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.4578026632626418, |
| "grad_norm": 0.1266569346189499, |
| "learning_rate": 0.0003661708628292003, |
| "loss": 2.9159, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.4583828947750152, |
| "grad_norm": 0.12030439078807831, |
| "learning_rate": 0.0003656092548198399, |
| "loss": 2.912, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.45896312628738867, |
| "grad_norm": 0.12590278685092926, |
| "learning_rate": 0.00036504740523033016, |
| "loss": 2.91, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.45954335779976213, |
| "grad_norm": 0.1255042403936386, |
| "learning_rate": 0.0003644853161294601, |
| "loss": 2.9127, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.46012358931213554, |
| "grad_norm": 0.1253713071346283, |
| "learning_rate": 0.0003639229895869009, |
| "loss": 2.9242, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.460703820824509, |
| "grad_norm": 0.1254982203245163, |
| "learning_rate": 0.0003633604276731975, |
| "loss": 2.9115, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.4612840523368824, |
| "grad_norm": 0.12157725542783737, |
| "learning_rate": 0.00036279763245976207, |
| "loss": 2.9114, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.4618642838492559, |
| "grad_norm": 0.12421195954084396, |
| "learning_rate": 0.00036223460601886537, |
| "loss": 2.9083, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.4624445153616293, |
| "grad_norm": 0.11870937049388885, |
| "learning_rate": 0.00036167135042362977, |
| "loss": 2.907, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.46302474687400275, |
| "grad_norm": 0.12460967898368835, |
| "learning_rate": 0.00036110786774802133, |
| "loss": 2.9088, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.46360497838637615, |
| "grad_norm": 0.1310334950685501, |
| "learning_rate": 0.00036054416006684245, |
| "loss": 2.9102, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.4641852098987496, |
| "grad_norm": 0.12560488283634186, |
| "learning_rate": 0.00035998022945572366, |
| "loss": 2.9097, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.4641852098987496, |
| "eval_loss": 2.875955820083618, |
| "eval_runtime": 3.2545, |
| "eval_samples_per_second": 1330.484, |
| "eval_steps_per_second": 2.765, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.464765441411123, |
| "grad_norm": 0.12761953473091125, |
| "learning_rate": 0.00035941607799111675, |
| "loss": 2.91, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.4653456729234965, |
| "grad_norm": 0.1247384324669838, |
| "learning_rate": 0.0003588517077502864, |
| "loss": 2.9149, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.4659259044358699, |
| "grad_norm": 0.14209751784801483, |
| "learning_rate": 0.00035828712081130296, |
| "loss": 2.9083, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.46650613594824336, |
| "grad_norm": 0.12985317409038544, |
| "learning_rate": 0.00035772231925303464, |
| "loss": 2.9046, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.46708636746061677, |
| "grad_norm": 0.14672869443893433, |
| "learning_rate": 0.00035715730515514, |
| "loss": 2.9113, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.46766659897299023, |
| "grad_norm": 0.13361111283302307, |
| "learning_rate": 0.0003565920805980602, |
| "loss": 2.913, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.46824683048536364, |
| "grad_norm": 0.12082985788583755, |
| "learning_rate": 0.0003560266476630112, |
| "loss": 2.9138, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.4688270619977371, |
| "grad_norm": 0.1150035560131073, |
| "learning_rate": 0.0003554610084319763, |
| "loss": 2.9048, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.4694072935101105, |
| "grad_norm": 0.1214471235871315, |
| "learning_rate": 0.0003548951649876984, |
| "loss": 2.9123, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.469987525022484, |
| "grad_norm": 0.12934035062789917, |
| "learning_rate": 0.0003543291194136723, |
| "loss": 2.9028, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.4705677565348574, |
| "grad_norm": 0.15276013314723969, |
| "learning_rate": 0.00035376287379413723, |
| "loss": 2.9031, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.47114798804723085, |
| "grad_norm": 0.1335725337266922, |
| "learning_rate": 0.00035319643021406886, |
| "loss": 2.9124, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.47172821955960426, |
| "grad_norm": 0.12289181351661682, |
| "learning_rate": 0.00035262979075917166, |
| "loss": 2.9053, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.4723084510719777, |
| "grad_norm": 0.11827896535396576, |
| "learning_rate": 0.0003520629575158715, |
| "loss": 2.9138, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.4728886825843512, |
| "grad_norm": 0.12505313754081726, |
| "learning_rate": 0.0003514959325713078, |
| "loss": 2.909, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.4734689140967246, |
| "grad_norm": 0.1321611851453781, |
| "learning_rate": 0.00035092871801332574, |
| "loss": 2.9075, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.47404914560909805, |
| "grad_norm": 0.12144722044467926, |
| "learning_rate": 0.00035036131593046895, |
| "loss": 2.9046, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.47462937712147146, |
| "grad_norm": 0.11893021315336227, |
| "learning_rate": 0.0003497937284119711, |
| "loss": 2.9021, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.4752096086338449, |
| "grad_norm": 0.13043691217899323, |
| "learning_rate": 0.0003492259575477491, |
| "loss": 2.9052, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.47578984014621833, |
| "grad_norm": 0.12443230301141739, |
| "learning_rate": 0.00034865800542839445, |
| "loss": 2.9003, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.4763700716585918, |
| "grad_norm": 0.1350659728050232, |
| "learning_rate": 0.0003480898741451667, |
| "loss": 2.9077, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.4769503031709652, |
| "grad_norm": 0.13212652504444122, |
| "learning_rate": 0.0003475215657899844, |
| "loss": 2.8955, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.47753053468333867, |
| "grad_norm": 0.13865076005458832, |
| "learning_rate": 0.0003469530824554188, |
| "loss": 2.9015, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.4781107661957121, |
| "grad_norm": 0.1313691884279251, |
| "learning_rate": 0.00034638442623468484, |
| "loss": 2.9014, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.47869099770808554, |
| "grad_norm": 0.13368923962116241, |
| "learning_rate": 0.00034581559922163447, |
| "loss": 2.8962, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.47927122922045895, |
| "grad_norm": 0.12228936702013016, |
| "learning_rate": 0.0003452466035107481, |
| "loss": 2.8997, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.4798514607328324, |
| "grad_norm": 0.12648892402648926, |
| "learning_rate": 0.00034467744119712787, |
| "loss": 2.9052, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.4804316922452058, |
| "grad_norm": 0.12937045097351074, |
| "learning_rate": 0.00034410811437648873, |
| "loss": 2.9037, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.4810119237575793, |
| "grad_norm": 0.12095940858125687, |
| "learning_rate": 0.00034353862514515185, |
| "loss": 2.9002, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.4815921552699527, |
| "grad_norm": 0.11992644518613815, |
| "learning_rate": 0.0003429689756000362, |
| "loss": 2.9051, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.48217238678232616, |
| "grad_norm": 0.1110587939620018, |
| "learning_rate": 0.0003423991678386511, |
| "loss": 2.9046, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.48275261829469956, |
| "grad_norm": 0.11831989139318466, |
| "learning_rate": 0.00034182920395908837, |
| "loss": 2.9001, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.48333284980707303, |
| "grad_norm": 0.11492130905389786, |
| "learning_rate": 0.0003412590860600148, |
| "loss": 2.8944, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.48391308131944644, |
| "grad_norm": 0.12855441868305206, |
| "learning_rate": 0.00034068881624066405, |
| "loss": 2.8941, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.4844933128318199, |
| "grad_norm": 0.12829254567623138, |
| "learning_rate": 0.0003401183966008296, |
| "loss": 2.8989, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.4850735443441933, |
| "grad_norm": 0.1167573556303978, |
| "learning_rate": 0.00033954782924085604, |
| "loss": 2.9027, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.48565377585656677, |
| "grad_norm": 0.12906575202941895, |
| "learning_rate": 0.0003389771162616324, |
| "loss": 2.893, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.4862340073689402, |
| "grad_norm": 0.12219451367855072, |
| "learning_rate": 0.00033840625976458357, |
| "loss": 2.8971, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.48681423888131364, |
| "grad_norm": 0.1430503875017166, |
| "learning_rate": 0.00033783526185166295, |
| "loss": 2.8945, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.4873944703936871, |
| "grad_norm": 0.1279267519712448, |
| "learning_rate": 0.00033726412462534454, |
| "loss": 2.8969, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.4879747019060605, |
| "grad_norm": 0.1239406168460846, |
| "learning_rate": 0.00033669285018861567, |
| "loss": 2.8994, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.488554933418434, |
| "grad_norm": 0.1379164159297943, |
| "learning_rate": 0.00033612144064496853, |
| "loss": 2.8949, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.4891351649308074, |
| "grad_norm": 0.12819483876228333, |
| "learning_rate": 0.00033554989809839294, |
| "loss": 2.897, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.48971539644318085, |
| "grad_norm": 0.12451434880495071, |
| "learning_rate": 0.00033497822465336854, |
| "loss": 2.903, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.49029562795555426, |
| "grad_norm": 0.1466275155544281, |
| "learning_rate": 0.0003344064224148567, |
| "loss": 2.8912, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.4908758594679277, |
| "grad_norm": 0.12186205387115479, |
| "learning_rate": 0.0003338344934882932, |
| "loss": 2.8998, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.49145609098030113, |
| "grad_norm": 0.12687867879867554, |
| "learning_rate": 0.00033326243997958014, |
| "loss": 2.8983, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.4920363224926746, |
| "grad_norm": 0.12620693445205688, |
| "learning_rate": 0.00033269026399507874, |
| "loss": 2.895, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.492616554005048, |
| "grad_norm": 0.1362224668264389, |
| "learning_rate": 0.00033211796764160074, |
| "loss": 2.9007, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.49319678551742147, |
| "grad_norm": 0.1300470530986786, |
| "learning_rate": 0.00033154555302640135, |
| "loss": 2.8914, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.4937770170297949, |
| "grad_norm": 0.12057654559612274, |
| "learning_rate": 0.00033097302225717096, |
| "loss": 2.8971, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.49435724854216834, |
| "grad_norm": 0.13263335824012756, |
| "learning_rate": 0.00033040037744202805, |
| "loss": 2.8971, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.49493748005454175, |
| "grad_norm": 0.12660051882266998, |
| "learning_rate": 0.00032982762068951073, |
| "loss": 2.8914, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.4955177115669152, |
| "grad_norm": 0.12398383021354675, |
| "learning_rate": 0.0003292547541085694, |
| "loss": 2.8936, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.4960979430792886, |
| "grad_norm": 0.1229000836610794, |
| "learning_rate": 0.00032868177980855876, |
| "loss": 2.888, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.4966781745916621, |
| "grad_norm": 0.11801040917634964, |
| "learning_rate": 0.0003281086998992303, |
| "loss": 2.8909, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.4972584061040355, |
| "grad_norm": 0.12945981323719025, |
| "learning_rate": 0.0003275355164907241, |
| "loss": 2.8878, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.49783863761640895, |
| "grad_norm": 0.12002068758010864, |
| "learning_rate": 0.0003269622316935618, |
| "loss": 2.892, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.49841886912878236, |
| "grad_norm": 0.12449994683265686, |
| "learning_rate": 0.0003263888476186377, |
| "loss": 2.8912, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.4989991006411558, |
| "grad_norm": 0.13638156652450562, |
| "learning_rate": 0.0003258153663772124, |
| "loss": 2.8877, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.49957933215352923, |
| "grad_norm": 0.12280316650867462, |
| "learning_rate": 0.0003252417900809038, |
| "loss": 2.8879, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.5001595636659026, |
| "grad_norm": 0.12275322526693344, |
| "learning_rate": 0.0003246681208416797, |
| "loss": 2.8906, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.5007397951782762, |
| "grad_norm": 0.1220172718167305, |
| "learning_rate": 0.0003240943607718506, |
| "loss": 2.8952, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.5013200266906496, |
| "grad_norm": 0.11458177119493484, |
| "learning_rate": 0.00032352051198406104, |
| "loss": 2.902, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.501900258203023, |
| "grad_norm": 0.12652765214443207, |
| "learning_rate": 0.0003229465765912824, |
| "loss": 2.9038, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.5024804897153965, |
| "grad_norm": 0.12456042319536209, |
| "learning_rate": 0.000322372556706805, |
| "loss": 2.8844, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.5030607212277699, |
| "grad_norm": 0.13799023628234863, |
| "learning_rate": 0.0003217984544442301, |
| "loss": 2.8987, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.5036409527401433, |
| "grad_norm": 0.12474406510591507, |
| "learning_rate": 0.00032122427191746234, |
| "loss": 2.8976, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.5042211842525167, |
| "grad_norm": 0.12724703550338745, |
| "learning_rate": 0.00032065001124070207, |
| "loss": 2.8862, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.5048014157648902, |
| "grad_norm": 0.11946358531713486, |
| "learning_rate": 0.0003200756745284371, |
| "loss": 2.8926, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.5053816472772636, |
| "grad_norm": 0.1258503645658493, |
| "learning_rate": 0.0003195012638954354, |
| "loss": 2.8932, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.505961878789637, |
| "grad_norm": 0.12079302221536636, |
| "learning_rate": 0.00031892678145673724, |
| "loss": 2.8914, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.5065421103020105, |
| "grad_norm": 0.12168605625629425, |
| "learning_rate": 0.000318352229327647, |
| "loss": 2.8867, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.507122341814384, |
| "grad_norm": 0.13427579402923584, |
| "learning_rate": 0.00031777760962372584, |
| "loss": 2.8893, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.5077025733267574, |
| "grad_norm": 0.1176985576748848, |
| "learning_rate": 0.00031720292446078374, |
| "loss": 2.8887, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.5082828048391308, |
| "grad_norm": 0.12351604551076889, |
| "learning_rate": 0.00031662817595487166, |
| "loss": 2.8915, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.5088630363515042, |
| "grad_norm": 0.1390778124332428, |
| "learning_rate": 0.00031605336622227365, |
| "loss": 2.8737, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.5094432678638777, |
| "grad_norm": 0.11954103410243988, |
| "learning_rate": 0.00031547849737949957, |
| "loss": 2.8888, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.5100234993762511, |
| "grad_norm": 0.12293373793363571, |
| "learning_rate": 0.00031490357154327674, |
| "loss": 2.8814, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.5106037308886245, |
| "grad_norm": 0.12284509837627411, |
| "learning_rate": 0.0003143285908305422, |
| "loss": 2.8874, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.511183962400998, |
| "grad_norm": 0.11924895644187927, |
| "learning_rate": 0.00031375355735843523, |
| "loss": 2.8813, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.5117641939133715, |
| "grad_norm": 0.12003005295991898, |
| "learning_rate": 0.00031317847324428924, |
| "loss": 2.8836, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.5123444254257449, |
| "grad_norm": 0.13070861995220184, |
| "learning_rate": 0.00031260334060562416, |
| "loss": 2.8851, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.5129246569381183, |
| "grad_norm": 0.11900255084037781, |
| "learning_rate": 0.0003120281615601387, |
| "loss": 2.8827, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.5135048884504917, |
| "grad_norm": 0.12470702081918716, |
| "learning_rate": 0.0003114529382257024, |
| "loss": 2.8916, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.5140851199628652, |
| "grad_norm": 0.1312616765499115, |
| "learning_rate": 0.0003108776727203478, |
| "loss": 2.897, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.5146653514752386, |
| "grad_norm": 0.13872870802879333, |
| "learning_rate": 0.00031030236716226265, |
| "loss": 2.8836, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.515245582987612, |
| "grad_norm": 0.11608674377202988, |
| "learning_rate": 0.00030972702366978237, |
| "loss": 2.8875, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.5158258144999855, |
| "grad_norm": 0.12205769121646881, |
| "learning_rate": 0.000309151644361382, |
| "loss": 2.8862, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.516406046012359, |
| "grad_norm": 0.12009671330451965, |
| "learning_rate": 0.0003085762313556683, |
| "loss": 2.8797, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.5169862775247324, |
| "grad_norm": 0.12120591104030609, |
| "learning_rate": 0.0003080007867713724, |
| "loss": 2.8905, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.5175665090371058, |
| "grad_norm": 0.12842518091201782, |
| "learning_rate": 0.00030742531272734153, |
| "loss": 2.8747, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.5181467405494793, |
| "grad_norm": 0.12532438337802887, |
| "learning_rate": 0.00030684981134253123, |
| "loss": 2.8892, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.5187269720618527, |
| "grad_norm": 0.1295221596956253, |
| "learning_rate": 0.0003062742847359981, |
| "loss": 2.8842, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.5193072035742261, |
| "grad_norm": 0.1296953707933426, |
| "learning_rate": 0.00030569873502689116, |
| "loss": 2.878, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.5198874350865995, |
| "grad_norm": 0.14120282232761383, |
| "learning_rate": 0.00030512316433444495, |
| "loss": 2.8809, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.520467666598973, |
| "grad_norm": 0.12610268592834473, |
| "learning_rate": 0.000304547574777971, |
| "loss": 2.8794, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.5210478981113464, |
| "grad_norm": 0.11908390372991562, |
| "learning_rate": 0.0003039719684768503, |
| "loss": 2.8839, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.5216281296237198, |
| "grad_norm": 0.13508306443691254, |
| "learning_rate": 0.0003033963475505256, |
| "loss": 2.8782, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.5222083611360933, |
| "grad_norm": 0.12108524888753891, |
| "learning_rate": 0.00030282071411849343, |
| "loss": 2.879, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5222083611360933, |
| "eval_loss": 2.845144271850586, |
| "eval_runtime": 3.2553, |
| "eval_samples_per_second": 1330.14, |
| "eval_steps_per_second": 2.765, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.5227885926484668, |
| "grad_norm": 0.13046176731586456, |
| "learning_rate": 0.00030224507030029627, |
| "loss": 2.8809, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.5233688241608402, |
| "grad_norm": 0.12113803625106812, |
| "learning_rate": 0.0003016694182155152, |
| "loss": 2.8839, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.5239490556732136, |
| "grad_norm": 0.12337899953126907, |
| "learning_rate": 0.0003010937599837613, |
| "loss": 2.8821, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.524529287185587, |
| "grad_norm": 0.11981160938739777, |
| "learning_rate": 0.0003005180977246686, |
| "loss": 2.888, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.5251095186979605, |
| "grad_norm": 0.12357629835605621, |
| "learning_rate": 0.0002999424335578858, |
| "loss": 2.8804, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.5256897502103339, |
| "grad_norm": 0.11688230186700821, |
| "learning_rate": 0.00029936676960306863, |
| "loss": 2.8891, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.5262699817227073, |
| "grad_norm": 0.11743608117103577, |
| "learning_rate": 0.0002987911079798723, |
| "loss": 2.8685, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.5268502132350807, |
| "grad_norm": 0.1338096410036087, |
| "learning_rate": 0.0002982154508079428, |
| "loss": 2.8758, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.5274304447474543, |
| "grad_norm": 0.13182982802391052, |
| "learning_rate": 0.0002976398002069105, |
| "loss": 2.882, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.5280106762598277, |
| "grad_norm": 0.12470164895057678, |
| "learning_rate": 0.000297064158296381, |
| "loss": 2.8817, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.5285909077722011, |
| "grad_norm": 0.11741513013839722, |
| "learning_rate": 0.0002964885271959282, |
| "loss": 2.8768, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.5291711392845746, |
| "grad_norm": 0.1364392340183258, |
| "learning_rate": 0.0002959129090250863, |
| "loss": 2.8822, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.529751370796948, |
| "grad_norm": 0.12005024403333664, |
| "learning_rate": 0.0002953373059033413, |
| "loss": 2.8789, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.5303316023093214, |
| "grad_norm": 0.1239180713891983, |
| "learning_rate": 0.0002947617199501245, |
| "loss": 2.8754, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.5309118338216948, |
| "grad_norm": 0.12774530053138733, |
| "learning_rate": 0.00029418615328480357, |
| "loss": 2.8773, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.5314920653340683, |
| "grad_norm": 0.11815381795167923, |
| "learning_rate": 0.00029361060802667526, |
| "loss": 2.8711, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.5320722968464418, |
| "grad_norm": 0.12450312077999115, |
| "learning_rate": 0.0002930350862949577, |
| "loss": 2.8743, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.5326525283588152, |
| "grad_norm": 0.12741632759571075, |
| "learning_rate": 0.00029245959020878187, |
| "loss": 2.8846, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.5332327598711886, |
| "grad_norm": 0.12712997198104858, |
| "learning_rate": 0.0002918841218871848, |
| "loss": 2.8774, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.5338129913835621, |
| "grad_norm": 0.11238303780555725, |
| "learning_rate": 0.0002913086834491012, |
| "loss": 2.8782, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.5343932228959355, |
| "grad_norm": 0.1266774982213974, |
| "learning_rate": 0.00029073327701335566, |
| "loss": 2.883, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.5349734544083089, |
| "grad_norm": 0.12266207486391068, |
| "learning_rate": 0.00029015790469865484, |
| "loss": 2.8735, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.5355536859206823, |
| "grad_norm": 0.10979332774877548, |
| "learning_rate": 0.0002895825686235799, |
| "loss": 2.8791, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.5361339174330558, |
| "grad_norm": 0.11939531564712524, |
| "learning_rate": 0.0002890072709065787, |
| "loss": 2.8745, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.5367141489454292, |
| "grad_norm": 0.12080537527799606, |
| "learning_rate": 0.0002884320136659575, |
| "loss": 2.8775, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.5372943804578026, |
| "grad_norm": 0.12394317239522934, |
| "learning_rate": 0.00028785679901987394, |
| "loss": 2.8734, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.537874611970176, |
| "grad_norm": 0.12320924550294876, |
| "learning_rate": 0.0002872816290863283, |
| "loss": 2.8703, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.5384548434825496, |
| "grad_norm": 0.12183520197868347, |
| "learning_rate": 0.0002867065059831568, |
| "loss": 2.8731, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.539035074994923, |
| "grad_norm": 0.13638751208782196, |
| "learning_rate": 0.0002861314318280229, |
| "loss": 2.8725, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.5396153065072964, |
| "grad_norm": 0.12684093415737152, |
| "learning_rate": 0.0002855564087384098, |
| "loss": 2.8714, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.5401955380196698, |
| "grad_norm": 0.11322664469480515, |
| "learning_rate": 0.00028498143883161277, |
| "loss": 2.8693, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.5407757695320433, |
| "grad_norm": 0.11759771406650543, |
| "learning_rate": 0.00028440652422473124, |
| "loss": 2.8679, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.5413560010444167, |
| "grad_norm": 0.12511123716831207, |
| "learning_rate": 0.0002838316670346612, |
| "loss": 2.8744, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.5419362325567901, |
| "grad_norm": 0.1160508468747139, |
| "learning_rate": 0.00028325686937808673, |
| "loss": 2.874, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.5425164640691637, |
| "grad_norm": 0.11813979595899582, |
| "learning_rate": 0.0002826821333714732, |
| "loss": 2.8691, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.5430966955815371, |
| "grad_norm": 0.11728700250387192, |
| "learning_rate": 0.0002821074611310588, |
| "loss": 2.8717, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.5436769270939105, |
| "grad_norm": 0.12824493646621704, |
| "learning_rate": 0.0002815328547728469, |
| "loss": 2.875, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.5442571586062839, |
| "grad_norm": 0.12653270363807678, |
| "learning_rate": 0.0002809583164125983, |
| "loss": 2.8682, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.5448373901186574, |
| "grad_norm": 0.13113363087177277, |
| "learning_rate": 0.00028038384816582337, |
| "loss": 2.8583, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.5454176216310308, |
| "grad_norm": 0.11145169287919998, |
| "learning_rate": 0.0002798094521477744, |
| "loss": 2.8714, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.5459978531434042, |
| "grad_norm": 0.12025914341211319, |
| "learning_rate": 0.0002792351304734378, |
| "loss": 2.8689, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.5465780846557776, |
| "grad_norm": 0.1347450315952301, |
| "learning_rate": 0.000278660885257526, |
| "loss": 2.8803, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.5471583161681511, |
| "grad_norm": 0.11728854477405548, |
| "learning_rate": 0.0002780867186144703, |
| "loss": 2.8614, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.5477385476805245, |
| "grad_norm": 0.1399793028831482, |
| "learning_rate": 0.00027751263265841204, |
| "loss": 2.8777, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.548318779192898, |
| "grad_norm": 0.13229645788669586, |
| "learning_rate": 0.0002769386295031961, |
| "loss": 2.8723, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.5488990107052714, |
| "grad_norm": 0.12199070304632187, |
| "learning_rate": 0.00027636471126236213, |
| "loss": 2.8577, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.5494792422176449, |
| "grad_norm": 0.14131730794906616, |
| "learning_rate": 0.0002757908800491373, |
| "loss": 2.857, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.5500594737300183, |
| "grad_norm": 0.1343252956867218, |
| "learning_rate": 0.0002752171379764283, |
| "loss": 2.8689, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.5506397052423917, |
| "grad_norm": 0.1338685154914856, |
| "learning_rate": 0.0002746434871568133, |
| "loss": 2.8775, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.5512199367547651, |
| "grad_norm": 0.12388128787279129, |
| "learning_rate": 0.00027406992970253506, |
| "loss": 2.8761, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.5518001682671386, |
| "grad_norm": 0.12272147834300995, |
| "learning_rate": 0.0002734964677254918, |
| "loss": 2.8722, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.552380399779512, |
| "grad_norm": 0.12000911682844162, |
| "learning_rate": 0.00027292310333723086, |
| "loss": 2.8743, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.5529606312918854, |
| "grad_norm": 0.13635672628879547, |
| "learning_rate": 0.00027234983864894, |
| "loss": 2.8657, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.5535408628042588, |
| "grad_norm": 0.12129581719636917, |
| "learning_rate": 0.0002717766757714398, |
| "loss": 2.8661, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.5541210943166324, |
| "grad_norm": 0.11717355996370316, |
| "learning_rate": 0.00027120361681517606, |
| "loss": 2.8707, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.5547013258290058, |
| "grad_norm": 0.12199341505765915, |
| "learning_rate": 0.0002706306638902117, |
| "loss": 2.8555, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.5552815573413792, |
| "grad_norm": 0.1175154522061348, |
| "learning_rate": 0.0002700578191062196, |
| "loss": 2.8721, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.5558617888537526, |
| "grad_norm": 0.12546683847904205, |
| "learning_rate": 0.00026948508457247416, |
| "loss": 2.8689, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.5564420203661261, |
| "grad_norm": 0.11439734697341919, |
| "learning_rate": 0.000268912462397844, |
| "loss": 2.8552, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.5570222518784995, |
| "grad_norm": 0.13139833509922028, |
| "learning_rate": 0.00026833995469078404, |
| "loss": 2.8728, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.5576024833908729, |
| "grad_norm": 0.14722158014774323, |
| "learning_rate": 0.00026776756355932743, |
| "loss": 2.8594, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.5581827149032464, |
| "grad_norm": 0.12206868082284927, |
| "learning_rate": 0.00026719529111107846, |
| "loss": 2.8713, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.5587629464156199, |
| "grad_norm": 0.11777371913194656, |
| "learning_rate": 0.00026662313945320404, |
| "loss": 2.8656, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.5593431779279933, |
| "grad_norm": 0.12058188021183014, |
| "learning_rate": 0.00026605111069242664, |
| "loss": 2.8712, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.5599234094403667, |
| "grad_norm": 0.1278459131717682, |
| "learning_rate": 0.00026547920693501616, |
| "loss": 2.8686, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.5605036409527402, |
| "grad_norm": 0.12272592633962631, |
| "learning_rate": 0.00026490743028678194, |
| "loss": 2.8636, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.5610838724651136, |
| "grad_norm": 0.11543965339660645, |
| "learning_rate": 0.00026433578285306567, |
| "loss": 2.8592, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.561664103977487, |
| "grad_norm": 0.11765621602535248, |
| "learning_rate": 0.0002637642667387329, |
| "loss": 2.867, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.5622443354898604, |
| "grad_norm": 0.12996822595596313, |
| "learning_rate": 0.0002631928840481662, |
| "loss": 2.8669, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.5628245670022339, |
| "grad_norm": 0.11992313712835312, |
| "learning_rate": 0.00026262163688525606, |
| "loss": 2.8576, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.5634047985146073, |
| "grad_norm": 0.1216612309217453, |
| "learning_rate": 0.00026205052735339457, |
| "loss": 2.8656, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.5639850300269807, |
| "grad_norm": 0.11923664063215256, |
| "learning_rate": 0.00026147955755546686, |
| "loss": 2.8625, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.5645652615393542, |
| "grad_norm": 0.1174679845571518, |
| "learning_rate": 0.00026090872959384353, |
| "loss": 2.8589, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.5651454930517277, |
| "grad_norm": 0.12439408898353577, |
| "learning_rate": 0.00026033804557037304, |
| "loss": 2.8573, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.5657257245641011, |
| "grad_norm": 0.12268688529729843, |
| "learning_rate": 0.0002597675075863735, |
| "loss": 2.8612, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.5663059560764745, |
| "grad_norm": 0.11994469910860062, |
| "learning_rate": 0.0002591971177426256, |
| "loss": 2.8667, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.5668861875888479, |
| "grad_norm": 0.12739793956279755, |
| "learning_rate": 0.0002586268781393648, |
| "loss": 2.8657, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.5674664191012214, |
| "grad_norm": 0.12942016124725342, |
| "learning_rate": 0.00025805679087627267, |
| "loss": 2.863, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.5680466506135948, |
| "grad_norm": 0.12867708504199982, |
| "learning_rate": 0.00025748685805247046, |
| "loss": 2.8596, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.5686268821259682, |
| "grad_norm": 0.1384700983762741, |
| "learning_rate": 0.00025691708176651034, |
| "loss": 2.8612, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.5692071136383416, |
| "grad_norm": 0.11695626378059387, |
| "learning_rate": 0.0002563474641163686, |
| "loss": 2.8613, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.5697873451507152, |
| "grad_norm": 0.12379258126020432, |
| "learning_rate": 0.0002557780071994367, |
| "loss": 2.8637, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.5703675766630886, |
| "grad_norm": 0.13220758736133575, |
| "learning_rate": 0.00025520871311251493, |
| "loss": 2.8572, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.570947808175462, |
| "grad_norm": 0.12004509568214417, |
| "learning_rate": 0.00025463958395180377, |
| "loss": 2.8614, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.5715280396878355, |
| "grad_norm": 0.12457242608070374, |
| "learning_rate": 0.0002540706218128962, |
| "loss": 2.8606, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.5721082712002089, |
| "grad_norm": 0.125260129570961, |
| "learning_rate": 0.0002535018287907707, |
| "loss": 2.8606, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.5726885027125823, |
| "grad_norm": 0.11718660593032837, |
| "learning_rate": 0.00025293320697978254, |
| "loss": 2.86, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.5732687342249557, |
| "grad_norm": 0.1096329316496849, |
| "learning_rate": 0.0002523647584736568, |
| "loss": 2.8743, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.5738489657373292, |
| "grad_norm": 0.11327598243951797, |
| "learning_rate": 0.0002517964853654806, |
| "loss": 2.8492, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.5744291972497026, |
| "grad_norm": 0.1237105280160904, |
| "learning_rate": 0.0002512283897476949, |
| "loss": 2.852, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.5750094287620761, |
| "grad_norm": 0.11739984154701233, |
| "learning_rate": 0.0002506604737120874, |
| "loss": 2.8535, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.5755896602744495, |
| "grad_norm": 0.12682320177555084, |
| "learning_rate": 0.00025009273934978424, |
| "loss": 2.8575, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.576169891786823, |
| "grad_norm": 0.12347414344549179, |
| "learning_rate": 0.00024952518875124305, |
| "loss": 2.8596, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.5767501232991964, |
| "grad_norm": 0.11207421123981476, |
| "learning_rate": 0.0002489578240062444, |
| "loss": 2.8563, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.5773303548115698, |
| "grad_norm": 0.12151192873716354, |
| "learning_rate": 0.0002483906472038848, |
| "loss": 2.8513, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.5779105863239432, |
| "grad_norm": 0.11661417037248611, |
| "learning_rate": 0.00024782366043256876, |
| "loss": 2.8538, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.5784908178363167, |
| "grad_norm": 0.11908597499132156, |
| "learning_rate": 0.0002472568657800007, |
| "loss": 2.8549, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.5790710493486901, |
| "grad_norm": 0.12369140982627869, |
| "learning_rate": 0.00024669026533317816, |
| "loss": 2.859, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.5796512808610635, |
| "grad_norm": 0.12169597297906876, |
| "learning_rate": 0.0002461238611783832, |
| "loss": 2.8516, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.580231512373437, |
| "grad_norm": 0.1137092188000679, |
| "learning_rate": 0.0002455576554011753, |
| "loss": 2.8506, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.580231512373437, |
| "eval_loss": 2.8198139667510986, |
| "eval_runtime": 3.2544, |
| "eval_samples_per_second": 1330.504, |
| "eval_steps_per_second": 2.765, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.5808117438858105, |
| "grad_norm": 0.11945224553346634, |
| "learning_rate": 0.00024499165008638355, |
| "loss": 2.8527, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.5813919753981839, |
| "grad_norm": 0.12194681167602539, |
| "learning_rate": 0.0002444258473180986, |
| "loss": 2.8676, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.5819722069105573, |
| "grad_norm": 0.12587039172649384, |
| "learning_rate": 0.00024386024917966563, |
| "loss": 2.8468, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.5825524384229307, |
| "grad_norm": 0.12192162871360779, |
| "learning_rate": 0.0002432948577536762, |
| "loss": 2.8484, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.5831326699353042, |
| "grad_norm": 0.11401449888944626, |
| "learning_rate": 0.00024272967512196093, |
| "loss": 2.8636, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.5837129014476776, |
| "grad_norm": 0.12227935343980789, |
| "learning_rate": 0.0002421647033655812, |
| "loss": 2.8497, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.584293132960051, |
| "grad_norm": 0.11773716658353806, |
| "learning_rate": 0.00024159994456482233, |
| "loss": 2.857, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.5848733644724246, |
| "grad_norm": 0.124253049492836, |
| "learning_rate": 0.00024103540079918555, |
| "loss": 2.8499, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.585453595984798, |
| "grad_norm": 0.11704014986753464, |
| "learning_rate": 0.00024047107414737985, |
| "loss": 2.8522, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.5860338274971714, |
| "grad_norm": 0.11885286867618561, |
| "learning_rate": 0.0002399069666873153, |
| "loss": 2.855, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.5866140590095448, |
| "grad_norm": 0.12006965279579163, |
| "learning_rate": 0.00023934308049609453, |
| "loss": 2.8488, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.5871942905219183, |
| "grad_norm": 0.12023113667964935, |
| "learning_rate": 0.00023877941765000564, |
| "loss": 2.8542, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.5877745220342917, |
| "grad_norm": 0.12737338244915009, |
| "learning_rate": 0.00023821598022451436, |
| "loss": 2.8588, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.5883547535466651, |
| "grad_norm": 0.11698620766401291, |
| "learning_rate": 0.00023765277029425607, |
| "loss": 2.8544, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.5889349850590385, |
| "grad_norm": 0.12589864432811737, |
| "learning_rate": 0.000237089789933029, |
| "loss": 2.8448, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.589515216571412, |
| "grad_norm": 0.11532309651374817, |
| "learning_rate": 0.0002365270412137856, |
| "loss": 2.8618, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.5900954480837854, |
| "grad_norm": 0.10937913507223129, |
| "learning_rate": 0.00023596452620862585, |
| "loss": 2.8527, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.5906756795961589, |
| "grad_norm": 0.11980416625738144, |
| "learning_rate": 0.00023540224698878861, |
| "loss": 2.8553, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.5912559111085323, |
| "grad_norm": 0.11810686439275742, |
| "learning_rate": 0.00023484020562464507, |
| "loss": 2.8545, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.5918361426209058, |
| "grad_norm": 0.11651547253131866, |
| "learning_rate": 0.00023427840418569043, |
| "loss": 2.8522, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.5924163741332792, |
| "grad_norm": 0.11145967990159988, |
| "learning_rate": 0.00023371684474053633, |
| "loss": 2.8564, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.5929966056456526, |
| "grad_norm": 0.11742381006479263, |
| "learning_rate": 0.0002331555293569037, |
| "loss": 2.8529, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.593576837158026, |
| "grad_norm": 0.1287650465965271, |
| "learning_rate": 0.00023259446010161425, |
| "loss": 2.847, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.5941570686703995, |
| "grad_norm": 0.12560808658599854, |
| "learning_rate": 0.00023203363904058394, |
| "loss": 2.8424, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.5947373001827729, |
| "grad_norm": 0.13144509494304657, |
| "learning_rate": 0.0002314730682388147, |
| "loss": 2.8497, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.5953175316951463, |
| "grad_norm": 0.11483640223741531, |
| "learning_rate": 0.00023091274976038686, |
| "loss": 2.8525, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.5958977632075197, |
| "grad_norm": 0.12085619568824768, |
| "learning_rate": 0.0002303526856684519, |
| "loss": 2.846, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.5964779947198933, |
| "grad_norm": 0.13581375777721405, |
| "learning_rate": 0.00022979287802522423, |
| "loss": 2.8471, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.5970582262322667, |
| "grad_norm": 0.11522037535905838, |
| "learning_rate": 0.00022923332889197447, |
| "loss": 2.841, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.5976384577446401, |
| "grad_norm": 0.1114853248000145, |
| "learning_rate": 0.00022867404032902097, |
| "loss": 2.8507, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.5982186892570136, |
| "grad_norm": 0.1106984093785286, |
| "learning_rate": 0.00022811501439572288, |
| "loss": 2.8501, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.598798920769387, |
| "grad_norm": 0.12095363438129425, |
| "learning_rate": 0.0002275562531504724, |
| "loss": 2.8392, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.5993791522817604, |
| "grad_norm": 0.11527710407972336, |
| "learning_rate": 0.00022699775865068667, |
| "loss": 2.8498, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.5999593837941338, |
| "grad_norm": 0.11631615459918976, |
| "learning_rate": 0.00022643953295280127, |
| "loss": 2.8526, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.6005396153065073, |
| "grad_norm": 0.1107979491353035, |
| "learning_rate": 0.0002258815781122614, |
| "loss": 2.8488, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.6011198468188808, |
| "grad_norm": 0.1126491129398346, |
| "learning_rate": 0.00022532389618351532, |
| "loss": 2.8404, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.6017000783312542, |
| "grad_norm": 0.11740950495004654, |
| "learning_rate": 0.00022476648922000646, |
| "loss": 2.8499, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.6022803098436276, |
| "grad_norm": 0.11938904970884323, |
| "learning_rate": 0.00022420935927416547, |
| "loss": 2.8547, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.6028605413560011, |
| "grad_norm": 0.11484769731760025, |
| "learning_rate": 0.00022365250839740338, |
| "loss": 2.8392, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.6034407728683745, |
| "grad_norm": 0.12051428109407425, |
| "learning_rate": 0.0002230959386401032, |
| "loss": 2.8416, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.6040210043807479, |
| "grad_norm": 0.12364054471254349, |
| "learning_rate": 0.00022253965205161326, |
| "loss": 2.8343, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.6046012358931213, |
| "grad_norm": 0.1125280112028122, |
| "learning_rate": 0.00022198365068023892, |
| "loss": 2.8441, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.6051814674054948, |
| "grad_norm": 0.11715447157621384, |
| "learning_rate": 0.00022142793657323558, |
| "loss": 2.8391, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.6057616989178682, |
| "grad_norm": 0.11433437466621399, |
| "learning_rate": 0.00022087251177680086, |
| "loss": 2.8549, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.6063419304302416, |
| "grad_norm": 0.1222948208451271, |
| "learning_rate": 0.00022031737833606686, |
| "loss": 2.8406, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.6069221619426151, |
| "grad_norm": 0.11805406212806702, |
| "learning_rate": 0.0002197625382950932, |
| "loss": 2.8415, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.6075023934549886, |
| "grad_norm": 0.13002602756023407, |
| "learning_rate": 0.00021920799369685892, |
| "loss": 2.851, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.608082624967362, |
| "grad_norm": 0.11929357796907425, |
| "learning_rate": 0.00021865374658325544, |
| "loss": 2.8437, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.6086628564797354, |
| "grad_norm": 0.11752030998468399, |
| "learning_rate": 0.00021809979899507876, |
| "loss": 2.8532, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.6092430879921088, |
| "grad_norm": 0.12201694399118423, |
| "learning_rate": 0.00021754615297202168, |
| "loss": 2.8474, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.6098233195044823, |
| "grad_norm": 0.12019883096218109, |
| "learning_rate": 0.00021699281055266706, |
| "loss": 2.8422, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.6104035510168557, |
| "grad_norm": 0.12413442134857178, |
| "learning_rate": 0.00021643977377447954, |
| "loss": 2.8316, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.6109837825292291, |
| "grad_norm": 0.11983013898134232, |
| "learning_rate": 0.00021588704467379862, |
| "loss": 2.8448, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.6115640140416027, |
| "grad_norm": 0.13365738093852997, |
| "learning_rate": 0.0002153346252858306, |
| "loss": 2.837, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.6121442455539761, |
| "grad_norm": 0.13185539841651917, |
| "learning_rate": 0.00021478251764464148, |
| "loss": 2.8468, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.6127244770663495, |
| "grad_norm": 0.1213960349559784, |
| "learning_rate": 0.00021423072378314964, |
| "loss": 2.8444, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.6133047085787229, |
| "grad_norm": 0.12037312239408493, |
| "learning_rate": 0.00021367924573311773, |
| "loss": 2.8438, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.6138849400910964, |
| "grad_norm": 0.12542636692523956, |
| "learning_rate": 0.00021312808552514592, |
| "loss": 2.8424, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.6144651716034698, |
| "grad_norm": 0.14415085315704346, |
| "learning_rate": 0.00021257724518866352, |
| "loss": 2.8417, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.6150454031158432, |
| "grad_norm": 0.1150176003575325, |
| "learning_rate": 0.00021202672675192248, |
| "loss": 2.8435, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.6156256346282166, |
| "grad_norm": 0.11662835627794266, |
| "learning_rate": 0.00021147653224198951, |
| "loss": 2.8441, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.6162058661405901, |
| "grad_norm": 0.11693531274795532, |
| "learning_rate": 0.00021092666368473817, |
| "loss": 2.8391, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.6167860976529635, |
| "grad_norm": 0.11077579110860825, |
| "learning_rate": 0.0002103771231048423, |
| "loss": 2.8345, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.617366329165337, |
| "grad_norm": 0.11653861403465271, |
| "learning_rate": 0.00020982791252576773, |
| "loss": 2.8448, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.6179465606777104, |
| "grad_norm": 0.11749275773763657, |
| "learning_rate": 0.00020927903396976552, |
| "loss": 2.8558, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.6185267921900839, |
| "grad_norm": 0.11677636206150055, |
| "learning_rate": 0.00020873048945786382, |
| "loss": 2.8353, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.6191070237024573, |
| "grad_norm": 0.11745753139257431, |
| "learning_rate": 0.00020818228100986106, |
| "loss": 2.8494, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.6196872552148307, |
| "grad_norm": 0.11747489869594574, |
| "learning_rate": 0.00020763441064431827, |
| "loss": 2.8397, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.6202674867272041, |
| "grad_norm": 0.11356910318136215, |
| "learning_rate": 0.00020708688037855138, |
| "loss": 2.8472, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.6208477182395776, |
| "grad_norm": 0.11063719540834427, |
| "learning_rate": 0.00020653969222862435, |
| "loss": 2.8508, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.621427949751951, |
| "grad_norm": 0.10978058725595474, |
| "learning_rate": 0.00020599284820934112, |
| "loss": 2.8308, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.6220081812643244, |
| "grad_norm": 0.11860186606645584, |
| "learning_rate": 0.00020544635033423867, |
| "loss": 2.8263, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.6225884127766979, |
| "grad_norm": 0.1312050074338913, |
| "learning_rate": 0.00020490020061557953, |
| "loss": 2.8455, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.6231686442890714, |
| "grad_norm": 0.13181331753730774, |
| "learning_rate": 0.00020435440106434408, |
| "loss": 2.8489, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.6237488758014448, |
| "grad_norm": 0.1471181958913803, |
| "learning_rate": 0.00020380895369022357, |
| "loss": 2.8285, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.6243291073138182, |
| "grad_norm": 0.12075991183519363, |
| "learning_rate": 0.00020326386050161215, |
| "loss": 2.8402, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.6249093388261916, |
| "grad_norm": 0.1117480993270874, |
| "learning_rate": 0.0002027191235056003, |
| "loss": 2.8426, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.6254895703385651, |
| "grad_norm": 0.11622477322816849, |
| "learning_rate": 0.0002021747447079665, |
| "loss": 2.8423, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.6260698018509385, |
| "grad_norm": 0.11475232988595963, |
| "learning_rate": 0.00020163072611317055, |
| "loss": 2.835, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.6266500333633119, |
| "grad_norm": 0.12252891808748245, |
| "learning_rate": 0.00020108706972434606, |
| "loss": 2.8381, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.6272302648756855, |
| "grad_norm": 0.11319098621606827, |
| "learning_rate": 0.00020054377754329258, |
| "loss": 2.8326, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.6278104963880589, |
| "grad_norm": 0.11103735119104385, |
| "learning_rate": 0.00020000085157046902, |
| "loss": 2.8292, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.6283907279004323, |
| "grad_norm": 0.12254971265792847, |
| "learning_rate": 0.00019945829380498556, |
| "loss": 2.8379, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.6289709594128057, |
| "grad_norm": 0.1253294050693512, |
| "learning_rate": 0.00019891610624459674, |
| "loss": 2.8404, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.6295511909251792, |
| "grad_norm": 0.12701797485351562, |
| "learning_rate": 0.0001983742908856942, |
| "loss": 2.8331, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.6301314224375526, |
| "grad_norm": 0.1351822167634964, |
| "learning_rate": 0.00019783284972329845, |
| "loss": 2.831, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.630711653949926, |
| "grad_norm": 0.11504077911376953, |
| "learning_rate": 0.00019729178475105292, |
| "loss": 2.8397, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.6312918854622994, |
| "grad_norm": 0.11900710314512253, |
| "learning_rate": 0.00019675109796121523, |
| "loss": 2.8328, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.6318721169746729, |
| "grad_norm": 0.11879398673772812, |
| "learning_rate": 0.00019621079134465096, |
| "loss": 2.8275, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.6324523484870463, |
| "grad_norm": 0.11795203387737274, |
| "learning_rate": 0.00019567086689082562, |
| "loss": 2.828, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.6330325799994198, |
| "grad_norm": 0.1163572296500206, |
| "learning_rate": 0.00019513132658779758, |
| "loss": 2.8387, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.6336128115117932, |
| "grad_norm": 0.11812139302492142, |
| "learning_rate": 0.00019459217242221092, |
| "loss": 2.8336, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.6341930430241667, |
| "grad_norm": 0.11195320636034012, |
| "learning_rate": 0.00019405340637928755, |
| "loss": 2.8427, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.6347732745365401, |
| "grad_norm": 0.11674754321575165, |
| "learning_rate": 0.0001935150304428206, |
| "loss": 2.8279, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.6353535060489135, |
| "grad_norm": 0.11432943493127823, |
| "learning_rate": 0.00019297704659516655, |
| "loss": 2.8267, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.6359337375612869, |
| "grad_norm": 0.12507887184619904, |
| "learning_rate": 0.0001924394568172384, |
| "loss": 2.8309, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.6365139690736604, |
| "grad_norm": 0.12057894468307495, |
| "learning_rate": 0.0001919022630884981, |
| "loss": 2.8422, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.6370942005860338, |
| "grad_norm": 0.11377721279859543, |
| "learning_rate": 0.000191365467386949, |
| "loss": 2.8381, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.6376744320984072, |
| "grad_norm": 0.11800755560398102, |
| "learning_rate": 0.00019082907168912932, |
| "loss": 2.8331, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.6382546636107806, |
| "grad_norm": 0.12301038950681686, |
| "learning_rate": 0.00019029307797010402, |
| "loss": 2.831, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.6382546636107806, |
| "eval_loss": 2.796895742416382, |
| "eval_runtime": 3.2627, |
| "eval_samples_per_second": 1327.123, |
| "eval_steps_per_second": 2.758, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.6388348951231542, |
| "grad_norm": 0.1179603561758995, |
| "learning_rate": 0.00018975748820345838, |
| "loss": 2.8436, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.6394151266355276, |
| "grad_norm": 0.13155020773410797, |
| "learning_rate": 0.0001892223043612898, |
| "loss": 2.8317, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.639995358147901, |
| "grad_norm": 0.11468763649463654, |
| "learning_rate": 0.00018868752841420122, |
| "loss": 2.8284, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.6405755896602745, |
| "grad_norm": 0.10960279405117035, |
| "learning_rate": 0.00018815316233129393, |
| "loss": 2.8286, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.6411558211726479, |
| "grad_norm": 0.1298363208770752, |
| "learning_rate": 0.00018761920808015966, |
| "loss": 2.8326, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.6417360526850213, |
| "grad_norm": 0.11535240709781647, |
| "learning_rate": 0.00018708566762687403, |
| "loss": 2.8281, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.6423162841973947, |
| "grad_norm": 0.12528617680072784, |
| "learning_rate": 0.00018655254293598866, |
| "loss": 2.8179, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.6428965157097682, |
| "grad_norm": 0.11952237784862518, |
| "learning_rate": 0.00018601983597052468, |
| "loss": 2.8294, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.6434767472221417, |
| "grad_norm": 0.12121649086475372, |
| "learning_rate": 0.00018548754869196496, |
| "loss": 2.8336, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.6440569787345151, |
| "grad_norm": 0.12465447187423706, |
| "learning_rate": 0.00018495568306024687, |
| "loss": 2.8314, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.6446372102468885, |
| "grad_norm": 0.10858411341905594, |
| "learning_rate": 0.00018442424103375563, |
| "loss": 2.8191, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.645217441759262, |
| "grad_norm": 0.1240803673863411, |
| "learning_rate": 0.00018389322456931616, |
| "loss": 2.8334, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.6457976732716354, |
| "grad_norm": 0.11604313552379608, |
| "learning_rate": 0.00018336263562218695, |
| "loss": 2.8241, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.6463779047840088, |
| "grad_norm": 0.10764401406049728, |
| "learning_rate": 0.00018283247614605185, |
| "loss": 2.8343, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.6469581362963822, |
| "grad_norm": 0.11341771483421326, |
| "learning_rate": 0.00018230274809301377, |
| "loss": 2.8323, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.6475383678087557, |
| "grad_norm": 0.11618595570325851, |
| "learning_rate": 0.00018177345341358699, |
| "loss": 2.8295, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.6481185993211291, |
| "grad_norm": 0.11492364853620529, |
| "learning_rate": 0.00018124459405668967, |
| "loss": 2.8253, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.6486988308335025, |
| "grad_norm": 0.12541726231575012, |
| "learning_rate": 0.0001807161719696377, |
| "loss": 2.8305, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.649279062345876, |
| "grad_norm": 0.1240224838256836, |
| "learning_rate": 0.0001801881890981362, |
| "loss": 2.832, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.6498592938582495, |
| "grad_norm": 0.12260005623102188, |
| "learning_rate": 0.00017966064738627363, |
| "loss": 2.8274, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.6504395253706229, |
| "grad_norm": 0.11284399777650833, |
| "learning_rate": 0.00017913354877651386, |
| "loss": 2.8291, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.6510197568829963, |
| "grad_norm": 0.11993937194347382, |
| "learning_rate": 0.00017860689520968906, |
| "loss": 2.8357, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.6515999883953697, |
| "grad_norm": 0.11259515583515167, |
| "learning_rate": 0.00017808068862499302, |
| "loss": 2.8134, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.6521802199077432, |
| "grad_norm": 0.1146656796336174, |
| "learning_rate": 0.0001775549309599733, |
| "loss": 2.8275, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.6527604514201166, |
| "grad_norm": 0.11118417978286743, |
| "learning_rate": 0.0001770296241505248, |
| "loss": 2.8276, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.65334068293249, |
| "grad_norm": 0.1155654564499855, |
| "learning_rate": 0.00017650477013088218, |
| "loss": 2.8333, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.6539209144448636, |
| "grad_norm": 0.12370238453149796, |
| "learning_rate": 0.000175980370833613, |
| "loss": 2.8209, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.654501145957237, |
| "grad_norm": 0.11332956701517105, |
| "learning_rate": 0.00017545642818961045, |
| "loss": 2.824, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.6550813774696104, |
| "grad_norm": 0.11696597188711166, |
| "learning_rate": 0.00017493294412808603, |
| "loss": 2.8285, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.6556616089819838, |
| "grad_norm": 0.11556991934776306, |
| "learning_rate": 0.00017440992057656302, |
| "loss": 2.833, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.6562418404943573, |
| "grad_norm": 0.11072834581136703, |
| "learning_rate": 0.000173887359460869, |
| "loss": 2.8202, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.6568220720067307, |
| "grad_norm": 0.12139474600553513, |
| "learning_rate": 0.0001733652627051285, |
| "loss": 2.8323, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.6574023035191041, |
| "grad_norm": 0.11882605403661728, |
| "learning_rate": 0.0001728436322317567, |
| "loss": 2.8325, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.6579825350314775, |
| "grad_norm": 0.10851707309484482, |
| "learning_rate": 0.00017232246996145163, |
| "loss": 2.8304, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.658562766543851, |
| "grad_norm": 0.11566723883152008, |
| "learning_rate": 0.0001718017778131873, |
| "loss": 2.8359, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.6591429980562244, |
| "grad_norm": 0.1224483922123909, |
| "learning_rate": 0.00017128155770420673, |
| "loss": 2.8246, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.6597232295685979, |
| "grad_norm": 0.11472085118293762, |
| "learning_rate": 0.00017076181155001492, |
| "loss": 2.8274, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.6603034610809713, |
| "grad_norm": 0.11463634669780731, |
| "learning_rate": 0.00017024254126437149, |
| "loss": 2.8208, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.6608836925933448, |
| "grad_norm": 0.11640073359012604, |
| "learning_rate": 0.00016972374875928427, |
| "loss": 2.8351, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.6614639241057182, |
| "grad_norm": 0.12146312743425369, |
| "learning_rate": 0.00016920543594500147, |
| "loss": 2.8249, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.6620441556180916, |
| "grad_norm": 0.11683548241853714, |
| "learning_rate": 0.00016868760473000524, |
| "loss": 2.8281, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.662624387130465, |
| "grad_norm": 0.11443763226270676, |
| "learning_rate": 0.0001681702570210043, |
| "loss": 2.8239, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.6632046186428385, |
| "grad_norm": 0.1136617586016655, |
| "learning_rate": 0.00016765339472292714, |
| "loss": 2.827, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.6637848501552119, |
| "grad_norm": 0.11093004792928696, |
| "learning_rate": 0.00016713701973891472, |
| "loss": 2.8359, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.6643650816675853, |
| "grad_norm": 0.12110643088817596, |
| "learning_rate": 0.00016662113397031413, |
| "loss": 2.8164, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.6649453131799588, |
| "grad_norm": 0.12236957252025604, |
| "learning_rate": 0.00016610573931667065, |
| "loss": 2.8295, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.6655255446923323, |
| "grad_norm": 0.11643628776073456, |
| "learning_rate": 0.0001655908376757214, |
| "loss": 2.8199, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.6661057762047057, |
| "grad_norm": 0.12198419123888016, |
| "learning_rate": 0.00016507643094338818, |
| "loss": 2.8234, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.6666860077170791, |
| "grad_norm": 0.11697736382484436, |
| "learning_rate": 0.00016456252101377042, |
| "loss": 2.8309, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.6672662392294526, |
| "grad_norm": 0.11377154290676117, |
| "learning_rate": 0.00016404910977913824, |
| "loss": 2.8174, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.667846470741826, |
| "grad_norm": 0.1169874370098114, |
| "learning_rate": 0.0001635361991299258, |
| "loss": 2.8174, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.6684267022541994, |
| "grad_norm": 0.11022408306598663, |
| "learning_rate": 0.00016302379095472374, |
| "loss": 2.8251, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.6690069337665728, |
| "grad_norm": 0.11143022775650024, |
| "learning_rate": 0.00016251188714027265, |
| "loss": 2.832, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.6695871652789464, |
| "grad_norm": 0.11829391121864319, |
| "learning_rate": 0.00016200048957145597, |
| "loss": 2.8181, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.6701673967913198, |
| "grad_norm": 0.11668332666158676, |
| "learning_rate": 0.00016148960013129303, |
| "loss": 2.8163, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.6707476283036932, |
| "grad_norm": 0.11444656550884247, |
| "learning_rate": 0.0001609792207009325, |
| "loss": 2.8171, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.6713278598160666, |
| "grad_norm": 0.11538255959749222, |
| "learning_rate": 0.00016046935315964476, |
| "loss": 2.8192, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.6719080913284401, |
| "grad_norm": 0.13890443742275238, |
| "learning_rate": 0.0001599599993848155, |
| "loss": 2.814, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.6724883228408135, |
| "grad_norm": 0.10878733545541763, |
| "learning_rate": 0.00015945116125193876, |
| "loss": 2.8161, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.6730685543531869, |
| "grad_norm": 0.11337769776582718, |
| "learning_rate": 0.00015894284063460966, |
| "loss": 2.8161, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.6736487858655603, |
| "grad_norm": 0.1095629557967186, |
| "learning_rate": 0.00015843503940451834, |
| "loss": 2.8087, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.6742290173779338, |
| "grad_norm": 0.1378069370985031, |
| "learning_rate": 0.00015792775943144165, |
| "loss": 2.8151, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.6748092488903072, |
| "grad_norm": 0.1202809140086174, |
| "learning_rate": 0.00015742100258323794, |
| "loss": 2.831, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.6753894804026807, |
| "grad_norm": 0.12298610061407089, |
| "learning_rate": 0.00015691477072583894, |
| "loss": 2.8247, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.6759697119150541, |
| "grad_norm": 0.11947082728147507, |
| "learning_rate": 0.00015640906572324319, |
| "loss": 2.8238, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.6765499434274276, |
| "grad_norm": 0.11039472371339798, |
| "learning_rate": 0.00015590388943750988, |
| "loss": 2.8267, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.677130174939801, |
| "grad_norm": 0.11807908117771149, |
| "learning_rate": 0.0001553992437287505, |
| "loss": 2.8222, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.6777104064521744, |
| "grad_norm": 0.11934113502502441, |
| "learning_rate": 0.00015489513045512386, |
| "loss": 2.8193, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.6782906379645478, |
| "grad_norm": 0.11163033545017242, |
| "learning_rate": 0.00015439155147282764, |
| "loss": 2.8137, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.6788708694769213, |
| "grad_norm": 0.11381068080663681, |
| "learning_rate": 0.0001538885086360923, |
| "loss": 2.8202, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.6794511009892947, |
| "grad_norm": 0.11011006683111191, |
| "learning_rate": 0.0001533860037971747, |
| "loss": 2.8213, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.6800313325016681, |
| "grad_norm": 0.11611464619636536, |
| "learning_rate": 0.0001528840388063497, |
| "loss": 2.8216, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.6806115640140415, |
| "grad_norm": 0.10734301805496216, |
| "learning_rate": 0.0001523826155119055, |
| "loss": 2.8188, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.6811917955264151, |
| "grad_norm": 0.12189003825187683, |
| "learning_rate": 0.00015188173576013482, |
| "loss": 2.8206, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.6817720270387885, |
| "grad_norm": 0.11146776378154755, |
| "learning_rate": 0.0001513814013953296, |
| "loss": 2.8176, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.6823522585511619, |
| "grad_norm": 0.11531021445989609, |
| "learning_rate": 0.0001508816142597733, |
| "loss": 2.8192, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.6829324900635354, |
| "grad_norm": 0.11541693657636642, |
| "learning_rate": 0.00015038237619373443, |
| "loss": 2.8219, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.6835127215759088, |
| "grad_norm": 0.11345332115888596, |
| "learning_rate": 0.0001498836890354602, |
| "loss": 2.8024, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.6840929530882822, |
| "grad_norm": 0.10796009749174118, |
| "learning_rate": 0.00014938555462116842, |
| "loss": 2.8119, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.6846731846006556, |
| "grad_norm": 0.11463455855846405, |
| "learning_rate": 0.00014888797478504261, |
| "loss": 2.8119, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.6852534161130291, |
| "grad_norm": 0.11192594468593597, |
| "learning_rate": 0.00014839095135922372, |
| "loss": 2.8252, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.6858336476254026, |
| "grad_norm": 0.11805829405784607, |
| "learning_rate": 0.000147894486173804, |
| "loss": 2.8095, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.686413879137776, |
| "grad_norm": 0.11721805483102798, |
| "learning_rate": 0.00014739858105682053, |
| "loss": 2.8123, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.6869941106501494, |
| "grad_norm": 0.11619780957698822, |
| "learning_rate": 0.0001469032378342475, |
| "loss": 2.8177, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.6875743421625229, |
| "grad_norm": 0.10933215916156769, |
| "learning_rate": 0.00014640845832999087, |
| "loss": 2.8078, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.6881545736748963, |
| "grad_norm": 0.11362309753894806, |
| "learning_rate": 0.0001459142443658805, |
| "loss": 2.8103, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.6887348051872697, |
| "grad_norm": 0.10805781930685043, |
| "learning_rate": 0.00014542059776166382, |
| "loss": 2.8073, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.6893150366996431, |
| "grad_norm": 0.124758280813694, |
| "learning_rate": 0.00014492752033499977, |
| "loss": 2.8133, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.6898952682120166, |
| "grad_norm": 0.11096182465553284, |
| "learning_rate": 0.00014443501390145057, |
| "loss": 2.8061, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.69047549972439, |
| "grad_norm": 0.1132817193865776, |
| "learning_rate": 0.00014394308027447685, |
| "loss": 2.8209, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.6910557312367634, |
| "grad_norm": 0.10996360331773758, |
| "learning_rate": 0.00014345172126542966, |
| "loss": 2.8161, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.6916359627491369, |
| "grad_norm": 0.11297384649515152, |
| "learning_rate": 0.0001429609386835442, |
| "loss": 2.8116, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.6922161942615104, |
| "grad_norm": 0.12191120535135269, |
| "learning_rate": 0.00014247073433593373, |
| "loss": 2.8156, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.6927964257738838, |
| "grad_norm": 0.11631318181753159, |
| "learning_rate": 0.00014198111002758154, |
| "loss": 2.8225, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.6933766572862572, |
| "grad_norm": 0.14487071335315704, |
| "learning_rate": 0.00014149206756133595, |
| "loss": 2.8153, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.6939568887986306, |
| "grad_norm": 0.11780226230621338, |
| "learning_rate": 0.00014100360873790248, |
| "loss": 2.8163, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.6945371203110041, |
| "grad_norm": 0.11396613717079163, |
| "learning_rate": 0.00014051573535583766, |
| "loss": 2.8101, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.6951173518233775, |
| "grad_norm": 0.11514125019311905, |
| "learning_rate": 0.00014002844921154233, |
| "loss": 2.819, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.6956975833357509, |
| "grad_norm": 0.11687569320201874, |
| "learning_rate": 0.00013954175209925513, |
| "loss": 2.8106, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.6962778148481245, |
| "grad_norm": 0.11218845099210739, |
| "learning_rate": 0.00013905564581104607, |
| "loss": 2.8156, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6962778148481245, |
| "eval_loss": 2.778130531311035, |
| "eval_runtime": 3.2555, |
| "eval_samples_per_second": 1330.053, |
| "eval_steps_per_second": 2.765, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.6968580463604979, |
| "grad_norm": 0.11513704061508179, |
| "learning_rate": 0.000138570132136809, |
| "loss": 2.8185, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.6974382778728713, |
| "grad_norm": 0.12384956330060959, |
| "learning_rate": 0.00013808521286425644, |
| "loss": 2.8159, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.6980185093852447, |
| "grad_norm": 0.11136494576931, |
| "learning_rate": 0.0001376008897789119, |
| "loss": 2.8196, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.6985987408976182, |
| "grad_norm": 0.11704517900943756, |
| "learning_rate": 0.00013711716466410353, |
| "loss": 2.8118, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.6991789724099916, |
| "grad_norm": 0.11521551758050919, |
| "learning_rate": 0.00013663403930095827, |
| "loss": 2.8131, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.699759203922365, |
| "grad_norm": 0.10568945109844208, |
| "learning_rate": 0.00013615151546839382, |
| "loss": 2.8098, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.7003394354347384, |
| "grad_norm": 0.1213884949684143, |
| "learning_rate": 0.00013566959494311386, |
| "loss": 2.8091, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.7009196669471119, |
| "grad_norm": 0.11004059761762619, |
| "learning_rate": 0.00013518827949960015, |
| "loss": 2.8238, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.7014998984594853, |
| "grad_norm": 0.11095508933067322, |
| "learning_rate": 0.00013470757091010649, |
| "loss": 2.8116, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.7020801299718588, |
| "grad_norm": 0.11275944113731384, |
| "learning_rate": 0.00013422747094465234, |
| "loss": 2.8109, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.7026603614842322, |
| "grad_norm": 0.11312493681907654, |
| "learning_rate": 0.00013374798137101595, |
| "loss": 2.814, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.7032405929966057, |
| "grad_norm": 0.10738647729158401, |
| "learning_rate": 0.00013326910395472833, |
| "loss": 2.8111, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.7038208245089791, |
| "grad_norm": 0.11198966205120087, |
| "learning_rate": 0.00013279084045906623, |
| "loss": 2.806, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.7044010560213525, |
| "grad_norm": 0.11718153953552246, |
| "learning_rate": 0.00013231319264504594, |
| "loss": 2.8186, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.7049812875337259, |
| "grad_norm": 0.11054380983114243, |
| "learning_rate": 0.00013183616227141674, |
| "loss": 2.8144, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.7055615190460994, |
| "grad_norm": 0.11579257249832153, |
| "learning_rate": 0.0001313597510946543, |
| "loss": 2.8101, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.7061417505584728, |
| "grad_norm": 0.10710903257131577, |
| "learning_rate": 0.00013088396086895476, |
| "loss": 2.8104, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.7067219820708462, |
| "grad_norm": 0.11220473051071167, |
| "learning_rate": 0.00013040879334622738, |
| "loss": 2.8049, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.7073022135832197, |
| "grad_norm": 0.10872667282819748, |
| "learning_rate": 0.00012993425027608884, |
| "loss": 2.8175, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.7078824450955932, |
| "grad_norm": 0.10861840099096298, |
| "learning_rate": 0.00012946033340585641, |
| "loss": 2.8072, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.7084626766079666, |
| "grad_norm": 0.11558268964290619, |
| "learning_rate": 0.00012898704448054162, |
| "loss": 2.8034, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.70904290812034, |
| "grad_norm": 0.11709378659725189, |
| "learning_rate": 0.00012851438524284382, |
| "loss": 2.8047, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.7096231396327135, |
| "grad_norm": 0.12139759957790375, |
| "learning_rate": 0.00012804235743314401, |
| "loss": 2.8056, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.7102033711450869, |
| "grad_norm": 0.11130308359861374, |
| "learning_rate": 0.00012757096278949792, |
| "loss": 2.8138, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.7107836026574603, |
| "grad_norm": 0.1112653836607933, |
| "learning_rate": 0.00012710020304763003, |
| "loss": 2.8004, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.7113638341698337, |
| "grad_norm": 0.11182957142591476, |
| "learning_rate": 0.00012663007994092703, |
| "loss": 2.8064, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.7119440656822072, |
| "grad_norm": 0.13386094570159912, |
| "learning_rate": 0.00012616059520043145, |
| "loss": 2.8148, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.7125242971945807, |
| "grad_norm": 0.11641652137041092, |
| "learning_rate": 0.0001256917505548352, |
| "loss": 2.8102, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.7131045287069541, |
| "grad_norm": 0.10916447639465332, |
| "learning_rate": 0.00012522354773047352, |
| "loss": 2.8148, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.7136847602193275, |
| "grad_norm": 0.10887318104505539, |
| "learning_rate": 0.0001247559884513182, |
| "loss": 2.8047, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.714264991731701, |
| "grad_norm": 0.11701834946870804, |
| "learning_rate": 0.0001242890744389715, |
| "loss": 2.8144, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.7148452232440744, |
| "grad_norm": 0.10473381727933884, |
| "learning_rate": 0.00012382280741265968, |
| "loss": 2.8057, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.7154254547564478, |
| "grad_norm": 0.10586260259151459, |
| "learning_rate": 0.00012335718908922685, |
| "loss": 2.8032, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.7160056862688212, |
| "grad_norm": 0.10688824206590652, |
| "learning_rate": 0.00012289222118312822, |
| "loss": 2.8054, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.7165859177811947, |
| "grad_norm": 0.11233460903167725, |
| "learning_rate": 0.0001224279054064247, |
| "loss": 2.801, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.7171661492935681, |
| "grad_norm": 0.10600557923316956, |
| "learning_rate": 0.00012196424346877541, |
| "loss": 2.8035, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.7177463808059416, |
| "grad_norm": 0.11300963163375854, |
| "learning_rate": 0.00012150123707743219, |
| "loss": 2.8098, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.718326612318315, |
| "grad_norm": 0.11773265898227692, |
| "learning_rate": 0.00012103888793723312, |
| "loss": 2.8103, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.7189068438306885, |
| "grad_norm": 0.11092250049114227, |
| "learning_rate": 0.00012057719775059602, |
| "loss": 2.8028, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.7194870753430619, |
| "grad_norm": 0.10554751008749008, |
| "learning_rate": 0.00012011616821751271, |
| "loss": 2.8044, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.7200673068554353, |
| "grad_norm": 0.1148175522685051, |
| "learning_rate": 0.0001196558010355422, |
| "loss": 2.8099, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.7206475383678087, |
| "grad_norm": 0.10981535166501999, |
| "learning_rate": 0.00011919609789980458, |
| "loss": 2.7991, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.7212277698801822, |
| "grad_norm": 0.11188452690839767, |
| "learning_rate": 0.00011873706050297508, |
| "loss": 2.8067, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.7218080013925556, |
| "grad_norm": 0.11328940838575363, |
| "learning_rate": 0.00011827869053527727, |
| "loss": 2.8049, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.722388232904929, |
| "grad_norm": 0.11542364954948425, |
| "learning_rate": 0.00011782098968447774, |
| "loss": 2.7988, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.7229684644173026, |
| "grad_norm": 0.11087549477815628, |
| "learning_rate": 0.00011736395963587857, |
| "loss": 2.8102, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.723548695929676, |
| "grad_norm": 0.11298040300607681, |
| "learning_rate": 0.00011690760207231256, |
| "loss": 2.8063, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.7241289274420494, |
| "grad_norm": 0.10775293409824371, |
| "learning_rate": 0.00011645191867413596, |
| "loss": 2.8065, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.7247091589544228, |
| "grad_norm": 0.11240221560001373, |
| "learning_rate": 0.00011599691111922272, |
| "loss": 2.8062, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.7252893904667963, |
| "grad_norm": 0.1069854348897934, |
| "learning_rate": 0.00011554258108295859, |
| "loss": 2.79, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.7258696219791697, |
| "grad_norm": 0.11566832661628723, |
| "learning_rate": 0.00011508893023823393, |
| "loss": 2.7977, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.7264498534915431, |
| "grad_norm": 0.11771980673074722, |
| "learning_rate": 0.00011463596025543905, |
| "loss": 2.803, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.7270300850039165, |
| "grad_norm": 0.11435101926326752, |
| "learning_rate": 0.0001141836728024567, |
| "loss": 2.7985, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.72761031651629, |
| "grad_norm": 0.10902056097984314, |
| "learning_rate": 0.0001137320695446566, |
| "loss": 2.8096, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.7281905480286635, |
| "grad_norm": 0.10939980298280716, |
| "learning_rate": 0.0001132811521448896, |
| "loss": 2.8121, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.7287707795410369, |
| "grad_norm": 0.10922636091709137, |
| "learning_rate": 0.00011283092226348031, |
| "loss": 2.8093, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.7293510110534103, |
| "grad_norm": 0.10520195960998535, |
| "learning_rate": 0.00011238138155822275, |
| "loss": 2.8031, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.7299312425657838, |
| "grad_norm": 0.10655706375837326, |
| "learning_rate": 0.00011193253168437253, |
| "loss": 2.8083, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.7305114740781572, |
| "grad_norm": 0.11627507954835892, |
| "learning_rate": 0.00011148437429464215, |
| "loss": 2.7994, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.7310917055905306, |
| "grad_norm": 0.1093965470790863, |
| "learning_rate": 0.00011103691103919401, |
| "loss": 2.8054, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.731671937102904, |
| "grad_norm": 0.113887257874012, |
| "learning_rate": 0.00011059014356563458, |
| "loss": 2.7963, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.7322521686152775, |
| "grad_norm": 0.10929399728775024, |
| "learning_rate": 0.00011014407351900879, |
| "loss": 2.8033, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.7328324001276509, |
| "grad_norm": 0.11176785826683044, |
| "learning_rate": 0.00010969870254179285, |
| "loss": 2.8061, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.7334126316400243, |
| "grad_norm": 0.10631275177001953, |
| "learning_rate": 0.00010925403227388973, |
| "loss": 2.8107, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.7339928631523978, |
| "grad_norm": 0.11108485609292984, |
| "learning_rate": 0.00010881006435262179, |
| "loss": 2.8059, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.7345730946647713, |
| "grad_norm": 0.10749488323926926, |
| "learning_rate": 0.00010836680041272536, |
| "loss": 2.8004, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.7351533261771447, |
| "grad_norm": 0.10994744300842285, |
| "learning_rate": 0.00010792424208634495, |
| "loss": 2.8093, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.7357335576895181, |
| "grad_norm": 0.10910103470087051, |
| "learning_rate": 0.00010748239100302627, |
| "loss": 2.7928, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.7363137892018915, |
| "grad_norm": 0.10835743695497513, |
| "learning_rate": 0.0001070412487897117, |
| "loss": 2.8077, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.736894020714265, |
| "grad_norm": 0.10580655187368393, |
| "learning_rate": 0.00010660081707073288, |
| "loss": 2.7991, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.7374742522266384, |
| "grad_norm": 0.10928157716989517, |
| "learning_rate": 0.00010616109746780546, |
| "loss": 2.7905, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.7380544837390118, |
| "grad_norm": 0.10654684156179428, |
| "learning_rate": 0.00010572209160002339, |
| "loss": 2.8021, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.7386347152513854, |
| "grad_norm": 0.10834140330553055, |
| "learning_rate": 0.00010528380108385186, |
| "loss": 2.805, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.7392149467637588, |
| "grad_norm": 0.1152142882347107, |
| "learning_rate": 0.00010484622753312279, |
| "loss": 2.7916, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.7397951782761322, |
| "grad_norm": 0.10981319844722748, |
| "learning_rate": 0.0001044093725590277, |
| "loss": 2.8029, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.7403754097885056, |
| "grad_norm": 0.1065368577837944, |
| "learning_rate": 0.00010397323777011229, |
| "loss": 2.8048, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.7409556413008791, |
| "grad_norm": 0.10563939809799194, |
| "learning_rate": 0.00010353782477227083, |
| "loss": 2.8058, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.7415358728132525, |
| "grad_norm": 0.11117275804281235, |
| "learning_rate": 0.00010310313516873922, |
| "loss": 2.7985, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.7421161043256259, |
| "grad_norm": 0.11544723808765411, |
| "learning_rate": 0.00010266917056009036, |
| "loss": 2.8001, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.7426963358379993, |
| "grad_norm": 0.11005005240440369, |
| "learning_rate": 0.00010223593254422733, |
| "loss": 2.7954, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.7432765673503728, |
| "grad_norm": 0.11374104768037796, |
| "learning_rate": 0.0001018034227163779, |
| "loss": 2.8053, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.7438567988627462, |
| "grad_norm": 0.11264318227767944, |
| "learning_rate": 0.00010137164266908854, |
| "loss": 2.8029, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.7444370303751197, |
| "grad_norm": 0.10718287527561188, |
| "learning_rate": 0.00010094059399221855, |
| "loss": 2.7964, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.7450172618874931, |
| "grad_norm": 0.11395127326250076, |
| "learning_rate": 0.00010051027827293457, |
| "loss": 2.8057, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.7455974933998666, |
| "grad_norm": 0.11251317709684372, |
| "learning_rate": 0.00010008069709570378, |
| "loss": 2.8036, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.74617772491224, |
| "grad_norm": 0.1180030032992363, |
| "learning_rate": 9.965185204228941e-05, |
| "loss": 2.8016, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.7467579564246134, |
| "grad_norm": 0.12361141294240952, |
| "learning_rate": 9.922374469174372e-05, |
| "loss": 2.7891, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.7473381879369868, |
| "grad_norm": 0.11456003040075302, |
| "learning_rate": 9.879637662040275e-05, |
| "loss": 2.8028, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.7479184194493603, |
| "grad_norm": 0.11008987575769424, |
| "learning_rate": 9.83697494018808e-05, |
| "loss": 2.8093, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.7484986509617337, |
| "grad_norm": 0.11017616838216782, |
| "learning_rate": 9.794386460706356e-05, |
| "loss": 2.8005, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.7490788824741071, |
| "grad_norm": 0.11627316474914551, |
| "learning_rate": 9.751872380410378e-05, |
| "loss": 2.799, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.7496591139864806, |
| "grad_norm": 0.11369270831346512, |
| "learning_rate": 9.709432855841436e-05, |
| "loss": 2.7941, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.7502393454988541, |
| "grad_norm": 0.10983362793922424, |
| "learning_rate": 9.667068043266302e-05, |
| "loss": 2.7996, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.7508195770112275, |
| "grad_norm": 0.10419350117444992, |
| "learning_rate": 9.624778098676652e-05, |
| "loss": 2.8052, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.7513998085236009, |
| "grad_norm": 0.10500075668096542, |
| "learning_rate": 9.582563177788487e-05, |
| "loss": 2.7993, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.7519800400359744, |
| "grad_norm": 0.10765775293111801, |
| "learning_rate": 9.540423436041585e-05, |
| "loss": 2.7964, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.7525602715483478, |
| "grad_norm": 0.10872151702642441, |
| "learning_rate": 9.49835902859888e-05, |
| "loss": 2.7876, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.7531405030607212, |
| "grad_norm": 0.10935165733098984, |
| "learning_rate": 9.456370110345927e-05, |
| "loss": 2.8003, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.7537207345730946, |
| "grad_norm": 0.1083398386836052, |
| "learning_rate": 9.414456835890322e-05, |
| "loss": 2.7945, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.7543009660854681, |
| "grad_norm": 0.10846253484487534, |
| "learning_rate": 9.372619359561121e-05, |
| "loss": 2.799, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.7543009660854681, |
| "eval_loss": 2.7616169452667236, |
| "eval_runtime": 3.2768, |
| "eval_samples_per_second": 1321.408, |
| "eval_steps_per_second": 2.747, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.7548811975978416, |
| "grad_norm": 0.10937865823507309, |
| "learning_rate": 9.330857835408318e-05, |
| "loss": 2.7962, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.755461429110215, |
| "grad_norm": 0.10633205622434616, |
| "learning_rate": 9.289172417202205e-05, |
| "loss": 2.7989, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.7560416606225884, |
| "grad_norm": 0.11001235246658325, |
| "learning_rate": 9.247563258432861e-05, |
| "loss": 2.7955, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.7566218921349619, |
| "grad_norm": 0.10847952216863632, |
| "learning_rate": 9.206030512309566e-05, |
| "loss": 2.7959, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.7572021236473353, |
| "grad_norm": 0.10858704149723053, |
| "learning_rate": 9.164574331760246e-05, |
| "loss": 2.7965, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.7577823551597087, |
| "grad_norm": 0.10710106790065765, |
| "learning_rate": 9.123194869430888e-05, |
| "loss": 2.7921, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.7583625866720821, |
| "grad_norm": 0.10932508111000061, |
| "learning_rate": 9.081892277685026e-05, |
| "loss": 2.7921, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.7589428181844556, |
| "grad_norm": 0.11362321674823761, |
| "learning_rate": 9.040666708603125e-05, |
| "loss": 2.7981, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.759523049696829, |
| "grad_norm": 0.10791613906621933, |
| "learning_rate": 8.999518313982039e-05, |
| "loss": 2.7993, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.7601032812092025, |
| "grad_norm": 0.11038652807474136, |
| "learning_rate": 8.958447245334476e-05, |
| "loss": 2.7922, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.7606835127215759, |
| "grad_norm": 0.11153964698314667, |
| "learning_rate": 8.91745365388841e-05, |
| "loss": 2.8016, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.7612637442339494, |
| "grad_norm": 0.10748942941427231, |
| "learning_rate": 8.876537690586529e-05, |
| "loss": 2.791, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.7618439757463228, |
| "grad_norm": 0.1106482520699501, |
| "learning_rate": 8.83569950608572e-05, |
| "loss": 2.8008, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.7624242072586962, |
| "grad_norm": 0.10443028807640076, |
| "learning_rate": 8.794939250756441e-05, |
| "loss": 2.7936, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.7630044387710696, |
| "grad_norm": 0.11383570730686188, |
| "learning_rate": 8.754257074682222e-05, |
| "loss": 2.7912, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.7635846702834431, |
| "grad_norm": 0.10836578160524368, |
| "learning_rate": 8.713653127659105e-05, |
| "loss": 2.7939, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.7641649017958165, |
| "grad_norm": 0.10870825499296188, |
| "learning_rate": 8.673127559195066e-05, |
| "loss": 2.7991, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.7647451333081899, |
| "grad_norm": 0.10718671977519989, |
| "learning_rate": 8.632680518509492e-05, |
| "loss": 2.7879, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.7653253648205635, |
| "grad_norm": 0.11277935653924942, |
| "learning_rate": 8.592312154532637e-05, |
| "loss": 2.7947, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.7659055963329369, |
| "grad_norm": 0.11088382452726364, |
| "learning_rate": 8.552022615905038e-05, |
| "loss": 2.7996, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.7664858278453103, |
| "grad_norm": 0.10912182927131653, |
| "learning_rate": 8.511812050977003e-05, |
| "loss": 2.7943, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.7670660593576837, |
| "grad_norm": 0.10919041931629181, |
| "learning_rate": 8.471680607808035e-05, |
| "loss": 2.7992, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.7676462908700572, |
| "grad_norm": 0.10616286844015121, |
| "learning_rate": 8.431628434166309e-05, |
| "loss": 2.7977, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.7682265223824306, |
| "grad_norm": 0.10572168231010437, |
| "learning_rate": 8.391655677528143e-05, |
| "loss": 2.7959, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.768806753894804, |
| "grad_norm": 0.10937794297933578, |
| "learning_rate": 8.3517624850774e-05, |
| "loss": 2.793, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.7693869854071774, |
| "grad_norm": 0.10820769518613815, |
| "learning_rate": 8.311949003704996e-05, |
| "loss": 2.7991, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.769967216919551, |
| "grad_norm": 0.10802992433309555, |
| "learning_rate": 8.272215380008343e-05, |
| "loss": 2.7965, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.7705474484319244, |
| "grad_norm": 0.10747858881950378, |
| "learning_rate": 8.232561760290794e-05, |
| "loss": 2.7957, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.7711276799442978, |
| "grad_norm": 0.11238089948892593, |
| "learning_rate": 8.192988290561157e-05, |
| "loss": 2.7922, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.7717079114566712, |
| "grad_norm": 0.1034981980919838, |
| "learning_rate": 8.153495116533056e-05, |
| "loss": 2.789, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.7722881429690447, |
| "grad_norm": 0.10910629481077194, |
| "learning_rate": 8.11408238362453e-05, |
| "loss": 2.7899, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.7728683744814181, |
| "grad_norm": 0.11309719830751419, |
| "learning_rate": 8.07475023695737e-05, |
| "loss": 2.7978, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.7734486059937915, |
| "grad_norm": 0.10908596217632294, |
| "learning_rate": 8.035498821356664e-05, |
| "loss": 2.7938, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.7740288375061649, |
| "grad_norm": 0.11714279651641846, |
| "learning_rate": 7.996328281350252e-05, |
| "loss": 2.7967, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.7746090690185384, |
| "grad_norm": 0.10943669080734253, |
| "learning_rate": 7.957238761168135e-05, |
| "loss": 2.7803, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.7751893005309118, |
| "grad_norm": 0.11171719431877136, |
| "learning_rate": 7.918230404742045e-05, |
| "loss": 2.7941, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.7757695320432852, |
| "grad_norm": 0.10363152623176575, |
| "learning_rate": 7.879303355704834e-05, |
| "loss": 2.8043, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.7763497635556587, |
| "grad_norm": 0.1147744432091713, |
| "learning_rate": 7.840457757389968e-05, |
| "loss": 2.8022, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.7769299950680322, |
| "grad_norm": 0.10682083666324615, |
| "learning_rate": 7.801693752831012e-05, |
| "loss": 2.7914, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.7775102265804056, |
| "grad_norm": 0.11352023482322693, |
| "learning_rate": 7.763011484761082e-05, |
| "loss": 2.7958, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.778090458092779, |
| "grad_norm": 0.10785870254039764, |
| "learning_rate": 7.724411095612366e-05, |
| "loss": 2.7971, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.7786706896051525, |
| "grad_norm": 0.10762759298086166, |
| "learning_rate": 7.68589272751551e-05, |
| "loss": 2.7916, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.7792509211175259, |
| "grad_norm": 0.10556434839963913, |
| "learning_rate": 7.647456522299207e-05, |
| "loss": 2.784, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.7798311526298993, |
| "grad_norm": 0.1077750101685524, |
| "learning_rate": 7.609102621489577e-05, |
| "loss": 2.7906, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.7804113841422727, |
| "grad_norm": 0.10472170263528824, |
| "learning_rate": 7.570831166309693e-05, |
| "loss": 2.7833, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.7809916156546463, |
| "grad_norm": 0.1061674952507019, |
| "learning_rate": 7.532642297679093e-05, |
| "loss": 2.796, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.7815718471670197, |
| "grad_norm": 0.10716653615236282, |
| "learning_rate": 7.494536156213151e-05, |
| "loss": 2.791, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.7821520786793931, |
| "grad_norm": 0.11008104681968689, |
| "learning_rate": 7.456512882222703e-05, |
| "loss": 2.7874, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.7827323101917665, |
| "grad_norm": 0.11095033586025238, |
| "learning_rate": 7.418572615713413e-05, |
| "loss": 2.7874, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.78331254170414, |
| "grad_norm": 0.10690274834632874, |
| "learning_rate": 7.380715496385316e-05, |
| "loss": 2.7897, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.7838927732165134, |
| "grad_norm": 0.10463336110115051, |
| "learning_rate": 7.34294166363231e-05, |
| "loss": 2.7965, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.7844730047288868, |
| "grad_norm": 0.10628803819417953, |
| "learning_rate": 7.30525125654157e-05, |
| "loss": 2.7878, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.7850532362412602, |
| "grad_norm": 0.10758186876773834, |
| "learning_rate": 7.267644413893152e-05, |
| "loss": 2.7893, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.7856334677536337, |
| "grad_norm": 0.10785481333732605, |
| "learning_rate": 7.230121274159384e-05, |
| "loss": 2.7896, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.7862136992660071, |
| "grad_norm": 0.10700030624866486, |
| "learning_rate": 7.192681975504382e-05, |
| "loss": 2.786, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.7867939307783806, |
| "grad_norm": 0.10182949900627136, |
| "learning_rate": 7.155326655783597e-05, |
| "loss": 2.7889, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.787374162290754, |
| "grad_norm": 0.10802864283323288, |
| "learning_rate": 7.118055452543193e-05, |
| "loss": 2.7946, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.7879543938031275, |
| "grad_norm": 0.10849913954734802, |
| "learning_rate": 7.080868503019672e-05, |
| "loss": 2.786, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.7885346253155009, |
| "grad_norm": 0.10770730674266815, |
| "learning_rate": 7.043765944139264e-05, |
| "loss": 2.7804, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.7891148568278743, |
| "grad_norm": 0.11441770195960999, |
| "learning_rate": 7.006747912517475e-05, |
| "loss": 2.79, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.7896950883402477, |
| "grad_norm": 0.10908571630716324, |
| "learning_rate": 6.9698145444586e-05, |
| "loss": 2.7897, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.7902753198526212, |
| "grad_norm": 0.10705877095460892, |
| "learning_rate": 6.932965975955134e-05, |
| "loss": 2.7857, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.7908555513649946, |
| "grad_norm": 0.11635982990264893, |
| "learning_rate": 6.896202342687397e-05, |
| "loss": 2.7888, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.791435782877368, |
| "grad_norm": 0.1107436865568161, |
| "learning_rate": 6.859523780022911e-05, |
| "loss": 2.7902, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.7920160143897415, |
| "grad_norm": 0.11131720244884491, |
| "learning_rate": 6.822930423016003e-05, |
| "loss": 2.7982, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.792596245902115, |
| "grad_norm": 0.10535065829753876, |
| "learning_rate": 6.786422406407247e-05, |
| "loss": 2.7838, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.7931764774144884, |
| "grad_norm": 0.10784085094928741, |
| "learning_rate": 6.749999864622973e-05, |
| "loss": 2.7778, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.7937567089268618, |
| "grad_norm": 0.10266363620758057, |
| "learning_rate": 6.713662931774818e-05, |
| "loss": 2.7929, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.7943369404392353, |
| "grad_norm": 0.11121921241283417, |
| "learning_rate": 6.677411741659145e-05, |
| "loss": 2.787, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.7949171719516087, |
| "grad_norm": 0.10687406361103058, |
| "learning_rate": 6.641246427756657e-05, |
| "loss": 2.7915, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.7954974034639821, |
| "grad_norm": 0.10604474693536758, |
| "learning_rate": 6.605167123231822e-05, |
| "loss": 2.7816, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.7960776349763555, |
| "grad_norm": 0.10484491288661957, |
| "learning_rate": 6.569173960932404e-05, |
| "loss": 2.7844, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.796657866488729, |
| "grad_norm": 0.10788851231336594, |
| "learning_rate": 6.533267073389034e-05, |
| "loss": 2.7815, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.7972380980011025, |
| "grad_norm": 0.10421809554100037, |
| "learning_rate": 6.49744659281459e-05, |
| "loss": 2.7953, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.7978183295134759, |
| "grad_norm": 0.10567434132099152, |
| "learning_rate": 6.461712651103859e-05, |
| "loss": 2.7898, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.7983985610258493, |
| "grad_norm": 0.10381162911653519, |
| "learning_rate": 6.426065379832959e-05, |
| "loss": 2.7902, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.7989787925382228, |
| "grad_norm": 0.10707089304924011, |
| "learning_rate": 6.390504910258867e-05, |
| "loss": 2.7923, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.7995590240505962, |
| "grad_norm": 0.10568366944789886, |
| "learning_rate": 6.355031373318961e-05, |
| "loss": 2.793, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.8001392555629696, |
| "grad_norm": 0.10662976652383804, |
| "learning_rate": 6.319644899630514e-05, |
| "loss": 2.7954, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.800719487075343, |
| "grad_norm": 0.10822783410549164, |
| "learning_rate": 6.28434561949024e-05, |
| "loss": 2.7875, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.8012997185877165, |
| "grad_norm": 0.10903995484113693, |
| "learning_rate": 6.249133662873783e-05, |
| "loss": 2.7952, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.8018799501000899, |
| "grad_norm": 0.11016574501991272, |
| "learning_rate": 6.214009159435254e-05, |
| "loss": 2.7833, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.8024601816124634, |
| "grad_norm": 0.10669629275798798, |
| "learning_rate": 6.178972238506758e-05, |
| "loss": 2.7966, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.8030404131248368, |
| "grad_norm": 0.10725666582584381, |
| "learning_rate": 6.144023029097891e-05, |
| "loss": 2.781, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.8036206446372103, |
| "grad_norm": 0.10259473323822021, |
| "learning_rate": 6.10916165989533e-05, |
| "loss": 2.7858, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.8042008761495837, |
| "grad_norm": 0.10819372534751892, |
| "learning_rate": 6.0743882592622736e-05, |
| "loss": 2.782, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.8047811076619571, |
| "grad_norm": 0.09982424229383469, |
| "learning_rate": 6.039702955238026e-05, |
| "loss": 2.7767, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.8053613391743305, |
| "grad_norm": 0.11254626512527466, |
| "learning_rate": 6.005105875537515e-05, |
| "loss": 2.7773, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.805941570686704, |
| "grad_norm": 0.10880761593580246, |
| "learning_rate": 5.970597147550808e-05, |
| "loss": 2.7925, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.8065218021990774, |
| "grad_norm": 0.10454876720905304, |
| "learning_rate": 5.936176898342649e-05, |
| "loss": 2.7887, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.8071020337114508, |
| "grad_norm": 0.10871117562055588, |
| "learning_rate": 5.9018452546520165e-05, |
| "loss": 2.7914, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.8076822652238244, |
| "grad_norm": 0.10645408183336258, |
| "learning_rate": 5.8676023428916175e-05, |
| "loss": 2.7946, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.8082624967361978, |
| "grad_norm": 0.11597729474306107, |
| "learning_rate": 5.83344828914743e-05, |
| "loss": 2.7917, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.8088427282485712, |
| "grad_norm": 0.1034785658121109, |
| "learning_rate": 5.799383219178264e-05, |
| "loss": 2.7912, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.8094229597609446, |
| "grad_norm": 0.10739534348249435, |
| "learning_rate": 5.7654072584152787e-05, |
| "loss": 2.7848, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.8100031912733181, |
| "grad_norm": 0.10825861990451813, |
| "learning_rate": 5.731520531961505e-05, |
| "loss": 2.7908, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.8105834227856915, |
| "grad_norm": 0.10880185663700104, |
| "learning_rate": 5.697723164591441e-05, |
| "loss": 2.7904, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.8111636542980649, |
| "grad_norm": 0.1085624098777771, |
| "learning_rate": 5.6640152807505236e-05, |
| "loss": 2.7839, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.8117438858104383, |
| "grad_norm": 0.10740832984447479, |
| "learning_rate": 5.630397004554713e-05, |
| "loss": 2.7858, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.8123241173228118, |
| "grad_norm": 0.10401804000139236, |
| "learning_rate": 5.596868459790025e-05, |
| "loss": 2.7802, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.8123241173228118, |
| "eval_loss": 2.749423027038574, |
| "eval_runtime": 3.2586, |
| "eval_samples_per_second": 1328.792, |
| "eval_steps_per_second": 2.762, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.8129043488351853, |
| "grad_norm": 0.10784956812858582, |
| "learning_rate": 5.563429769912071e-05, |
| "loss": 2.7852, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.8134845803475587, |
| "grad_norm": 0.10523492097854614, |
| "learning_rate": 5.530081058045606e-05, |
| "loss": 2.7856, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.8140648118599321, |
| "grad_norm": 0.10354667156934738, |
| "learning_rate": 5.4968224469840935e-05, |
| "loss": 2.7826, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.8146450433723056, |
| "grad_norm": 0.10460636019706726, |
| "learning_rate": 5.4636540591892164e-05, |
| "loss": 2.7844, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.815225274884679, |
| "grad_norm": 0.11116158217191696, |
| "learning_rate": 5.430576016790453e-05, |
| "loss": 2.7879, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.8158055063970524, |
| "grad_norm": 0.11445162445306778, |
| "learning_rate": 5.3975884415846206e-05, |
| "loss": 2.7847, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.8163857379094258, |
| "grad_norm": 0.10757939517498016, |
| "learning_rate": 5.3646914550354204e-05, |
| "loss": 2.7884, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.8169659694217993, |
| "grad_norm": 0.10770777612924576, |
| "learning_rate": 5.331885178273015e-05, |
| "loss": 2.775, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.8175462009341727, |
| "grad_norm": 0.10863149166107178, |
| "learning_rate": 5.2991697320935486e-05, |
| "loss": 2.7883, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.8181264324465461, |
| "grad_norm": 0.10049009323120117, |
| "learning_rate": 5.266545236958718e-05, |
| "loss": 2.7878, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.8187066639589196, |
| "grad_norm": 0.104975625872612, |
| "learning_rate": 5.2340118129953346e-05, |
| "loss": 2.7806, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.8192868954712931, |
| "grad_norm": 0.10563846677541733, |
| "learning_rate": 5.201569579994865e-05, |
| "loss": 2.7807, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.8198671269836665, |
| "grad_norm": 0.10182633996009827, |
| "learning_rate": 5.1692186574130324e-05, |
| "loss": 2.7782, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.8204473584960399, |
| "grad_norm": 0.10903611779212952, |
| "learning_rate": 5.1369591643692896e-05, |
| "loss": 2.7792, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.8210275900084134, |
| "grad_norm": 0.10453125089406967, |
| "learning_rate": 5.1047912196464944e-05, |
| "loss": 2.7814, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.8216078215207868, |
| "grad_norm": 0.11026264727115631, |
| "learning_rate": 5.072714941690387e-05, |
| "loss": 2.7847, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.8221880530331602, |
| "grad_norm": 0.10732634365558624, |
| "learning_rate": 5.040730448609166e-05, |
| "loss": 2.7716, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.8227682845455336, |
| "grad_norm": 0.10351432114839554, |
| "learning_rate": 5.008837858173113e-05, |
| "loss": 2.7883, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.8233485160579072, |
| "grad_norm": 0.10946208238601685, |
| "learning_rate": 4.9770372878140575e-05, |
| "loss": 2.786, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.8239287475702806, |
| "grad_norm": 0.1038416251540184, |
| "learning_rate": 4.9453288546250494e-05, |
| "loss": 2.7799, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.824508979082654, |
| "grad_norm": 0.10568647086620331, |
| "learning_rate": 4.913712675359861e-05, |
| "loss": 2.7874, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.8250892105950274, |
| "grad_norm": 0.10334275662899017, |
| "learning_rate": 4.882188866432568e-05, |
| "loss": 2.7835, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.8256694421074009, |
| "grad_norm": 0.10559739917516708, |
| "learning_rate": 4.850757543917144e-05, |
| "loss": 2.7791, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.8262496736197743, |
| "grad_norm": 0.1026688888669014, |
| "learning_rate": 4.819418823546999e-05, |
| "loss": 2.7777, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.8268299051321477, |
| "grad_norm": 0.10159046947956085, |
| "learning_rate": 4.788172820714611e-05, |
| "loss": 2.7876, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.8274101366445211, |
| "grad_norm": 0.114133320748806, |
| "learning_rate": 4.7570196504710026e-05, |
| "loss": 2.7777, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.8279903681568946, |
| "grad_norm": 0.10327325016260147, |
| "learning_rate": 4.725959427525432e-05, |
| "loss": 2.7976, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.828570599669268, |
| "grad_norm": 0.10618502646684647, |
| "learning_rate": 4.694992266244889e-05, |
| "loss": 2.7904, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.8291508311816415, |
| "grad_norm": 0.10732074081897736, |
| "learning_rate": 4.6641182806537e-05, |
| "loss": 2.7724, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.8297310626940149, |
| "grad_norm": 0.10467931628227234, |
| "learning_rate": 4.63333758443313e-05, |
| "loss": 2.7843, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.8303112942063884, |
| "grad_norm": 0.10281146317720413, |
| "learning_rate": 4.6026502909209004e-05, |
| "loss": 2.7842, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.8308915257187618, |
| "grad_norm": 0.1023208498954773, |
| "learning_rate": 4.572056513110867e-05, |
| "loss": 2.774, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.8314717572311352, |
| "grad_norm": 0.10323374718427658, |
| "learning_rate": 4.541556363652511e-05, |
| "loss": 2.7755, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.8320519887435086, |
| "grad_norm": 0.10136920213699341, |
| "learning_rate": 4.5111499548505727e-05, |
| "loss": 2.7814, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.8326322202558821, |
| "grad_norm": 0.10571028292179108, |
| "learning_rate": 4.4808373986646565e-05, |
| "loss": 2.7878, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.8332124517682555, |
| "grad_norm": 0.10252848267555237, |
| "learning_rate": 4.45061880670874e-05, |
| "loss": 2.7754, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.8337926832806289, |
| "grad_norm": 0.10471548140048981, |
| "learning_rate": 4.420494290250869e-05, |
| "loss": 2.7767, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.8343729147930025, |
| "grad_norm": 0.10701679438352585, |
| "learning_rate": 4.390463960212658e-05, |
| "loss": 2.7792, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.8349531463053759, |
| "grad_norm": 0.10377515107393265, |
| "learning_rate": 4.3605279271689264e-05, |
| "loss": 2.7829, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.8355333778177493, |
| "grad_norm": 0.10350141674280167, |
| "learning_rate": 4.330686301347298e-05, |
| "loss": 2.7861, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.8361136093301227, |
| "grad_norm": 0.10299152880907059, |
| "learning_rate": 4.300939192627742e-05, |
| "loss": 2.7891, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.8366938408424962, |
| "grad_norm": 0.1038345992565155, |
| "learning_rate": 4.2712867105422465e-05, |
| "loss": 2.7812, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.8372740723548696, |
| "grad_norm": 0.10262761265039444, |
| "learning_rate": 4.241728964274352e-05, |
| "loss": 2.7784, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.837854303867243, |
| "grad_norm": 0.10034337639808655, |
| "learning_rate": 4.212266062658777e-05, |
| "loss": 2.7857, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.8384345353796164, |
| "grad_norm": 0.10054679960012436, |
| "learning_rate": 4.1828981141810104e-05, |
| "loss": 2.7783, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.83901476689199, |
| "grad_norm": 0.10352133959531784, |
| "learning_rate": 4.15362522697691e-05, |
| "loss": 2.7936, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.8395949984043634, |
| "grad_norm": 0.10465723276138306, |
| "learning_rate": 4.124447508832332e-05, |
| "loss": 2.7692, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.8401752299167368, |
| "grad_norm": 0.10384640097618103, |
| "learning_rate": 4.095365067182665e-05, |
| "loss": 2.781, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.8407554614291102, |
| "grad_norm": 0.10312188416719437, |
| "learning_rate": 4.066378009112523e-05, |
| "loss": 2.7767, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.8413356929414837, |
| "grad_norm": 0.10447024554014206, |
| "learning_rate": 4.037486441355288e-05, |
| "loss": 2.7832, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.8419159244538571, |
| "grad_norm": 0.10162138938903809, |
| "learning_rate": 4.008690470292732e-05, |
| "loss": 2.7786, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.8424961559662305, |
| "grad_norm": 0.09777431935071945, |
| "learning_rate": 3.979990201954653e-05, |
| "loss": 2.7792, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.8430763874786039, |
| "grad_norm": 0.10050346702337265, |
| "learning_rate": 3.9513857420184216e-05, |
| "loss": 2.7866, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.8436566189909774, |
| "grad_norm": 0.10209480673074722, |
| "learning_rate": 3.922877195808678e-05, |
| "loss": 2.7886, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.8442368505033508, |
| "grad_norm": 0.10496553033590317, |
| "learning_rate": 3.894464668296864e-05, |
| "loss": 2.7854, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.8448170820157243, |
| "grad_norm": 0.10205195099115372, |
| "learning_rate": 3.8661482641008866e-05, |
| "loss": 2.7869, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.8453973135280977, |
| "grad_norm": 0.10940441489219666, |
| "learning_rate": 3.837928087484711e-05, |
| "loss": 2.7799, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.8459775450404712, |
| "grad_norm": 0.10287832468748093, |
| "learning_rate": 3.8098042423579766e-05, |
| "loss": 2.7804, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.8465577765528446, |
| "grad_norm": 0.0999421551823616, |
| "learning_rate": 3.781776832275639e-05, |
| "loss": 2.7835, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.847138008065218, |
| "grad_norm": 0.10340355336666107, |
| "learning_rate": 3.753845960437557e-05, |
| "loss": 2.7831, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.8477182395775914, |
| "grad_norm": 0.10355892032384872, |
| "learning_rate": 3.72601172968812e-05, |
| "loss": 2.7749, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.8482984710899649, |
| "grad_norm": 0.10467097908258438, |
| "learning_rate": 3.6982742425158886e-05, |
| "loss": 2.7834, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.8488787026023383, |
| "grad_norm": 0.1060672402381897, |
| "learning_rate": 3.670633601053182e-05, |
| "loss": 2.7801, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.8494589341147117, |
| "grad_norm": 0.10443491488695145, |
| "learning_rate": 3.643089907075759e-05, |
| "loss": 2.7896, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.8500391656270853, |
| "grad_norm": 0.1023486852645874, |
| "learning_rate": 3.6156432620023726e-05, |
| "loss": 2.7691, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.8506193971394587, |
| "grad_norm": 0.10417921096086502, |
| "learning_rate": 3.5882937668944476e-05, |
| "loss": 2.7703, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.8511996286518321, |
| "grad_norm": 0.10138606280088425, |
| "learning_rate": 3.561041522455691e-05, |
| "loss": 2.7885, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.8517798601642055, |
| "grad_norm": 0.10121186077594757, |
| "learning_rate": 3.5338866290317204e-05, |
| "loss": 2.7721, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.852360091676579, |
| "grad_norm": 0.10391680151224136, |
| "learning_rate": 3.506829186609691e-05, |
| "loss": 2.7818, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.8529403231889524, |
| "grad_norm": 0.10207725316286087, |
| "learning_rate": 3.479869294817955e-05, |
| "loss": 2.775, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.8535205547013258, |
| "grad_norm": 0.10676626861095428, |
| "learning_rate": 3.4530070529256524e-05, |
| "loss": 2.7759, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.8541007862136992, |
| "grad_norm": 0.10105539858341217, |
| "learning_rate": 3.42624255984237e-05, |
| "loss": 2.7855, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.8546810177260727, |
| "grad_norm": 0.10040144622325897, |
| "learning_rate": 3.399575914117777e-05, |
| "loss": 2.7736, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.8552612492384462, |
| "grad_norm": 0.10322125256061554, |
| "learning_rate": 3.3730072139412456e-05, |
| "loss": 2.7834, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.8558414807508196, |
| "grad_norm": 0.10220754891633987, |
| "learning_rate": 3.3465365571415315e-05, |
| "loss": 2.7692, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.856421712263193, |
| "grad_norm": 0.10107099264860153, |
| "learning_rate": 3.3201640411863584e-05, |
| "loss": 2.7672, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.8570019437755665, |
| "grad_norm": 0.10284842550754547, |
| "learning_rate": 3.293889763182089e-05, |
| "loss": 2.7851, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.8575821752879399, |
| "grad_norm": 0.10386528819799423, |
| "learning_rate": 3.26771381987337e-05, |
| "loss": 2.7787, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.8581624068003133, |
| "grad_norm": 0.1039406880736351, |
| "learning_rate": 3.241636307642769e-05, |
| "loss": 2.7838, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.8587426383126867, |
| "grad_norm": 0.1034376472234726, |
| "learning_rate": 3.2156573225104145e-05, |
| "loss": 2.7794, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.8593228698250602, |
| "grad_norm": 0.10199546813964844, |
| "learning_rate": 3.189776960133645e-05, |
| "loss": 2.7806, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.8599031013374336, |
| "grad_norm": 0.10086624324321747, |
| "learning_rate": 3.163995315806681e-05, |
| "loss": 2.7666, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.860483332849807, |
| "grad_norm": 0.10021676123142242, |
| "learning_rate": 3.138312484460228e-05, |
| "loss": 2.7738, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.8610635643621805, |
| "grad_norm": 0.10465867072343826, |
| "learning_rate": 3.112728560661164e-05, |
| "loss": 2.7786, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.861643795874554, |
| "grad_norm": 0.10076703131198883, |
| "learning_rate": 3.0872436386121776e-05, |
| "loss": 2.7705, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.8622240273869274, |
| "grad_norm": 0.10121941566467285, |
| "learning_rate": 3.061857812151414e-05, |
| "loss": 2.7737, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.8628042588993008, |
| "grad_norm": 0.10309196263551712, |
| "learning_rate": 3.0365711747521538e-05, |
| "loss": 2.7783, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.8633844904116743, |
| "grad_norm": 0.10456740111112595, |
| "learning_rate": 3.011383819522446e-05, |
| "loss": 2.7809, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.8639647219240477, |
| "grad_norm": 0.1025143563747406, |
| "learning_rate": 2.986295839204764e-05, |
| "loss": 2.7813, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.8645449534364211, |
| "grad_norm": 0.10585116595029831, |
| "learning_rate": 2.961307326175688e-05, |
| "loss": 2.7738, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.8651251849487945, |
| "grad_norm": 0.10203658789396286, |
| "learning_rate": 2.936418372445527e-05, |
| "loss": 2.7777, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.865705416461168, |
| "grad_norm": 0.10538860410451889, |
| "learning_rate": 2.911629069658037e-05, |
| "loss": 2.7757, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.8662856479735415, |
| "grad_norm": 0.10184674710035324, |
| "learning_rate": 2.8869395090900037e-05, |
| "loss": 2.7797, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.8668658794859149, |
| "grad_norm": 0.10757064819335938, |
| "learning_rate": 2.862349781650991e-05, |
| "loss": 2.7837, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.8674461109982883, |
| "grad_norm": 0.09947676211595535, |
| "learning_rate": 2.8378599778829492e-05, |
| "loss": 2.7764, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.8680263425106618, |
| "grad_norm": 0.0980169028043747, |
| "learning_rate": 2.8134701879598965e-05, |
| "loss": 2.7877, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.8686065740230352, |
| "grad_norm": 0.09837668389081955, |
| "learning_rate": 2.7891805016876057e-05, |
| "loss": 2.7806, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.8691868055354086, |
| "grad_norm": 0.09911120682954788, |
| "learning_rate": 2.7649910085032277e-05, |
| "loss": 2.7807, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.869767037047782, |
| "grad_norm": 0.09837288409471512, |
| "learning_rate": 2.7409017974750257e-05, |
| "loss": 2.7677, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.8703472685601555, |
| "grad_norm": 0.10560393333435059, |
| "learning_rate": 2.7169129573019943e-05, |
| "loss": 2.7785, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.8703472685601555, |
| "eval_loss": 2.7414441108703613, |
| "eval_runtime": 3.2661, |
| "eval_samples_per_second": 1325.755, |
| "eval_steps_per_second": 2.756, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.870927500072529, |
| "grad_norm": 0.09839779883623123, |
| "learning_rate": 2.6930245763135504e-05, |
| "loss": 2.7759, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.8715077315849024, |
| "grad_norm": 0.09770379960536957, |
| "learning_rate": 2.6692367424692272e-05, |
| "loss": 2.787, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.8720879630972758, |
| "grad_norm": 0.09834130108356476, |
| "learning_rate": 2.645549543358304e-05, |
| "loss": 2.7731, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.8726681946096493, |
| "grad_norm": 0.1047162264585495, |
| "learning_rate": 2.6219630661995528e-05, |
| "loss": 2.7832, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.8732484261220227, |
| "grad_norm": 0.10111907124519348, |
| "learning_rate": 2.5984773978408257e-05, |
| "loss": 2.779, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.8738286576343961, |
| "grad_norm": 0.10093654692173004, |
| "learning_rate": 2.5750926247588322e-05, |
| "loss": 2.768, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.8744088891467695, |
| "grad_norm": 0.10071719437837601, |
| "learning_rate": 2.551808833058755e-05, |
| "loss": 2.7867, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.874989120659143, |
| "grad_norm": 0.10237322747707367, |
| "learning_rate": 2.5286261084739445e-05, |
| "loss": 2.7838, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.8755693521715164, |
| "grad_norm": 0.09815766662359238, |
| "learning_rate": 2.5055445363656358e-05, |
| "loss": 2.7839, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.8761495836838898, |
| "grad_norm": 0.10203532874584198, |
| "learning_rate": 2.482564201722581e-05, |
| "loss": 2.7878, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.8767298151962634, |
| "grad_norm": 0.10766585171222687, |
| "learning_rate": 2.4596851891607884e-05, |
| "loss": 2.7823, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.8773100467086368, |
| "grad_norm": 0.09876078367233276, |
| "learning_rate": 2.4369075829231766e-05, |
| "loss": 2.7762, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.8778902782210102, |
| "grad_norm": 0.10014016181230545, |
| "learning_rate": 2.414231466879274e-05, |
| "loss": 2.7733, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.8784705097333836, |
| "grad_norm": 0.10114018619060516, |
| "learning_rate": 2.3916569245249306e-05, |
| "loss": 2.7861, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.8790507412457571, |
| "grad_norm": 0.10012462735176086, |
| "learning_rate": 2.3691840389819526e-05, |
| "loss": 2.7635, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.8796309727581305, |
| "grad_norm": 0.10367590934038162, |
| "learning_rate": 2.3468128929978757e-05, |
| "loss": 2.7727, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.8802112042705039, |
| "grad_norm": 0.10224179178476334, |
| "learning_rate": 2.3245435689456015e-05, |
| "loss": 2.7712, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.8807914357828773, |
| "grad_norm": 0.0989450216293335, |
| "learning_rate": 2.302376148823102e-05, |
| "loss": 2.7761, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.8813716672952508, |
| "grad_norm": 0.10036759078502655, |
| "learning_rate": 2.2803107142531617e-05, |
| "loss": 2.7815, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.8819518988076243, |
| "grad_norm": 0.10400567203760147, |
| "learning_rate": 2.2583473464830005e-05, |
| "loss": 2.7826, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.8825321303199977, |
| "grad_norm": 0.09990741312503815, |
| "learning_rate": 2.2364861263840507e-05, |
| "loss": 2.7869, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.8831123618323711, |
| "grad_norm": 0.10067487508058548, |
| "learning_rate": 2.2147271344516128e-05, |
| "loss": 2.7771, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.8836925933447446, |
| "grad_norm": 0.10068360716104507, |
| "learning_rate": 2.1930704508045714e-05, |
| "loss": 2.781, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.884272824857118, |
| "grad_norm": 0.10076344013214111, |
| "learning_rate": 2.171516155185117e-05, |
| "loss": 2.7793, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.8848530563694914, |
| "grad_norm": 0.0988764762878418, |
| "learning_rate": 2.1500643269584027e-05, |
| "loss": 2.772, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.8854332878818648, |
| "grad_norm": 0.09937159717082977, |
| "learning_rate": 2.1287150451123224e-05, |
| "loss": 2.7786, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.8860135193942383, |
| "grad_norm": 0.10244645178318024, |
| "learning_rate": 2.1074683882571675e-05, |
| "loss": 2.7752, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.8865937509066117, |
| "grad_norm": 0.09691537171602249, |
| "learning_rate": 2.0863244346253517e-05, |
| "loss": 2.7735, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.8871739824189852, |
| "grad_norm": 0.09877140074968338, |
| "learning_rate": 2.065283262071128e-05, |
| "loss": 2.777, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.8877542139313586, |
| "grad_norm": 0.09832227975130081, |
| "learning_rate": 2.044344948070289e-05, |
| "loss": 2.7718, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.8883344454437321, |
| "grad_norm": 0.09934905916452408, |
| "learning_rate": 2.02350956971992e-05, |
| "loss": 2.7725, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.8889146769561055, |
| "grad_norm": 0.09960002452135086, |
| "learning_rate": 2.0027772037380463e-05, |
| "loss": 2.77, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.8894949084684789, |
| "grad_norm": 0.10142461210489273, |
| "learning_rate": 1.9821479264634234e-05, |
| "loss": 2.7781, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.8900751399808524, |
| "grad_norm": 0.09648580849170685, |
| "learning_rate": 1.96162181385521e-05, |
| "loss": 2.7774, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.8906553714932258, |
| "grad_norm": 0.09822871536016464, |
| "learning_rate": 1.9411989414926953e-05, |
| "loss": 2.7718, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.8912356030055992, |
| "grad_norm": 0.1000954881310463, |
| "learning_rate": 1.9208793845750504e-05, |
| "loss": 2.7763, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.8918158345179726, |
| "grad_norm": 0.10170748084783554, |
| "learning_rate": 1.9006632179209925e-05, |
| "loss": 2.78, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.8923960660303462, |
| "grad_norm": 0.10458207130432129, |
| "learning_rate": 1.8805505159685807e-05, |
| "loss": 2.77, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.8929762975427196, |
| "grad_norm": 0.09986699372529984, |
| "learning_rate": 1.8605413527748823e-05, |
| "loss": 2.776, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.893556529055093, |
| "grad_norm": 0.09813553094863892, |
| "learning_rate": 1.8406358020157364e-05, |
| "loss": 2.7711, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.8941367605674664, |
| "grad_norm": 0.09960541874170303, |
| "learning_rate": 1.8208339369854663e-05, |
| "loss": 2.7781, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.8947169920798399, |
| "grad_norm": 0.09737250953912735, |
| "learning_rate": 1.801135830596605e-05, |
| "loss": 2.7657, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.8952972235922133, |
| "grad_norm": 0.0949782207608223, |
| "learning_rate": 1.7815415553796575e-05, |
| "loss": 2.7705, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.8958774551045867, |
| "grad_norm": 0.09773328900337219, |
| "learning_rate": 1.762051183482788e-05, |
| "loss": 2.7684, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.8964576866169601, |
| "grad_norm": 0.09638100862503052, |
| "learning_rate": 1.7426647866715925e-05, |
| "loss": 2.7724, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.8970379181293336, |
| "grad_norm": 0.09620904177427292, |
| "learning_rate": 1.7233824363288118e-05, |
| "loss": 2.7738, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.897618149641707, |
| "grad_norm": 0.09929810464382172, |
| "learning_rate": 1.7042042034540783e-05, |
| "loss": 2.7754, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.8981983811540805, |
| "grad_norm": 0.09778960049152374, |
| "learning_rate": 1.6851301586636613e-05, |
| "loss": 2.7766, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.8987786126664539, |
| "grad_norm": 0.09684190899133682, |
| "learning_rate": 1.6661603721901873e-05, |
| "loss": 2.7777, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.8993588441788274, |
| "grad_norm": 0.09664195775985718, |
| "learning_rate": 1.6472949138823967e-05, |
| "loss": 2.7859, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.8999390756912008, |
| "grad_norm": 0.10036718100309372, |
| "learning_rate": 1.628533853204883e-05, |
| "loss": 2.7713, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.9005193072035742, |
| "grad_norm": 0.09811628609895706, |
| "learning_rate": 1.6098772592378417e-05, |
| "loss": 2.7733, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.9010995387159476, |
| "grad_norm": 0.09862551838159561, |
| "learning_rate": 1.591325200676795e-05, |
| "loss": 2.7701, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.9016797702283211, |
| "grad_norm": 0.09947618097066879, |
| "learning_rate": 1.5728777458323803e-05, |
| "loss": 2.7771, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.9022600017406945, |
| "grad_norm": 0.09834101796150208, |
| "learning_rate": 1.554534962630053e-05, |
| "loss": 2.7768, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.902840233253068, |
| "grad_norm": 0.10113567858934402, |
| "learning_rate": 1.5362969186098594e-05, |
| "loss": 2.7682, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.9034204647654415, |
| "grad_norm": 0.0977102592587471, |
| "learning_rate": 1.5181636809261921e-05, |
| "loss": 2.7769, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.9040006962778149, |
| "grad_norm": 0.09831026196479797, |
| "learning_rate": 1.5001353163475283e-05, |
| "loss": 2.7681, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.9045809277901883, |
| "grad_norm": 0.09537149965763092, |
| "learning_rate": 1.4822118912561943e-05, |
| "loss": 2.7628, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.9051611593025617, |
| "grad_norm": 0.09654498845338821, |
| "learning_rate": 1.4643934716481253e-05, |
| "loss": 2.7676, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.9057413908149352, |
| "grad_norm": 0.09738855808973312, |
| "learning_rate": 1.446680123132603e-05, |
| "loss": 2.7744, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.9063216223273086, |
| "grad_norm": 0.10082467645406723, |
| "learning_rate": 1.4290719109320382e-05, |
| "loss": 2.7706, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.906901853839682, |
| "grad_norm": 0.10283984988927841, |
| "learning_rate": 1.4115688998817043e-05, |
| "loss": 2.7742, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.9074820853520554, |
| "grad_norm": 0.09994236379861832, |
| "learning_rate": 1.3941711544295287e-05, |
| "loss": 2.7638, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.908062316864429, |
| "grad_norm": 0.09737379103899002, |
| "learning_rate": 1.3768787386358282e-05, |
| "loss": 2.7715, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.9086425483768024, |
| "grad_norm": 0.09915235638618469, |
| "learning_rate": 1.3596917161730902e-05, |
| "loss": 2.7694, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.9092227798891758, |
| "grad_norm": 0.09791626036167145, |
| "learning_rate": 1.3426101503257358e-05, |
| "loss": 2.7628, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.9098030114015492, |
| "grad_norm": 0.09681922197341919, |
| "learning_rate": 1.3256341039898766e-05, |
| "loss": 2.7741, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.9103832429139227, |
| "grad_norm": 0.09645412862300873, |
| "learning_rate": 1.3087636396730949e-05, |
| "loss": 2.7704, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.9109634744262961, |
| "grad_norm": 0.09795381873846054, |
| "learning_rate": 1.2919988194942011e-05, |
| "loss": 2.7666, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.9115437059386695, |
| "grad_norm": 0.09636548161506653, |
| "learning_rate": 1.2753397051830294e-05, |
| "loss": 2.7763, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.9121239374510429, |
| "grad_norm": 0.0992702841758728, |
| "learning_rate": 1.2587863580801794e-05, |
| "loss": 2.7693, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.9127041689634164, |
| "grad_norm": 0.09708980470895767, |
| "learning_rate": 1.2423388391368083e-05, |
| "loss": 2.7696, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.9132844004757898, |
| "grad_norm": 0.09657064080238342, |
| "learning_rate": 1.2259972089144054e-05, |
| "loss": 2.7799, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.9138646319881633, |
| "grad_norm": 0.09743205457925797, |
| "learning_rate": 1.2097615275845617e-05, |
| "loss": 2.7683, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.9144448635005367, |
| "grad_norm": 0.09803003072738647, |
| "learning_rate": 1.1936318549287638e-05, |
| "loss": 2.7731, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.9150250950129102, |
| "grad_norm": 0.0977969542145729, |
| "learning_rate": 1.1776082503381468e-05, |
| "loss": 2.778, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.9156053265252836, |
| "grad_norm": 0.0986003428697586, |
| "learning_rate": 1.1616907728133084e-05, |
| "loss": 2.7794, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.916185558037657, |
| "grad_norm": 0.09887285530567169, |
| "learning_rate": 1.1458794809640693e-05, |
| "loss": 2.7743, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.9167657895500304, |
| "grad_norm": 0.10056151449680328, |
| "learning_rate": 1.1301744330092522e-05, |
| "loss": 2.7739, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.9173460210624039, |
| "grad_norm": 0.09636414051055908, |
| "learning_rate": 1.1145756867765033e-05, |
| "loss": 2.7772, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.9179262525747773, |
| "grad_norm": 0.09793318808078766, |
| "learning_rate": 1.0990832997020282e-05, |
| "loss": 2.7729, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.9185064840871507, |
| "grad_norm": 0.09378232061862946, |
| "learning_rate": 1.0836973288304229e-05, |
| "loss": 2.7783, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.9190867155995243, |
| "grad_norm": 0.09904693067073822, |
| "learning_rate": 1.0684178308144498e-05, |
| "loss": 2.7697, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.9196669471118977, |
| "grad_norm": 0.0982363149523735, |
| "learning_rate": 1.0532448619148115e-05, |
| "loss": 2.7712, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.9202471786242711, |
| "grad_norm": 0.0995451807975769, |
| "learning_rate": 1.038178477999978e-05, |
| "loss": 2.7702, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.9208274101366445, |
| "grad_norm": 0.09749618917703629, |
| "learning_rate": 1.0232187345459431e-05, |
| "loss": 2.771, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.921407641649018, |
| "grad_norm": 0.09808894246816635, |
| "learning_rate": 1.0083656866360646e-05, |
| "loss": 2.7706, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.9219878731613914, |
| "grad_norm": 0.09838584810495377, |
| "learning_rate": 9.936193889608012e-06, |
| "loss": 2.7656, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.9225681046737648, |
| "grad_norm": 0.10016359388828278, |
| "learning_rate": 9.789798958175832e-06, |
| "loss": 2.7749, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.9231483361861382, |
| "grad_norm": 0.09670013934373856, |
| "learning_rate": 9.64447261110548e-06, |
| "loss": 2.7693, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.9237285676985117, |
| "grad_norm": 0.09639087319374084, |
| "learning_rate": 9.500215383503784e-06, |
| "loss": 2.7675, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.9243087992108852, |
| "grad_norm": 0.09851641952991486, |
| "learning_rate": 9.357027806541084e-06, |
| "loss": 2.7748, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.9248890307232586, |
| "grad_norm": 0.10145829617977142, |
| "learning_rate": 9.214910407448871e-06, |
| "loss": 2.7841, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.925469262235632, |
| "grad_norm": 0.09769120067358017, |
| "learning_rate": 9.073863709518426e-06, |
| "loss": 2.7703, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.9260494937480055, |
| "grad_norm": 0.09475893527269363, |
| "learning_rate": 8.933888232098408e-06, |
| "loss": 2.7703, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.9266297252603789, |
| "grad_norm": 0.09624000638723373, |
| "learning_rate": 8.794984490593171e-06, |
| "loss": 2.7753, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.9272099567727523, |
| "grad_norm": 0.09569297730922699, |
| "learning_rate": 8.657152996460958e-06, |
| "loss": 2.7635, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.9277901882851257, |
| "grad_norm": 0.10107609629631042, |
| "learning_rate": 8.520394257211605e-06, |
| "loss": 2.7714, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.9283704197974992, |
| "grad_norm": 0.09753672778606415, |
| "learning_rate": 8.384708776405236e-06, |
| "loss": 2.7706, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.9283704197974992, |
| "eval_loss": 2.7369606494903564, |
| "eval_runtime": 3.2559, |
| "eval_samples_per_second": 1329.896, |
| "eval_steps_per_second": 2.764, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.9289506513098726, |
| "grad_norm": 0.09548928588628769, |
| "learning_rate": 8.25009705364994e-06, |
| "loss": 2.7754, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.929530882822246, |
| "grad_norm": 0.09287203848361969, |
| "learning_rate": 8.116559584600201e-06, |
| "loss": 2.7777, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.9301111143346195, |
| "grad_norm": 0.0972280502319336, |
| "learning_rate": 7.984096860955036e-06, |
| "loss": 2.781, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.930691345846993, |
| "grad_norm": 0.09617298096418381, |
| "learning_rate": 7.852709370455922e-06, |
| "loss": 2.7692, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.9312715773593664, |
| "grad_norm": 0.09682459384202957, |
| "learning_rate": 7.72239759688551e-06, |
| "loss": 2.7742, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.9318518088717398, |
| "grad_norm": 0.09648177772760391, |
| "learning_rate": 7.593162020065313e-06, |
| "loss": 2.7783, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.9324320403841133, |
| "grad_norm": 0.09511367976665497, |
| "learning_rate": 7.4650031158542845e-06, |
| "loss": 2.7706, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.9330122718964867, |
| "grad_norm": 0.09434488415718079, |
| "learning_rate": 7.337921356146981e-06, |
| "loss": 2.7694, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.9335925034088601, |
| "grad_norm": 0.09737717360258102, |
| "learning_rate": 7.211917208871665e-06, |
| "loss": 2.7674, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.9341727349212335, |
| "grad_norm": 0.09725455194711685, |
| "learning_rate": 7.086991137988906e-06, |
| "loss": 2.7639, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.9347529664336071, |
| "grad_norm": 0.10136746615171432, |
| "learning_rate": 6.963143603489518e-06, |
| "loss": 2.7677, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.9353331979459805, |
| "grad_norm": 0.09756675362586975, |
| "learning_rate": 6.840375061393122e-06, |
| "loss": 2.765, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.9359134294583539, |
| "grad_norm": 0.09939330816268921, |
| "learning_rate": 6.718685963746318e-06, |
| "loss": 2.7751, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.9364936609707273, |
| "grad_norm": 0.09836092591285706, |
| "learning_rate": 6.598076758621118e-06, |
| "loss": 2.7828, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.9370738924831008, |
| "grad_norm": 0.09677501767873764, |
| "learning_rate": 6.4785478901133506e-06, |
| "loss": 2.769, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.9376541239954742, |
| "grad_norm": 0.097322478890419, |
| "learning_rate": 6.360099798340656e-06, |
| "loss": 2.7656, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.9382343555078476, |
| "grad_norm": 0.09472298622131348, |
| "learning_rate": 6.242732919441462e-06, |
| "loss": 2.7737, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.938814587020221, |
| "grad_norm": 0.09517394751310349, |
| "learning_rate": 6.126447685572844e-06, |
| "loss": 2.7807, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.9393948185325945, |
| "grad_norm": 0.09591302275657654, |
| "learning_rate": 6.011244524909198e-06, |
| "loss": 2.7774, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.939975050044968, |
| "grad_norm": 0.09797896444797516, |
| "learning_rate": 5.8971238616407405e-06, |
| "loss": 2.7637, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.9405552815573414, |
| "grad_norm": 0.09744720160961151, |
| "learning_rate": 5.7840861159715425e-06, |
| "loss": 2.7773, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.9411355130697148, |
| "grad_norm": 0.09814444929361343, |
| "learning_rate": 5.672131704118565e-06, |
| "loss": 2.7741, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.9417157445820883, |
| "grad_norm": 0.09604529291391373, |
| "learning_rate": 5.561261038309628e-06, |
| "loss": 2.7727, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.9422959760944617, |
| "grad_norm": 0.09737398475408554, |
| "learning_rate": 5.4514745267821404e-06, |
| "loss": 2.7737, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.9428762076068351, |
| "grad_norm": 0.09697815030813217, |
| "learning_rate": 5.342772573781507e-06, |
| "loss": 2.7638, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.9434564391192085, |
| "grad_norm": 0.09917178004980087, |
| "learning_rate": 5.235155579559725e-06, |
| "loss": 2.7709, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.944036670631582, |
| "grad_norm": 0.096290223300457, |
| "learning_rate": 5.128623940373888e-06, |
| "loss": 2.7674, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.9446169021439554, |
| "grad_norm": 0.09504272043704987, |
| "learning_rate": 5.023178048484589e-06, |
| "loss": 2.7694, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.9451971336563288, |
| "grad_norm": 0.09743209183216095, |
| "learning_rate": 4.91881829215468e-06, |
| "loss": 2.781, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.9457773651687024, |
| "grad_norm": 0.09843679517507553, |
| "learning_rate": 4.815545055647718e-06, |
| "loss": 2.776, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.9463575966810758, |
| "grad_norm": 0.0955999493598938, |
| "learning_rate": 4.713358719226523e-06, |
| "loss": 2.7789, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.9469378281934492, |
| "grad_norm": 0.09576351940631866, |
| "learning_rate": 4.612259659151984e-06, |
| "loss": 2.7716, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.9475180597058226, |
| "grad_norm": 0.09730935841798782, |
| "learning_rate": 4.512248247681394e-06, |
| "loss": 2.7802, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.9480982912181961, |
| "grad_norm": 0.09646177291870117, |
| "learning_rate": 4.413324853067213e-06, |
| "loss": 2.7765, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.9486785227305695, |
| "grad_norm": 0.09553349018096924, |
| "learning_rate": 4.3154898395557744e-06, |
| "loss": 2.778, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.9492587542429429, |
| "grad_norm": 0.09604230523109436, |
| "learning_rate": 4.218743567385852e-06, |
| "loss": 2.78, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.9498389857553163, |
| "grad_norm": 0.09518173336982727, |
| "learning_rate": 4.123086392787289e-06, |
| "loss": 2.7695, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.9504192172676899, |
| "grad_norm": 0.09625556319952011, |
| "learning_rate": 4.0285186679799406e-06, |
| "loss": 2.7694, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.9509994487800633, |
| "grad_norm": 0.09755248576402664, |
| "learning_rate": 3.935040741171969e-06, |
| "loss": 2.7625, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.9515796802924367, |
| "grad_norm": 0.09465952962636948, |
| "learning_rate": 3.842652956558945e-06, |
| "loss": 2.7658, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.9521599118048101, |
| "grad_norm": 0.0960998460650444, |
| "learning_rate": 3.7513556543223855e-06, |
| "loss": 2.7846, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.9527401433171836, |
| "grad_norm": 0.09892145544290543, |
| "learning_rate": 3.6611491706284856e-06, |
| "loss": 2.7708, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.953320374829557, |
| "grad_norm": 0.09714221954345703, |
| "learning_rate": 3.572033837626953e-06, |
| "loss": 2.7874, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.9539006063419304, |
| "grad_norm": 0.09727420657873154, |
| "learning_rate": 3.484009983449809e-06, |
| "loss": 2.7834, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.9544808378543038, |
| "grad_norm": 0.09665530920028687, |
| "learning_rate": 3.397077932210124e-06, |
| "loss": 2.7726, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.9550610693666773, |
| "grad_norm": 0.09558922797441483, |
| "learning_rate": 3.3112380040008156e-06, |
| "loss": 2.7723, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.9556413008790507, |
| "grad_norm": 0.0972527414560318, |
| "learning_rate": 3.2264905148934208e-06, |
| "loss": 2.772, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.9562215323914242, |
| "grad_norm": 0.09882599860429764, |
| "learning_rate": 3.142835776937158e-06, |
| "loss": 2.7685, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.9568017639037976, |
| "grad_norm": 0.09505190700292587, |
| "learning_rate": 3.060274098157467e-06, |
| "loss": 2.7694, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.9573819954161711, |
| "grad_norm": 0.09600254893302917, |
| "learning_rate": 2.9788057825551714e-06, |
| "loss": 2.7778, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.9579622269285445, |
| "grad_norm": 0.09696151316165924, |
| "learning_rate": 2.8984311301050835e-06, |
| "loss": 2.784, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.9585424584409179, |
| "grad_norm": 0.09621264785528183, |
| "learning_rate": 2.819150436755135e-06, |
| "loss": 2.7668, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.9591226899532914, |
| "grad_norm": 0.09673577547073364, |
| "learning_rate": 2.7409639944251162e-06, |
| "loss": 2.774, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.9597029214656648, |
| "grad_norm": 0.09513070434331894, |
| "learning_rate": 2.6638720910056697e-06, |
| "loss": 2.7783, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.9602831529780382, |
| "grad_norm": 0.09311112761497498, |
| "learning_rate": 2.587875010357332e-06, |
| "loss": 2.7665, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.9608633844904116, |
| "grad_norm": 0.09406144171953201, |
| "learning_rate": 2.5129730323092622e-06, |
| "loss": 2.7671, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.9614436160027852, |
| "grad_norm": 0.09770730882883072, |
| "learning_rate": 2.439166432658446e-06, |
| "loss": 2.7673, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.9620238475151586, |
| "grad_norm": 0.09938254207372665, |
| "learning_rate": 2.366455483168428e-06, |
| "loss": 2.7637, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.962604079027532, |
| "grad_norm": 0.09504234790802002, |
| "learning_rate": 2.2948404515686136e-06, |
| "loss": 2.7708, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.9631843105399054, |
| "grad_norm": 0.09619156271219254, |
| "learning_rate": 2.2243216015530362e-06, |
| "loss": 2.7716, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.9637645420522789, |
| "grad_norm": 0.09520803391933441, |
| "learning_rate": 2.1548991927794244e-06, |
| "loss": 2.771, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.9643447735646523, |
| "grad_norm": 0.09521950781345367, |
| "learning_rate": 2.0865734808684697e-06, |
| "loss": 2.7679, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.9649250050770257, |
| "grad_norm": 0.09744451195001602, |
| "learning_rate": 2.0193447174025268e-06, |
| "loss": 2.7715, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.9655052365893991, |
| "grad_norm": 0.09531662613153458, |
| "learning_rate": 1.953213149924948e-06, |
| "loss": 2.7824, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.9660854681017726, |
| "grad_norm": 0.09525689482688904, |
| "learning_rate": 1.8881790219391512e-06, |
| "loss": 2.7694, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.9666656996141461, |
| "grad_norm": 0.09457177668809891, |
| "learning_rate": 1.8242425729075527e-06, |
| "loss": 2.7588, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.9672459311265195, |
| "grad_norm": 0.09685463458299637, |
| "learning_rate": 1.7614040382508687e-06, |
| "loss": 2.7714, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.9678261626388929, |
| "grad_norm": 0.09774652868509293, |
| "learning_rate": 1.6996636493471494e-06, |
| "loss": 2.7683, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.9684063941512664, |
| "grad_norm": 0.09525836259126663, |
| "learning_rate": 1.6390216335309792e-06, |
| "loss": 2.77, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.9689866256636398, |
| "grad_norm": 0.09421420842409134, |
| "learning_rate": 1.5794782140926775e-06, |
| "loss": 2.7723, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.9695668571760132, |
| "grad_norm": 0.09693361073732376, |
| "learning_rate": 1.5210336102772668e-06, |
| "loss": 2.772, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.9701470886883866, |
| "grad_norm": 0.09740012139081955, |
| "learning_rate": 1.463688037283972e-06, |
| "loss": 2.7673, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.9707273202007601, |
| "grad_norm": 0.09596629440784454, |
| "learning_rate": 1.4074417062651221e-06, |
| "loss": 2.7878, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.9713075517131335, |
| "grad_norm": 0.09561031311750412, |
| "learning_rate": 1.3522948243256503e-06, |
| "loss": 2.7728, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.971887783225507, |
| "grad_norm": 0.09793524444103241, |
| "learning_rate": 1.2982475945221615e-06, |
| "loss": 2.7718, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.9724680147378804, |
| "grad_norm": 0.09407012164592743, |
| "learning_rate": 1.245300215862166e-06, |
| "loss": 2.7797, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.9730482462502539, |
| "grad_norm": 0.09444325417280197, |
| "learning_rate": 1.1934528833035139e-06, |
| "loss": 2.7725, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.9736284777626273, |
| "grad_norm": 0.09787797182798386, |
| "learning_rate": 1.1427057877534951e-06, |
| "loss": 2.7691, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.9742087092750007, |
| "grad_norm": 0.09456036239862442, |
| "learning_rate": 1.09305911606824e-06, |
| "loss": 2.7766, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.9747889407873742, |
| "grad_norm": 0.095250204205513, |
| "learning_rate": 1.044513051051954e-06, |
| "loss": 2.7701, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.9753691722997476, |
| "grad_norm": 0.09521818906068802, |
| "learning_rate": 9.970677714563835e-07, |
| "loss": 2.7734, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.975949403812121, |
| "grad_norm": 0.09462135285139084, |
| "learning_rate": 9.507234519800178e-07, |
| "loss": 2.7705, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.9765296353244944, |
| "grad_norm": 0.09560775011777878, |
| "learning_rate": 9.054802632674551e-07, |
| "loss": 2.7691, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.977109866836868, |
| "grad_norm": 0.09410873800516129, |
| "learning_rate": 8.61338371908904e-07, |
| "loss": 2.7787, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.9776900983492414, |
| "grad_norm": 0.09606259316205978, |
| "learning_rate": 8.18297940439383e-07, |
| "loss": 2.7766, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.9782703298616148, |
| "grad_norm": 0.09549134224653244, |
| "learning_rate": 7.763591273382885e-07, |
| "loss": 2.7701, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.9788505613739882, |
| "grad_norm": 0.09225918352603912, |
| "learning_rate": 7.355220870287615e-07, |
| "loss": 2.7635, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.9794307928863617, |
| "grad_norm": 0.09305543452501297, |
| "learning_rate": 6.95786969876988e-07, |
| "loss": 2.7659, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.9800110243987351, |
| "grad_norm": 0.09393244236707687, |
| "learning_rate": 6.571539221918997e-07, |
| "loss": 2.7743, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.9805912559111085, |
| "grad_norm": 0.09278815984725952, |
| "learning_rate": 6.196230862244078e-07, |
| "loss": 2.78, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.9811714874234819, |
| "grad_norm": 0.09347451478242874, |
| "learning_rate": 5.831946001669697e-07, |
| "loss": 2.7747, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.9817517189358554, |
| "grad_norm": 0.09540887176990509, |
| "learning_rate": 5.478685981530894e-07, |
| "loss": 2.7758, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.9823319504482289, |
| "grad_norm": 0.09621070325374603, |
| "learning_rate": 5.136452102567856e-07, |
| "loss": 2.7713, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.9829121819606023, |
| "grad_norm": 0.09409264475107193, |
| "learning_rate": 4.805245624922238e-07, |
| "loss": 2.7778, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.9834924134729757, |
| "grad_norm": 0.09619985520839691, |
| "learning_rate": 4.4850677681301795e-07, |
| "loss": 2.7701, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.9840726449853492, |
| "grad_norm": 0.09401355683803558, |
| "learning_rate": 4.1759197111206344e-07, |
| "loss": 2.7689, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.9846528764977226, |
| "grad_norm": 0.09698129445314407, |
| "learning_rate": 3.877802592209045e-07, |
| "loss": 2.7703, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.985233108010096, |
| "grad_norm": 0.09333529323339462, |
| "learning_rate": 3.590717509093677e-07, |
| "loss": 2.7784, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.9858133395224694, |
| "grad_norm": 0.09353555738925934, |
| "learning_rate": 3.3146655188519557e-07, |
| "loss": 2.7687, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.9863935710348429, |
| "grad_norm": 0.09438835084438324, |
| "learning_rate": 3.0496476379364697e-07, |
| "loss": 2.7665, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.9863935710348429, |
| "eval_loss": 2.735684633255005, |
| "eval_runtime": 3.2561, |
| "eval_samples_per_second": 1329.798, |
| "eval_steps_per_second": 2.764, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.9869738025472163, |
| "grad_norm": 0.09504197537899017, |
| "learning_rate": 2.7956648421703087e-07, |
| "loss": 2.7762, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.9875540340595897, |
| "grad_norm": 0.09602217376232147, |
| "learning_rate": 2.5527180667453963e-07, |
| "loss": 2.7673, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.9881342655719633, |
| "grad_norm": 0.09483738243579865, |
| "learning_rate": 2.3208082062168288e-07, |
| "loss": 2.7705, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.9887144970843367, |
| "grad_norm": 0.09395676851272583, |
| "learning_rate": 2.0999361145008775e-07, |
| "loss": 2.7692, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.9892947285967101, |
| "grad_norm": 0.09432484954595566, |
| "learning_rate": 1.8901026048719902e-07, |
| "loss": 2.7707, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.9898749601090835, |
| "grad_norm": 0.09382540732622147, |
| "learning_rate": 1.6913084499587948e-07, |
| "loss": 2.7788, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.990455191621457, |
| "grad_norm": 0.09619873762130737, |
| "learning_rate": 1.5035543817427663e-07, |
| "loss": 2.7604, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.9910354231338304, |
| "grad_norm": 0.09365525841712952, |
| "learning_rate": 1.3268410915532323e-07, |
| "loss": 2.7785, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.9916156546462038, |
| "grad_norm": 0.09718578308820724, |
| "learning_rate": 1.1611692300680376e-07, |
| "loss": 2.7745, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.9921958861585772, |
| "grad_norm": 0.0956762507557869, |
| "learning_rate": 1.0065394073075494e-07, |
| "loss": 2.7813, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.9927761176709508, |
| "grad_norm": 0.09347262978553772, |
| "learning_rate": 8.629521926353244e-08, |
| "loss": 2.7714, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.9933563491833242, |
| "grad_norm": 0.09415694326162338, |
| "learning_rate": 7.304081147544439e-08, |
| "loss": 2.7837, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.9939365806956976, |
| "grad_norm": 0.09390881657600403, |
| "learning_rate": 6.089076617058486e-08, |
| "loss": 2.7725, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.994516812208071, |
| "grad_norm": 0.09363935142755508, |
| "learning_rate": 4.984512808673402e-08, |
| "loss": 2.776, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.9950970437204445, |
| "grad_norm": 0.0957217812538147, |
| "learning_rate": 3.9903937895091606e-08, |
| "loss": 2.7731, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.9956772752328179, |
| "grad_norm": 0.09717927128076553, |
| "learning_rate": 3.1067232200110426e-08, |
| "loss": 2.7703, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.9962575067451913, |
| "grad_norm": 0.09413953870534897, |
| "learning_rate": 2.333504353952964e-08, |
| "loss": 2.7733, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.9968377382575647, |
| "grad_norm": 0.09774868190288544, |
| "learning_rate": 1.670740038400842e-08, |
| "loss": 2.7658, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.9974179697699382, |
| "grad_norm": 0.09658750146627426, |
| "learning_rate": 1.1184327137292448e-08, |
| "loss": 2.7734, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.9979982012823116, |
| "grad_norm": 0.0932522714138031, |
| "learning_rate": 6.765844135847576e-09, |
| "loss": 2.7708, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.9985784327946851, |
| "grad_norm": 0.09543392807245255, |
| "learning_rate": 3.4519676490596393e-09, |
| "loss": 2.7746, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.9991586643070585, |
| "grad_norm": 0.09391433745622635, |
| "learning_rate": 1.2427098789347111e-09, |
| "loss": 2.7707, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.999738895819432, |
| "grad_norm": 0.0975637212395668, |
| "learning_rate": 1.3807896016571064e-10, |
| "loss": 2.77, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.9999709884243814, |
| "step": 17234, |
| "total_flos": 4.402536853133695e+19, |
| "train_loss": 3.082940493684724, |
| "train_runtime": 20985.9807, |
| "train_samples_per_second": 420.462, |
| "train_steps_per_second": 0.821 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 17234, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.402536853133695e+19, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|