{ "best_metric": 0.18765884637832642, "best_model_checkpoint": "/home/datta0/models/lora_final/Qwen2-7B_metamath_default/checkpoint-13", "epoch": 0.9995949777237748, "eval_steps": 13, "global_step": 617, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016200891049007696, "grad_norm": 4.52362585067749, "learning_rate": 2.3076923076923076e-05, "loss": 0.2546, "step": 1 }, { "epoch": 0.011340623734305387, "grad_norm": 1.5377665758132935, "learning_rate": 0.00016153846153846153, "loss": 0.1753, "step": 7 }, { "epoch": 0.021061158363710003, "eval_loss": 0.18765884637832642, "eval_runtime": 12.39, "eval_samples_per_second": 40.355, "eval_steps_per_second": 5.085, "step": 13 }, { "epoch": 0.022681247468610773, "grad_norm": 2.192612409591675, "learning_rate": 0.0002999979709808197, "loss": 0.1716, "step": 14 }, { "epoch": 0.03402187120291616, "grad_norm": 2.496940851211548, "learning_rate": 0.0002998701612152596, "loss": 0.2034, "step": 21 }, { "epoch": 0.042122316727420006, "eval_loss": 0.2530185580253601, "eval_runtime": 81.8512, "eval_samples_per_second": 6.109, "eval_steps_per_second": 0.77, "step": 26 }, { "epoch": 0.04536249493722155, "grad_norm": 2.3953869342803955, "learning_rate": 0.0002995437011859465, "loss": 0.2398, "step": 28 }, { "epoch": 0.056703118671526935, "grad_norm": 2.8134021759033203, "learning_rate": 0.00029901902360990936, "loss": 0.261, "step": 35 }, { "epoch": 0.06318347509113001, "eval_loss": 0.2844119071960449, "eval_runtime": 86.8656, "eval_samples_per_second": 5.756, "eval_steps_per_second": 0.725, "step": 39 }, { "epoch": 0.06804374240583232, "grad_norm": 2.448476791381836, "learning_rate": 0.00029829682393805085, "loss": 0.2623, "step": 42 }, { "epoch": 0.0793843661401377, "grad_norm": 2.409214973449707, "learning_rate": 0.0002973780594333385, "loss": 0.2864, "step": 49 }, { "epoch": 0.08424463345484001, "eval_loss": 0.3106594681739807, "eval_runtime": 12.4045, "eval_samples_per_second": 40.308, "eval_steps_per_second": 5.079, "step": 52 }, { "epoch": 0.0907249898744431, "grad_norm": 2.424531936645508, "learning_rate": 0.00029626394790197025, "loss": 0.3005, "step": 56 }, { "epoch": 0.10206561360874848, "grad_norm": 2.680947780609131, "learning_rate": 0.00029495596607919305, "loss": 0.2984, "step": 63 }, { "epoch": 0.10530579181855002, "eval_loss": 0.32644107937812805, "eval_runtime": 85.363, "eval_samples_per_second": 5.857, "eval_steps_per_second": 0.738, "step": 65 }, { "epoch": 0.11340623734305387, "grad_norm": 2.4300179481506348, "learning_rate": 0.00029345584767191685, "loss": 0.306, "step": 70 }, { "epoch": 0.12474686107735926, "grad_norm": 2.570291519165039, "learning_rate": 0.0002917655810607161, "loss": 0.318, "step": 77 }, { "epoch": 0.12636695018226002, "eval_loss": 0.33606359362602234, "eval_runtime": 75.386, "eval_samples_per_second": 6.633, "eval_steps_per_second": 0.836, "step": 78 }, { "epoch": 0.13608748481166463, "grad_norm": 2.4007227420806885, "learning_rate": 0.0002898874066642667, "loss": 0.3192, "step": 84 }, { "epoch": 0.14742810854597002, "grad_norm": 2.463517427444458, "learning_rate": 0.00028782381396971003, "loss": 0.3257, "step": 91 }, { "epoch": 0.14742810854597002, "eval_loss": 0.3400190472602844, "eval_runtime": 12.4934, "eval_samples_per_second": 40.021, "eval_steps_per_second": 5.043, "step": 91 }, { "epoch": 0.1587687322802754, "grad_norm": 2.301016330718994, "learning_rate": 0.00028557753823288173, "loss": 0.3139, "step": 98 }, { "epoch": 0.16848926690968002, "eval_loss": 0.33769935369491577, "eval_runtime": 90.0772, "eval_samples_per_second": 5.551, "eval_steps_per_second": 0.699, "step": 104 }, { "epoch": 0.1701093560145808, "grad_norm": 2.232789993286133, "learning_rate": 0.0002831515568527781, "loss": 0.326, "step": 105 }, { "epoch": 0.1814499797488862, "grad_norm": 2.174959897994995, "learning_rate": 0.00028054908542506627, "loss": 0.327, "step": 112 }, { "epoch": 0.18955042527339003, "eval_loss": 0.3428936302661896, "eval_runtime": 89.6799, "eval_samples_per_second": 5.575, "eval_steps_per_second": 0.702, "step": 117 }, { "epoch": 0.19279060348319157, "grad_norm": 2.3661577701568604, "learning_rate": 0.00027777357347986823, "loss": 0.319, "step": 119 }, { "epoch": 0.20413122721749696, "grad_norm": 2.149407386779785, "learning_rate": 0.00027482869990946986, "loss": 0.3219, "step": 126 }, { "epoch": 0.21061158363710003, "eval_loss": 0.3457578420639038, "eval_runtime": 12.3968, "eval_samples_per_second": 40.333, "eval_steps_per_second": 5.082, "step": 130 }, { "epoch": 0.21547185095180235, "grad_norm": 2.0185813903808594, "learning_rate": 0.0002717183680920135, "loss": 0.3251, "step": 133 }, { "epoch": 0.22681247468610774, "grad_norm": 2.2934110164642334, "learning_rate": 0.00026844670071763906, "loss": 0.3325, "step": 140 }, { "epoch": 0.23167274200081003, "eval_loss": 0.34575140476226807, "eval_runtime": 90.2436, "eval_samples_per_second": 5.541, "eval_steps_per_second": 0.698, "step": 143 }, { "epoch": 0.23815309842041313, "grad_norm": 2.0038561820983887, "learning_rate": 0.00026501803432393037, "loss": 0.3276, "step": 147 }, { "epoch": 0.24949372215471852, "grad_norm": 2.029125690460205, "learning_rate": 0.00026143691354791145, "loss": 0.3273, "step": 154 }, { "epoch": 0.25273390036452004, "eval_loss": 0.34069105982780457, "eval_runtime": 82.809, "eval_samples_per_second": 6.038, "eval_steps_per_second": 0.761, "step": 156 }, { "epoch": 0.2608343458890239, "grad_norm": 2.2533740997314453, "learning_rate": 0.00025770808510220956, "loss": 0.3313, "step": 161 }, { "epoch": 0.27217496962332927, "grad_norm": 2.204942226409912, "learning_rate": 0.00025383649148337105, "loss": 0.3328, "step": 168 }, { "epoch": 0.27379505872823007, "eval_loss": 0.3394912779331207, "eval_runtime": 12.4532, "eval_samples_per_second": 40.15, "eval_steps_per_second": 5.059, "step": 169 }, { "epoch": 0.28351559335763465, "grad_norm": 2.162797689437866, "learning_rate": 0.0002498272644206695, "loss": 0.3332, "step": 175 }, { "epoch": 0.29485621709194004, "grad_norm": 2.1584229469299316, "learning_rate": 0.0002456857180740884, "loss": 0.32, "step": 182 }, { "epoch": 0.29485621709194004, "eval_loss": 0.33717018365859985, "eval_runtime": 77.5777, "eval_samples_per_second": 6.445, "eval_steps_per_second": 0.812, "step": 182 }, { "epoch": 0.30619684082624543, "grad_norm": 2.1494665145874023, "learning_rate": 0.0002414173419904956, "loss": 0.3293, "step": 189 }, { "epoch": 0.3159173754556501, "eval_loss": 0.33609288930892944, "eval_runtime": 93.3699, "eval_samples_per_second": 5.355, "eval_steps_per_second": 0.675, "step": 195 }, { "epoch": 0.3175374645605508, "grad_norm": 1.9434106349945068, "learning_rate": 0.00023702779382734566, "loss": 0.32, "step": 196 }, { "epoch": 0.3288780882948562, "grad_norm": 1.9602625370025635, "learning_rate": 0.0002325228918535541, "loss": 0.3182, "step": 203 }, { "epoch": 0.33697853381936005, "eval_loss": 0.3306241035461426, "eval_runtime": 12.4056, "eval_samples_per_second": 40.304, "eval_steps_per_second": 5.078, "step": 208 }, { "epoch": 0.3402187120291616, "grad_norm": 2.2286486625671387, "learning_rate": 0.00022790860723748442, "loss": 0.3218, "step": 210 }, { "epoch": 0.351559335763467, "grad_norm": 2.0589637756347656, "learning_rate": 0.00022319105613226921, "loss": 0.3148, "step": 217 }, { "epoch": 0.3580396921830701, "eval_loss": 0.3287665545940399, "eval_runtime": 77.8166, "eval_samples_per_second": 6.425, "eval_steps_per_second": 0.81, "step": 221 }, { "epoch": 0.3628999594977724, "grad_norm": 1.8553071022033691, "learning_rate": 0.00021837649156895706, "loss": 0.3206, "step": 224 }, { "epoch": 0.37424058323207776, "grad_norm": 2.019782304763794, "learning_rate": 0.00021347129516822945, "loss": 0.3128, "step": 231 }, { "epoch": 0.37910085054678005, "eval_loss": 0.32550591230392456, "eval_runtime": 78.3487, "eval_samples_per_second": 6.382, "eval_steps_per_second": 0.804, "step": 234 }, { "epoch": 0.38558120696638315, "grad_norm": 2.100358247756958, "learning_rate": 0.00020848196868167505, "loss": 0.3111, "step": 238 }, { "epoch": 0.39692183070068854, "grad_norm": 1.8522151708602905, "learning_rate": 0.000203415125373832, "loss": 0.3086, "step": 245 }, { "epoch": 0.4001620089104901, "eval_loss": 0.3193366825580597, "eval_runtime": 12.4578, "eval_samples_per_second": 40.136, "eval_steps_per_second": 5.057, "step": 247 }, { "epoch": 0.4082624544349939, "grad_norm": 2.0124332904815674, "learning_rate": 0.00019827748125642242, "loss": 0.3119, "step": 252 }, { "epoch": 0.4196030781692993, "grad_norm": 2.0748066902160645, "learning_rate": 0.0001930758461863965, "loss": 0.306, "step": 259 }, { "epoch": 0.42122316727420006, "eval_loss": 0.31428423523902893, "eval_runtime": 92.6512, "eval_samples_per_second": 5.397, "eval_steps_per_second": 0.68, "step": 260 }, { "epoch": 0.4309437019036047, "grad_norm": 2.1250007152557373, "learning_rate": 0.0001878171148395872, "loss": 0.3068, "step": 266 }, { "epoch": 0.4422843256379101, "grad_norm": 2.4317753314971924, "learning_rate": 0.00018250825757193848, "loss": 0.3122, "step": 273 }, { "epoch": 0.4422843256379101, "eval_loss": 0.31197118759155273, "eval_runtime": 82.8282, "eval_samples_per_second": 6.037, "eval_steps_per_second": 0.761, "step": 273 }, { "epoch": 0.4536249493722155, "grad_norm": 1.870219349861145, "learning_rate": 0.0001771563111804211, "loss": 0.2982, "step": 280 }, { "epoch": 0.46334548400162007, "eval_loss": 0.30591174960136414, "eval_runtime": 12.4327, "eval_samples_per_second": 40.217, "eval_steps_per_second": 5.067, "step": 286 }, { "epoch": 0.46496557310652087, "grad_norm": 2.021221399307251, "learning_rate": 0.0001717683695758819, "loss": 0.2994, "step": 287 }, { "epoch": 0.47630619684082626, "grad_norm": 1.954991340637207, "learning_rate": 0.00016635157438018983, "loss": 0.3023, "step": 294 }, { "epoch": 0.4844066423653301, "eval_loss": 0.29642820358276367, "eval_runtime": 87.2692, "eval_samples_per_second": 5.729, "eval_steps_per_second": 0.722, "step": 299 }, { "epoch": 0.48764682057513165, "grad_norm": 1.9946343898773193, "learning_rate": 0.0001609131054601416, "loss": 0.2936, "step": 301 }, { "epoch": 0.49898744430943703, "grad_norm": 1.9908640384674072, "learning_rate": 0.00015546017141067432, "loss": 0.2827, "step": 308 }, { "epoch": 0.5054678007290401, "eval_loss": 0.2939640283584595, "eval_runtime": 90.8498, "eval_samples_per_second": 5.504, "eval_steps_per_second": 0.693, "step": 312 }, { "epoch": 0.5103280680437424, "grad_norm": 1.9648362398147583, "learning_rate": 0.00015, "loss": 0.2795, "step": 315 }, { "epoch": 0.5216686917780478, "grad_norm": 1.9000581502914429, "learning_rate": 0.0001445398285893257, "loss": 0.2844, "step": 322 }, { "epoch": 0.5265289590927501, "eval_loss": 0.2863557040691376, "eval_runtime": 12.4189, "eval_samples_per_second": 40.261, "eval_steps_per_second": 5.073, "step": 325 }, { "epoch": 0.5330093155123532, "grad_norm": 1.860827088356018, "learning_rate": 0.0001390868945398584, "loss": 0.274, "step": 329 }, { "epoch": 0.5443499392466585, "grad_norm": 2.1385581493377686, "learning_rate": 0.00013364842561981014, "loss": 0.2733, "step": 336 }, { "epoch": 0.5475901174564601, "eval_loss": 0.28282585740089417, "eval_runtime": 92.2187, "eval_samples_per_second": 5.422, "eval_steps_per_second": 0.683, "step": 338 }, { "epoch": 0.555690562980964, "grad_norm": 2.0149056911468506, "learning_rate": 0.00012823163042411807, "loss": 0.274, "step": 343 }, { "epoch": 0.5670311867152693, "grad_norm": 2.187378406524658, "learning_rate": 0.0001228436888195789, "loss": 0.2711, "step": 350 }, { "epoch": 0.56865127582017, "eval_loss": 0.2776563763618469, "eval_runtime": 90.611, "eval_samples_per_second": 5.518, "eval_steps_per_second": 0.695, "step": 351 }, { "epoch": 0.5783718104495748, "grad_norm": 2.089923143386841, "learning_rate": 0.00011749174242806152, "loss": 0.2683, "step": 357 }, { "epoch": 0.5897124341838801, "grad_norm": 1.8779715299606323, "learning_rate": 0.00011218288516041279, "loss": 0.2714, "step": 364 }, { "epoch": 0.5897124341838801, "eval_loss": 0.2723066508769989, "eval_runtime": 12.4698, "eval_samples_per_second": 40.097, "eval_steps_per_second": 5.052, "step": 364 }, { "epoch": 0.6010530579181855, "grad_norm": 1.779288411140442, "learning_rate": 0.00010692415381360349, "loss": 0.2605, "step": 371 }, { "epoch": 0.6107735925475901, "eval_loss": 0.26519322395324707, "eval_runtime": 84.674, "eval_samples_per_second": 5.905, "eval_steps_per_second": 0.744, "step": 377 }, { "epoch": 0.6123936816524909, "grad_norm": 1.9808777570724487, "learning_rate": 0.00010172251874357757, "loss": 0.258, "step": 378 }, { "epoch": 0.6237343053867963, "grad_norm": 1.8243378400802612, "learning_rate": 9.658487462616794e-05, "loss": 0.2527, "step": 385 }, { "epoch": 0.6318347509113001, "eval_loss": 0.25875651836395264, "eval_runtime": 91.6131, "eval_samples_per_second": 5.458, "eval_steps_per_second": 0.688, "step": 390 }, { "epoch": 0.6350749291211016, "grad_norm": 1.9190093278884888, "learning_rate": 9.151803131832493e-05, "loss": 0.2642, "step": 392 }, { "epoch": 0.6464155528554071, "grad_norm": 1.7457456588745117, "learning_rate": 8.652870483177049e-05, "loss": 0.2526, "step": 399 }, { "epoch": 0.6528959092750102, "eval_loss": 0.2529118061065674, "eval_runtime": 12.4604, "eval_samples_per_second": 40.127, "eval_steps_per_second": 5.056, "step": 403 }, { "epoch": 0.6577561765897124, "grad_norm": 1.8457238674163818, "learning_rate": 8.162350843104291e-05, "loss": 0.2454, "step": 406 }, { "epoch": 0.6690968003240179, "grad_norm": 2.0805020332336426, "learning_rate": 7.680894386773072e-05, "loss": 0.2522, "step": 413 }, { "epoch": 0.6739570676387201, "eval_loss": 0.24899975955486298, "eval_runtime": 72.2912, "eval_samples_per_second": 6.916, "eval_steps_per_second": 0.871, "step": 416 }, { "epoch": 0.6804374240583232, "grad_norm": 1.816220998764038, "learning_rate": 7.209139276251558e-05, "loss": 0.2463, "step": 420 }, { "epoch": 0.6917780477926286, "grad_norm": 1.8046435117721558, "learning_rate": 6.747710814644589e-05, "loss": 0.242, "step": 427 }, { "epoch": 0.6950182260024301, "eval_loss": 0.2447230964899063, "eval_runtime": 87.12, "eval_samples_per_second": 5.739, "eval_steps_per_second": 0.723, "step": 429 }, { "epoch": 0.703118671526934, "grad_norm": 1.6818225383758545, "learning_rate": 6.297220617265435e-05, "loss": 0.2353, "step": 434 }, { "epoch": 0.7144592952612394, "grad_norm": 1.6758326292037964, "learning_rate": 5.858265800950438e-05, "loss": 0.2443, "step": 441 }, { "epoch": 0.7160793843661402, "eval_loss": 0.23998361825942993, "eval_runtime": 12.4908, "eval_samples_per_second": 40.03, "eval_steps_per_second": 5.044, "step": 442 }, { "epoch": 0.7257999189955447, "grad_norm": 1.6476861238479614, "learning_rate": 5.4314281925911634e-05, "loss": 0.2292, "step": 448 }, { "epoch": 0.7371405427298502, "grad_norm": 1.7244728803634644, "learning_rate": 5.0172735579330526e-05, "loss": 0.2273, "step": 455 }, { "epoch": 0.7371405427298502, "eval_loss": 0.23421922326087952, "eval_runtime": 86.8076, "eval_samples_per_second": 5.76, "eval_steps_per_second": 0.726, "step": 455 }, { "epoch": 0.7484811664641555, "grad_norm": 1.8216646909713745, "learning_rate": 4.616350851662895e-05, "loss": 0.2288, "step": 462 }, { "epoch": 0.7582017010935601, "eval_loss": 0.23177292943000793, "eval_runtime": 75.3199, "eval_samples_per_second": 6.638, "eval_steps_per_second": 0.836, "step": 468 }, { "epoch": 0.759821790198461, "grad_norm": 1.586669683456421, "learning_rate": 4.229191489779047e-05, "loss": 0.2292, "step": 469 }, { "epoch": 0.7711624139327663, "grad_norm": 1.7026342153549194, "learning_rate": 3.8563086452088506e-05, "loss": 0.2325, "step": 476 }, { "epoch": 0.7792628594572701, "eval_loss": 0.2292790412902832, "eval_runtime": 12.4917, "eval_samples_per_second": 40.026, "eval_steps_per_second": 5.043, "step": 481 }, { "epoch": 0.7825030376670717, "grad_norm": 1.7042219638824463, "learning_rate": 3.498196567606959e-05, "loss": 0.2251, "step": 483 }, { "epoch": 0.7938436614013771, "grad_norm": 1.819270372390747, "learning_rate": 3.1553299282360966e-05, "loss": 0.2264, "step": 490 }, { "epoch": 0.8003240178209802, "eval_loss": 0.22619818150997162, "eval_runtime": 36.3759, "eval_samples_per_second": 13.745, "eval_steps_per_second": 1.732, "step": 494 }, { "epoch": 0.8051842851356824, "grad_norm": 1.7286969423294067, "learning_rate": 2.828163190798644e-05, "loss": 0.2215, "step": 497 }, { "epoch": 0.8165249088699879, "grad_norm": 1.7360488176345825, "learning_rate": 2.5171300090530106e-05, "loss": 0.2127, "step": 504 }, { "epoch": 0.8213851761846902, "eval_loss": 0.2221684604883194, "eval_runtime": 36.6136, "eval_samples_per_second": 13.656, "eval_steps_per_second": 1.721, "step": 507 }, { "epoch": 0.8278655326042932, "grad_norm": 1.7685576677322388, "learning_rate": 2.2226426520131734e-05, "loss": 0.2131, "step": 511 }, { "epoch": 0.8392061563385986, "grad_norm": 1.4955885410308838, "learning_rate": 1.9450914574933725e-05, "loss": 0.2166, "step": 518 }, { "epoch": 0.8424463345484001, "eval_loss": 0.21989719569683075, "eval_runtime": 12.5227, "eval_samples_per_second": 39.927, "eval_steps_per_second": 5.031, "step": 520 }, { "epoch": 0.850546780072904, "grad_norm": 1.7498217821121216, "learning_rate": 1.6848443147221828e-05, "loss": 0.2091, "step": 525 }, { "epoch": 0.8618874038072094, "grad_norm": 1.6159402132034302, "learning_rate": 1.4422461767118233e-05, "loss": 0.2145, "step": 532 }, { "epoch": 0.8635074929121102, "eval_loss": 0.21832826733589172, "eval_runtime": 37.0243, "eval_samples_per_second": 13.505, "eval_steps_per_second": 1.702, "step": 533 }, { "epoch": 0.8732280275415147, "grad_norm": 1.4355424642562866, "learning_rate": 1.2176186030289936e-05, "loss": 0.2135, "step": 539 }, { "epoch": 0.8845686512758202, "grad_norm": 1.5615161657333374, "learning_rate": 1.011259333573326e-05, "loss": 0.2089, "step": 546 }, { "epoch": 0.8845686512758202, "eval_loss": 0.2167486995458603, "eval_runtime": 36.6749, "eval_samples_per_second": 13.633, "eval_steps_per_second": 1.718, "step": 546 }, { "epoch": 0.8959092750101255, "grad_norm": 1.593320608139038, "learning_rate": 8.234418939283866e-06, "loss": 0.206, "step": 553 }, { "epoch": 0.9056298096395302, "eval_loss": 0.21581247448921204, "eval_runtime": 12.5401, "eval_samples_per_second": 39.872, "eval_steps_per_second": 5.024, "step": 559 }, { "epoch": 0.907249898744431, "grad_norm": 1.4972048997879028, "learning_rate": 6.544152328083152e-06, "loss": 0.2116, "step": 560 }, { "epoch": 0.9185905224787363, "grad_norm": 1.8363592624664307, "learning_rate": 5.044033920806933e-06, "loss": 0.2078, "step": 567 }, { "epoch": 0.9266909680032401, "eval_loss": 0.2153376042842865, "eval_runtime": 36.6671, "eval_samples_per_second": 13.636, "eval_steps_per_second": 1.718, "step": 572 }, { "epoch": 0.9299311462130417, "grad_norm": 1.574928879737854, "learning_rate": 3.7360520980297514e-06, "loss": 0.2076, "step": 574 }, { "epoch": 0.9412717699473471, "grad_norm": 1.6964603662490845, "learning_rate": 2.6219405666614402e-06, "loss": 0.2117, "step": 581 }, { "epoch": 0.9477521263669502, "eval_loss": 0.21471136808395386, "eval_runtime": 36.6697, "eval_samples_per_second": 13.635, "eval_steps_per_second": 1.718, "step": 585 }, { "epoch": 0.9526123936816525, "grad_norm": 1.8814630508422852, "learning_rate": 1.7031760619491353e-06, "loss": 0.207, "step": 588 }, { "epoch": 0.9639530174159578, "grad_norm": 1.683011531829834, "learning_rate": 9.809763900905875e-07, "loss": 0.2125, "step": 595 }, { "epoch": 0.9688132847306602, "eval_loss": 0.2143588364124298, "eval_runtime": 12.559, "eval_samples_per_second": 39.812, "eval_steps_per_second": 5.016, "step": 598 }, { "epoch": 0.9752936411502633, "grad_norm": 1.555221676826477, "learning_rate": 4.562988140535073e-07, "loss": 0.207, "step": 602 }, { "epoch": 0.9866342648845686, "grad_norm": 1.7758488655090332, "learning_rate": 1.298387847403437e-07, "loss": 0.2118, "step": 609 }, { "epoch": 0.9898744430943702, "eval_loss": 0.21421405673027039, "eval_runtime": 36.5871, "eval_samples_per_second": 13.666, "eval_steps_per_second": 1.722, "step": 611 }, { "epoch": 0.9979748886188741, "grad_norm": 1.7762000560760498, "learning_rate": 2.029019180288527e-09, "loss": 0.2121, "step": 616 } ], "logging_steps": 7, "max_steps": 617, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.47773295983788e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }