{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3169845594913716, "eval_steps": 500, "global_step": 4350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009082652134423252, "grad_norm": 2.5449585914611816, "learning_rate": 4.9848622464426284e-05, "loss": 8.7191, "step": 30 }, { "epoch": 0.018165304268846504, "grad_norm": 2.3144371509552, "learning_rate": 4.969724492885256e-05, "loss": 7.4698, "step": 60 }, { "epoch": 0.027247956403269755, "grad_norm": 2.304499626159668, "learning_rate": 4.954586739327884e-05, "loss": 6.5589, "step": 90 }, { "epoch": 0.03633060853769301, "grad_norm": 2.272608757019043, "learning_rate": 4.9394489857705115e-05, "loss": 6.2425, "step": 120 }, { "epoch": 0.045413260672116255, "grad_norm": 2.46329402923584, "learning_rate": 4.9243112322131396e-05, "loss": 6.1459, "step": 150 }, { "epoch": 0.05449591280653951, "grad_norm": 1.8283530473709106, "learning_rate": 4.909173478655768e-05, "loss": 5.969, "step": 180 }, { "epoch": 0.06357856494096276, "grad_norm": 2.1723110675811768, "learning_rate": 4.894035725098395e-05, "loss": 6.008, "step": 210 }, { "epoch": 0.07266121707538602, "grad_norm": 2.5368807315826416, "learning_rate": 4.878897971541024e-05, "loss": 5.8783, "step": 240 }, { "epoch": 0.08174386920980926, "grad_norm": 2.3222858905792236, "learning_rate": 4.8637602179836515e-05, "loss": 5.825, "step": 270 }, { "epoch": 0.09082652134423251, "grad_norm": 2.557065010070801, "learning_rate": 4.8486224644262796e-05, "loss": 5.76, "step": 300 }, { "epoch": 0.09990917347865577, "grad_norm": 2.4016597270965576, "learning_rate": 4.833484710868907e-05, "loss": 5.7039, "step": 330 }, { "epoch": 0.10899182561307902, "grad_norm": 2.6895477771759033, "learning_rate": 4.818346957311535e-05, "loss": 5.5843, "step": 360 }, { "epoch": 0.11807447774750227, "grad_norm": 2.741234064102173, "learning_rate": 4.8032092037541634e-05, "loss": 5.6376, "step": 390 }, { "epoch": 0.1271571298819255, "grad_norm": 2.8266804218292236, "learning_rate": 4.788071450196791e-05, "loss": 5.5649, "step": 420 }, { "epoch": 0.1362397820163488, "grad_norm": 2.792654275894165, "learning_rate": 4.772933696639419e-05, "loss": 5.3651, "step": 450 }, { "epoch": 0.14532243415077203, "grad_norm": 2.7088894844055176, "learning_rate": 4.757795943082047e-05, "loss": 5.4921, "step": 480 }, { "epoch": 0.15440508628519528, "grad_norm": 2.627201795578003, "learning_rate": 4.7426581895246746e-05, "loss": 5.461, "step": 510 }, { "epoch": 0.16348773841961853, "grad_norm": 2.6373610496520996, "learning_rate": 4.727520435967303e-05, "loss": 5.3973, "step": 540 }, { "epoch": 0.17257039055404177, "grad_norm": 2.772226095199585, "learning_rate": 4.71238268240993e-05, "loss": 5.3618, "step": 570 }, { "epoch": 0.18165304268846502, "grad_norm": 2.6005172729492188, "learning_rate": 4.6972449288525583e-05, "loss": 5.4365, "step": 600 }, { "epoch": 0.1907356948228883, "grad_norm": 4.7815260887146, "learning_rate": 4.6821071752951865e-05, "loss": 5.3225, "step": 630 }, { "epoch": 0.19981834695731154, "grad_norm": 2.5871763229370117, "learning_rate": 4.6669694217378146e-05, "loss": 5.3615, "step": 660 }, { "epoch": 0.2089009990917348, "grad_norm": 2.686840534210205, "learning_rate": 4.651831668180443e-05, "loss": 5.3201, "step": 690 }, { "epoch": 0.21798365122615804, "grad_norm": 2.6963067054748535, "learning_rate": 4.63669391462307e-05, "loss": 5.1972, "step": 720 }, { "epoch": 0.22706630336058128, "grad_norm": 2.9284744262695312, 
"learning_rate": 4.6215561610656984e-05, "loss": 5.3031, "step": 750 }, { "epoch": 0.23614895549500453, "grad_norm": 2.7302122116088867, "learning_rate": 4.606418407508326e-05, "loss": 5.2057, "step": 780 }, { "epoch": 0.2452316076294278, "grad_norm": 2.5760107040405273, "learning_rate": 4.591280653950954e-05, "loss": 5.1767, "step": 810 }, { "epoch": 0.254314259763851, "grad_norm": 2.9804234504699707, "learning_rate": 4.576142900393582e-05, "loss": 5.1875, "step": 840 }, { "epoch": 0.2633969118982743, "grad_norm": 3.311448812484741, "learning_rate": 4.5610051468362096e-05, "loss": 5.0712, "step": 870 }, { "epoch": 0.2724795640326976, "grad_norm": 2.67448091506958, "learning_rate": 4.545867393278838e-05, "loss": 5.1241, "step": 900 }, { "epoch": 0.2815622161671208, "grad_norm": 2.8352444171905518, "learning_rate": 4.530729639721465e-05, "loss": 5.1732, "step": 930 }, { "epoch": 0.29064486830154407, "grad_norm": 2.5969910621643066, "learning_rate": 4.515591886164093e-05, "loss": 5.0828, "step": 960 }, { "epoch": 0.2997275204359673, "grad_norm": 2.8792121410369873, "learning_rate": 4.5004541326067215e-05, "loss": 5.0844, "step": 990 }, { "epoch": 0.30881017257039056, "grad_norm": 2.9506993293762207, "learning_rate": 4.485316379049349e-05, "loss": 5.1764, "step": 1020 }, { "epoch": 0.3178928247048138, "grad_norm": 2.8818390369415283, "learning_rate": 4.470178625491977e-05, "loss": 5.0663, "step": 1050 }, { "epoch": 0.32697547683923706, "grad_norm": 3.128511667251587, "learning_rate": 4.4550408719346046e-05, "loss": 5.1026, "step": 1080 }, { "epoch": 0.33605812897366033, "grad_norm": 3.0155856609344482, "learning_rate": 4.4399031183772334e-05, "loss": 5.0686, "step": 1110 }, { "epoch": 0.34514078110808355, "grad_norm": 2.811448097229004, "learning_rate": 4.424765364819861e-05, "loss": 5.0351, "step": 1140 }, { "epoch": 0.3542234332425068, "grad_norm": 2.9916000366210938, "learning_rate": 4.409627611262489e-05, "loss": 5.1651, "step": 1170 }, { "epoch": 0.36330608537693004, "grad_norm": 2.9689950942993164, "learning_rate": 4.394489857705117e-05, "loss": 5.1457, "step": 1200 }, { "epoch": 0.3723887375113533, "grad_norm": 2.7896862030029297, "learning_rate": 4.3793521041477446e-05, "loss": 5.0049, "step": 1230 }, { "epoch": 0.3814713896457766, "grad_norm": 2.790712833404541, "learning_rate": 4.364214350590373e-05, "loss": 4.9943, "step": 1260 }, { "epoch": 0.3905540417801998, "grad_norm": 2.9977900981903076, "learning_rate": 4.349076597033e-05, "loss": 4.996, "step": 1290 }, { "epoch": 0.3996366939146231, "grad_norm": 3.504183530807495, "learning_rate": 4.333938843475628e-05, "loss": 4.9611, "step": 1320 }, { "epoch": 0.4087193460490463, "grad_norm": 2.737821578979492, "learning_rate": 4.3188010899182565e-05, "loss": 4.9541, "step": 1350 }, { "epoch": 0.4178019981834696, "grad_norm": 3.0585217475891113, "learning_rate": 4.303663336360884e-05, "loss": 4.9014, "step": 1380 }, { "epoch": 0.4268846503178928, "grad_norm": 3.004413604736328, "learning_rate": 4.288525582803512e-05, "loss": 4.9703, "step": 1410 }, { "epoch": 0.4359673024523161, "grad_norm": 2.9328274726867676, "learning_rate": 4.27338782924614e-05, "loss": 4.9637, "step": 1440 }, { "epoch": 0.44504995458673935, "grad_norm": 2.93721604347229, "learning_rate": 4.258250075688768e-05, "loss": 4.8024, "step": 1470 }, { "epoch": 0.45413260672116257, "grad_norm": 3.0333001613616943, "learning_rate": 4.243112322131396e-05, "loss": 4.8555, "step": 1500 }, { "epoch": 0.46321525885558584, "grad_norm": 3.3445775508880615, "learning_rate": 
4.227974568574024e-05, "loss": 4.8035, "step": 1530 }, { "epoch": 0.47229791099000906, "grad_norm": 2.9364359378814697, "learning_rate": 4.212836815016652e-05, "loss": 4.9296, "step": 1560 }, { "epoch": 0.48138056312443234, "grad_norm": 2.755453586578369, "learning_rate": 4.1976990614592796e-05, "loss": 4.8051, "step": 1590 }, { "epoch": 0.4904632152588556, "grad_norm": 3.0365066528320312, "learning_rate": 4.182561307901908e-05, "loss": 4.7833, "step": 1620 }, { "epoch": 0.49954586739327883, "grad_norm": 3.2632575035095215, "learning_rate": 4.167423554344536e-05, "loss": 4.837, "step": 1650 }, { "epoch": 0.508628519527702, "grad_norm": 3.310817003250122, "learning_rate": 4.152285800787163e-05, "loss": 4.7417, "step": 1680 }, { "epoch": 0.5177111716621253, "grad_norm": 3.121156692504883, "learning_rate": 4.1371480472297915e-05, "loss": 4.7791, "step": 1710 }, { "epoch": 0.5267938237965486, "grad_norm": 3.200591564178467, "learning_rate": 4.122010293672419e-05, "loss": 4.8619, "step": 1740 }, { "epoch": 0.5358764759309719, "grad_norm": 3.1420202255249023, "learning_rate": 4.106872540115047e-05, "loss": 4.7576, "step": 1770 }, { "epoch": 0.5449591280653951, "grad_norm": 3.2239160537719727, "learning_rate": 4.091734786557675e-05, "loss": 4.7767, "step": 1800 }, { "epoch": 0.5540417801998183, "grad_norm": 2.9624414443969727, "learning_rate": 4.076597033000303e-05, "loss": 4.8608, "step": 1830 }, { "epoch": 0.5631244323342416, "grad_norm": 3.14367938041687, "learning_rate": 4.061459279442931e-05, "loss": 4.7909, "step": 1860 }, { "epoch": 0.5722070844686649, "grad_norm": 3.664564371109009, "learning_rate": 4.046321525885558e-05, "loss": 4.7325, "step": 1890 }, { "epoch": 0.5812897366030881, "grad_norm": 2.9251296520233154, "learning_rate": 4.0311837723281864e-05, "loss": 4.8017, "step": 1920 }, { "epoch": 0.5903723887375113, "grad_norm": 2.8796215057373047, "learning_rate": 4.0160460187708146e-05, "loss": 4.7124, "step": 1950 }, { "epoch": 0.5994550408719346, "grad_norm": 3.0257513523101807, "learning_rate": 4.000908265213443e-05, "loss": 4.7311, "step": 1980 }, { "epoch": 0.6085376930063578, "grad_norm": 3.096799612045288, "learning_rate": 3.985770511656071e-05, "loss": 4.6568, "step": 2010 }, { "epoch": 0.6176203451407811, "grad_norm": 3.1430232524871826, "learning_rate": 3.970632758098698e-05, "loss": 4.6451, "step": 2040 }, { "epoch": 0.6267029972752044, "grad_norm": 3.0216684341430664, "learning_rate": 3.9554950045413265e-05, "loss": 4.6565, "step": 2070 }, { "epoch": 0.6357856494096276, "grad_norm": 3.0199525356292725, "learning_rate": 3.940357250983954e-05, "loss": 4.6988, "step": 2100 }, { "epoch": 0.6448683015440508, "grad_norm": 2.9998953342437744, "learning_rate": 3.925219497426582e-05, "loss": 4.6654, "step": 2130 }, { "epoch": 0.6539509536784741, "grad_norm": 3.15533447265625, "learning_rate": 3.91008174386921e-05, "loss": 4.616, "step": 2160 }, { "epoch": 0.6630336058128974, "grad_norm": 2.8745930194854736, "learning_rate": 3.894943990311838e-05, "loss": 4.649, "step": 2190 }, { "epoch": 0.6721162579473207, "grad_norm": 3.0759665966033936, "learning_rate": 3.879806236754466e-05, "loss": 4.6054, "step": 2220 }, { "epoch": 0.6811989100817438, "grad_norm": 3.0508482456207275, "learning_rate": 3.864668483197093e-05, "loss": 4.4922, "step": 2250 }, { "epoch": 0.6902815622161671, "grad_norm": 2.9260127544403076, "learning_rate": 3.8495307296397214e-05, "loss": 4.6469, "step": 2280 }, { "epoch": 0.6993642143505904, "grad_norm": 2.924952268600464, "learning_rate": 3.8343929760823496e-05, 
"loss": 4.6164, "step": 2310 }, { "epoch": 0.7084468664850136, "grad_norm": 3.056288480758667, "learning_rate": 3.819255222524977e-05, "loss": 4.5877, "step": 2340 }, { "epoch": 0.7175295186194369, "grad_norm": 4.257227420806885, "learning_rate": 3.804117468967605e-05, "loss": 4.6301, "step": 2370 }, { "epoch": 0.7266121707538601, "grad_norm": 3.282137155532837, "learning_rate": 3.788979715410233e-05, "loss": 4.4623, "step": 2400 }, { "epoch": 0.7356948228882834, "grad_norm": 2.945059299468994, "learning_rate": 3.7738419618528615e-05, "loss": 4.6267, "step": 2430 }, { "epoch": 0.7447774750227066, "grad_norm": 3.1374645233154297, "learning_rate": 3.7587042082954896e-05, "loss": 4.6835, "step": 2460 }, { "epoch": 0.7538601271571299, "grad_norm": 3.21016001701355, "learning_rate": 3.743566454738117e-05, "loss": 4.5581, "step": 2490 }, { "epoch": 0.7629427792915532, "grad_norm": 2.8072383403778076, "learning_rate": 3.728428701180745e-05, "loss": 4.571, "step": 2520 }, { "epoch": 0.7720254314259763, "grad_norm": 2.9735002517700195, "learning_rate": 3.713290947623373e-05, "loss": 4.5013, "step": 2550 }, { "epoch": 0.7811080835603996, "grad_norm": 3.182706832885742, "learning_rate": 3.698153194066001e-05, "loss": 4.534, "step": 2580 }, { "epoch": 0.7901907356948229, "grad_norm": 2.958193778991699, "learning_rate": 3.683015440508629e-05, "loss": 4.5697, "step": 2610 }, { "epoch": 0.7992733878292462, "grad_norm": 2.950946569442749, "learning_rate": 3.6678776869512564e-05, "loss": 4.6066, "step": 2640 }, { "epoch": 0.8083560399636693, "grad_norm": 2.9701859951019287, "learning_rate": 3.6527399333938846e-05, "loss": 4.5934, "step": 2670 }, { "epoch": 0.8174386920980926, "grad_norm": 3.2177681922912598, "learning_rate": 3.637602179836512e-05, "loss": 4.5418, "step": 2700 }, { "epoch": 0.8265213442325159, "grad_norm": 2.7435505390167236, "learning_rate": 3.62246442627914e-05, "loss": 4.5485, "step": 2730 }, { "epoch": 0.8356039963669392, "grad_norm": 3.4409849643707275, "learning_rate": 3.607326672721768e-05, "loss": 4.4268, "step": 2760 }, { "epoch": 0.8446866485013624, "grad_norm": 3.803256034851074, "learning_rate": 3.592188919164396e-05, "loss": 4.5643, "step": 2790 }, { "epoch": 0.8537693006357856, "grad_norm": 3.0399341583251953, "learning_rate": 3.5770511656070246e-05, "loss": 4.4783, "step": 2820 }, { "epoch": 0.8628519527702089, "grad_norm": 2.9948980808258057, "learning_rate": 3.561913412049652e-05, "loss": 4.4929, "step": 2850 }, { "epoch": 0.8719346049046321, "grad_norm": 3.400299549102783, "learning_rate": 3.54677565849228e-05, "loss": 4.4803, "step": 2880 }, { "epoch": 0.8810172570390554, "grad_norm": 2.9282257556915283, "learning_rate": 3.531637904934908e-05, "loss": 4.4554, "step": 2910 }, { "epoch": 0.8900999091734787, "grad_norm": 2.957598924636841, "learning_rate": 3.516500151377536e-05, "loss": 4.5324, "step": 2940 }, { "epoch": 0.8991825613079019, "grad_norm": 2.9992153644561768, "learning_rate": 3.501362397820164e-05, "loss": 4.508, "step": 2970 }, { "epoch": 0.9082652134423251, "grad_norm": 3.1509618759155273, "learning_rate": 3.4862246442627914e-05, "loss": 4.4265, "step": 3000 }, { "epoch": 0.9173478655767484, "grad_norm": 3.027726888656616, "learning_rate": 3.4710868907054196e-05, "loss": 4.4979, "step": 3030 }, { "epoch": 0.9264305177111717, "grad_norm": 3.0711803436279297, "learning_rate": 3.455949137148047e-05, "loss": 4.4946, "step": 3060 }, { "epoch": 0.935513169845595, "grad_norm": 2.982269287109375, "learning_rate": 3.440811383590675e-05, "loss": 4.3433, "step": 3090 }, 
{ "epoch": 0.9445958219800181, "grad_norm": 2.9734480381011963, "learning_rate": 3.425673630033303e-05, "loss": 4.453, "step": 3120 }, { "epoch": 0.9536784741144414, "grad_norm": 2.985030174255371, "learning_rate": 3.410535876475931e-05, "loss": 4.3705, "step": 3150 }, { "epoch": 0.9627611262488647, "grad_norm": 3.1812829971313477, "learning_rate": 3.395398122918559e-05, "loss": 4.3414, "step": 3180 }, { "epoch": 0.971843778383288, "grad_norm": 3.415923595428467, "learning_rate": 3.380260369361187e-05, "loss": 4.522, "step": 3210 }, { "epoch": 0.9809264305177112, "grad_norm": 3.176737070083618, "learning_rate": 3.3651226158038145e-05, "loss": 4.4112, "step": 3240 }, { "epoch": 0.9900090826521344, "grad_norm": 3.1306254863739014, "learning_rate": 3.3499848622464433e-05, "loss": 4.5104, "step": 3270 }, { "epoch": 0.9990917347865577, "grad_norm": 3.216395616531372, "learning_rate": 3.334847108689071e-05, "loss": 4.3244, "step": 3300 }, { "epoch": 1.008174386920981, "grad_norm": 3.1889307498931885, "learning_rate": 3.319709355131699e-05, "loss": 4.3521, "step": 3330 }, { "epoch": 1.017257039055404, "grad_norm": 2.8001787662506104, "learning_rate": 3.3045716015743264e-05, "loss": 4.3047, "step": 3360 }, { "epoch": 1.0263396911898275, "grad_norm": 3.5796685218811035, "learning_rate": 3.2894338480169546e-05, "loss": 4.1921, "step": 3390 }, { "epoch": 1.0354223433242506, "grad_norm": 3.725538730621338, "learning_rate": 3.274296094459583e-05, "loss": 4.3203, "step": 3420 }, { "epoch": 1.044504995458674, "grad_norm": 2.9058167934417725, "learning_rate": 3.25915834090221e-05, "loss": 4.385, "step": 3450 }, { "epoch": 1.0535876475930972, "grad_norm": 3.120119333267212, "learning_rate": 3.244020587344838e-05, "loss": 4.2883, "step": 3480 }, { "epoch": 1.0626702997275204, "grad_norm": 3.230036735534668, "learning_rate": 3.228882833787466e-05, "loss": 4.3602, "step": 3510 }, { "epoch": 1.0717529518619437, "grad_norm": 3.482921600341797, "learning_rate": 3.213745080230094e-05, "loss": 4.3984, "step": 3540 }, { "epoch": 1.080835603996367, "grad_norm": 3.0121572017669678, "learning_rate": 3.198607326672722e-05, "loss": 4.3864, "step": 3570 }, { "epoch": 1.0899182561307903, "grad_norm": 3.277411460876465, "learning_rate": 3.1834695731153495e-05, "loss": 4.2294, "step": 3600 }, { "epoch": 1.0990009082652135, "grad_norm": 3.0383167266845703, "learning_rate": 3.168331819557978e-05, "loss": 4.2759, "step": 3630 }, { "epoch": 1.1080835603996366, "grad_norm": 3.3026745319366455, "learning_rate": 3.153194066000605e-05, "loss": 4.3093, "step": 3660 }, { "epoch": 1.11716621253406, "grad_norm": 2.954747200012207, "learning_rate": 3.138056312443234e-05, "loss": 4.2476, "step": 3690 }, { "epoch": 1.1262488646684832, "grad_norm": 3.2137765884399414, "learning_rate": 3.1229185588858614e-05, "loss": 4.2858, "step": 3720 }, { "epoch": 1.1353315168029066, "grad_norm": 3.4028799533843994, "learning_rate": 3.1077808053284896e-05, "loss": 4.3652, "step": 3750 }, { "epoch": 1.1444141689373297, "grad_norm": 3.0039563179016113, "learning_rate": 3.092643051771118e-05, "loss": 4.4106, "step": 3780 }, { "epoch": 1.1534968210717529, "grad_norm": 2.973820209503174, "learning_rate": 3.077505298213745e-05, "loss": 4.1827, "step": 3810 }, { "epoch": 1.1625794732061763, "grad_norm": 2.99037766456604, "learning_rate": 3.062367544656373e-05, "loss": 4.3092, "step": 3840 }, { "epoch": 1.1716621253405994, "grad_norm": 3.181398391723633, "learning_rate": 3.047229791099001e-05, "loss": 4.417, "step": 3870 }, { "epoch": 1.1807447774750228, 
"grad_norm": 3.1933484077453613, "learning_rate": 3.032092037541629e-05, "loss": 4.2361, "step": 3900 }, { "epoch": 1.189827429609446, "grad_norm": 3.4427855014801025, "learning_rate": 3.0169542839842567e-05, "loss": 4.2687, "step": 3930 }, { "epoch": 1.1989100817438691, "grad_norm": 3.0683298110961914, "learning_rate": 3.001816530426885e-05, "loss": 4.2748, "step": 3960 }, { "epoch": 1.2079927338782925, "grad_norm": 3.044698715209961, "learning_rate": 2.9866787768695127e-05, "loss": 4.2671, "step": 3990 }, { "epoch": 1.2170753860127157, "grad_norm": 3.1354904174804688, "learning_rate": 2.9715410233121405e-05, "loss": 4.2635, "step": 4020 }, { "epoch": 1.226158038147139, "grad_norm": 3.282745361328125, "learning_rate": 2.9564032697547683e-05, "loss": 4.3544, "step": 4050 }, { "epoch": 1.2352406902815622, "grad_norm": 3.369798183441162, "learning_rate": 2.941265516197396e-05, "loss": 4.1993, "step": 4080 }, { "epoch": 1.2443233424159854, "grad_norm": 3.395785331726074, "learning_rate": 2.9261277626400242e-05, "loss": 4.1131, "step": 4110 }, { "epoch": 1.2534059945504088, "grad_norm": 3.500697135925293, "learning_rate": 2.9109900090826524e-05, "loss": 4.192, "step": 4140 }, { "epoch": 1.262488646684832, "grad_norm": 2.94278621673584, "learning_rate": 2.8958522555252805e-05, "loss": 4.2863, "step": 4170 }, { "epoch": 1.2715712988192553, "grad_norm": 3.3217315673828125, "learning_rate": 2.8807145019679083e-05, "loss": 4.1763, "step": 4200 }, { "epoch": 1.2806539509536785, "grad_norm": 3.232830762863159, "learning_rate": 2.865576748410536e-05, "loss": 4.2595, "step": 4230 }, { "epoch": 1.2897366030881017, "grad_norm": 3.3042378425598145, "learning_rate": 2.850438994853164e-05, "loss": 4.2393, "step": 4260 }, { "epoch": 1.298819255222525, "grad_norm": 3.83151912689209, "learning_rate": 2.835301241295792e-05, "loss": 4.3005, "step": 4290 }, { "epoch": 1.3079019073569482, "grad_norm": 3.245086431503296, "learning_rate": 2.82016348773842e-05, "loss": 4.205, "step": 4320 }, { "epoch": 1.3169845594913716, "grad_norm": 3.4392285346984863, "learning_rate": 2.8050257341810477e-05, "loss": 4.1964, "step": 4350 } ], "logging_steps": 30, "max_steps": 9909, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 30, "total_flos": 1136555016192000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }