{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.998691442030882, "eval_steps": 500, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002093692750588851, "grad_norm": 3522.287948369491, "learning_rate": 1.0416666666666666e-08, "logits/chosen": 5002.53564453125, "logits/rejected": 4591.72021484375, "logps/chosen": -265.6396789550781, "logps/rejected": -206.22401428222656, "loss": 222.1204, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02093692750588851, "grad_norm": 3570.1637579903136, "learning_rate": 1.0416666666666667e-07, "logits/chosen": 5976.8369140625, "logits/rejected": 5015.12548828125, "logps/chosen": -292.30426025390625, "logps/rejected": -259.09771728515625, "loss": 261.6898, "rewards/accuracies": 0.4583333432674408, "rewards/chosen": 0.002565481700003147, "rewards/margins": 0.00030597145087085664, "rewards/rejected": 0.002259510336443782, "step": 10 }, { "epoch": 0.04187385501177702, "grad_norm": 1740.0232034178, "learning_rate": 2.0833333333333333e-07, "logits/chosen": 5865.3740234375, "logits/rejected": 4845.28564453125, "logps/chosen": -283.95458984375, "logps/rejected": -243.2721710205078, "loss": 247.7889, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.06638871878385544, "rewards/margins": 0.0017764812801033258, "rewards/rejected": 0.0646122470498085, "step": 20 }, { "epoch": 0.06281078251766553, "grad_norm": 1350.7689845691493, "learning_rate": 3.1249999999999997e-07, "logits/chosen": 5561.669921875, "logits/rejected": 4845.861328125, "logps/chosen": -256.84765625, "logps/rejected": -223.6597137451172, "loss": 235.7881, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.15389783680438995, "rewards/margins": 0.002473007421940565, "rewards/rejected": 0.1514248251914978, "step": 30 }, { "epoch": 0.08374771002355404, "grad_norm": 1024.0536736135682, "learning_rate": 4.1666666666666667e-07, "logits/chosen": 5555.7802734375, "logits/rejected": 4829.6064453125, "logps/chosen": -253.9397735595703, "logps/rejected": -237.85189819335938, "loss": 235.6024, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": 0.20017290115356445, "rewards/margins": -0.005847611464560032, "rewards/rejected": 0.2060205191373825, "step": 40 }, { "epoch": 0.10468463752944256, "grad_norm": 1139.3651978834764, "learning_rate": 4.999731868769026e-07, "logits/chosen": 5772.9248046875, "logits/rejected": 5017.52197265625, "logps/chosen": -256.2254943847656, "logps/rejected": -243.820556640625, "loss": 238.5566, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.2239224910736084, "rewards/margins": -0.01306430995464325, "rewards/rejected": 0.23698678612709045, "step": 50 }, { "epoch": 0.12562156503533106, "grad_norm": 949.2424330626906, "learning_rate": 4.990353313429303e-07, "logits/chosen": 5492.53173828125, "logits/rejected": 4689.26611328125, "logps/chosen": -251.794921875, "logps/rejected": -223.1071014404297, "loss": 226.0058, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": 0.266275554895401, "rewards/margins": -0.017442751675844193, "rewards/rejected": 0.2837182879447937, "step": 60 }, { "epoch": 0.14655849254121958, "grad_norm": 954.6500714792113, "learning_rate": 4.967625656594781e-07, "logits/chosen": 5274.14453125, "logits/rejected": 4874.0908203125, "logps/chosen": -250.61135864257812, "logps/rejected": -238.37997436523438, "loss": 225.2396, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": 0.27696576714515686, "rewards/margins": -0.007267093751579523, "rewards/rejected": 0.28423285484313965, "step": 70 }, { "epoch": 0.16749542004710807, "grad_norm": 932.4135888762232, "learning_rate": 4.93167072587771e-07, "logits/chosen": 5357.58056640625, "logits/rejected": 4600.1162109375, "logps/chosen": -245.84445190429688, "logps/rejected": -223.32791137695312, "loss": 224.8782, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.3024243712425232, "rewards/margins": 0.004978764336556196, "rewards/rejected": 0.2974456250667572, "step": 80 }, { "epoch": 0.1884323475529966, "grad_norm": 1000.848976733069, "learning_rate": 4.882681251368548e-07, "logits/chosen": 5479.4873046875, "logits/rejected": 4642.76416015625, "logps/chosen": -245.2469482421875, "logps/rejected": -221.87753295898438, "loss": 225.3665, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.32460635900497437, "rewards/margins": -0.0063544088043272495, "rewards/rejected": 0.33096081018447876, "step": 90 }, { "epoch": 0.2093692750588851, "grad_norm": 988.1458235134902, "learning_rate": 4.820919832540181e-07, "logits/chosen": 5540.859375, "logits/rejected": 5128.6162109375, "logps/chosen": -247.3248748779297, "logps/rejected": -237.8451385498047, "loss": 221.1634, "rewards/accuracies": 0.43437498807907104, "rewards/chosen": 0.32543858885765076, "rewards/margins": -0.04126424342393875, "rewards/rejected": 0.3667028546333313, "step": 100 }, { "epoch": 0.23030620256477363, "grad_norm": 968.9316114751973, "learning_rate": 4.7467175306295647e-07, "logits/chosen": 5704.5849609375, "logits/rejected": 4630.22412109375, "logps/chosen": -239.6920928955078, "logps/rejected": -225.50222778320312, "loss": 225.2349, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.3493049740791321, "rewards/margins": 0.01561106275767088, "rewards/rejected": 0.33369389176368713, "step": 110 }, { "epoch": 0.2512431300706621, "grad_norm": 944.9549945207706, "learning_rate": 4.6604720940421207e-07, "logits/chosen": 5287.7451171875, "logits/rejected": 4921.56884765625, "logps/chosen": -233.7930908203125, "logps/rejected": -231.26657104492188, "loss": 229.5115, "rewards/accuracies": 0.5093749761581421, "rewards/chosen": 0.33385908603668213, "rewards/margins": -0.022233102470636368, "rewards/rejected": 0.356092244386673, "step": 120 }, { "epoch": 0.2721800575765506, "grad_norm": 1276.812459526022, "learning_rate": 4.5626458262912735e-07, "logits/chosen": 5355.15576171875, "logits/rejected": 4776.7998046875, "logps/chosen": -236.29244995117188, "logps/rejected": -233.30307006835938, "loss": 221.8784, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.322248637676239, "rewards/margins": -0.03480709344148636, "rewards/rejected": 0.3570557236671448, "step": 130 }, { "epoch": 0.29311698508243916, "grad_norm": 899.787701651727, "learning_rate": 4.453763107901675e-07, "logits/chosen": 5164.56640625, "logits/rejected": 4602.421875, "logps/chosen": -238.9141845703125, "logps/rejected": -216.98031616210938, "loss": 229.5713, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.3299064338207245, "rewards/margins": -0.011618844233453274, "rewards/rejected": 0.34152525663375854, "step": 140 }, { "epoch": 0.31405391258832765, "grad_norm": 1078.422410282313, "learning_rate": 4.3344075855595097e-07, "logits/chosen": 5692.89453125, "logits/rejected": 4704.0322265625, "logps/chosen": -256.35137939453125, "logps/rejected": -216.76150512695312, "loss": 214.8497, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.32763591408729553, "rewards/margins": -0.019656497985124588, "rewards/rejected": 0.347292423248291, "step": 150 }, { "epoch": 0.33499084009421615, "grad_norm": 912.0955307736757, "learning_rate": 4.2052190435769554e-07, "logits/chosen": 5401.46875, "logits/rejected": 4505.7705078125, "logps/chosen": -245.025146484375, "logps/rejected": -219.9535369873047, "loss": 229.4244, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.3956901431083679, "rewards/margins": 0.03144276887178421, "rewards/rejected": 0.3642473816871643, "step": 160 }, { "epoch": 0.3559277676001047, "grad_norm": 954.243569931957, "learning_rate": 4.0668899744407567e-07, "logits/chosen": 5647.60400390625, "logits/rejected": 4826.2236328125, "logps/chosen": -251.16824340820312, "logps/rejected": -207.45095825195312, "loss": 210.6987, "rewards/accuracies": 0.515625, "rewards/chosen": 0.3114745318889618, "rewards/margins": -0.020466681569814682, "rewards/rejected": 0.3319412171840668, "step": 170 }, { "epoch": 0.3768646951059932, "grad_norm": 874.3975161439175, "learning_rate": 3.920161866827889e-07, "logits/chosen": 5324.474609375, "logits/rejected": 4590.5234375, "logps/chosen": -253.6103057861328, "logps/rejected": -212.4313201904297, "loss": 219.7222, "rewards/accuracies": 0.46875, "rewards/chosen": 0.33684980869293213, "rewards/margins": -0.03864480182528496, "rewards/rejected": 0.3754945993423462, "step": 180 }, { "epoch": 0.39780162261188173, "grad_norm": 890.3703764703043, "learning_rate": 3.765821230985757e-07, "logits/chosen": 5454.69677734375, "logits/rejected": 4614.9931640625, "logps/chosen": -234.9591827392578, "logps/rejected": -210.7035369873047, "loss": 217.6231, "rewards/accuracies": 0.46562498807907104, "rewards/chosen": 0.3479231297969818, "rewards/margins": -0.023874130100011826, "rewards/rejected": 0.3717973232269287, "step": 190 }, { "epoch": 0.4187385501177702, "grad_norm": 959.127096720511, "learning_rate": 3.604695382782159e-07, "logits/chosen": 5091.693359375, "logits/rejected": 4443.45703125, "logps/chosen": -221.46017456054688, "logps/rejected": -207.1079864501953, "loss": 222.0456, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": 0.32890671491622925, "rewards/margins": -0.028620053082704544, "rewards/rejected": 0.3575268089771271, "step": 200 }, { "epoch": 0.4396754776236587, "grad_norm": 977.743288922772, "learning_rate": 3.4376480090239047e-07, "logits/chosen": 5715.4208984375, "logits/rejected": 4880.05615234375, "logps/chosen": -251.9993896484375, "logps/rejected": -224.98220825195312, "loss": 225.9833, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.3476799726486206, "rewards/margins": -0.019533518701791763, "rewards/rejected": 0.36721348762512207, "step": 210 }, { "epoch": 0.46061240512954726, "grad_norm": 946.0452194833395, "learning_rate": 3.265574537815398e-07, "logits/chosen": 5137.0146484375, "logits/rejected": 4787.4306640625, "logps/chosen": -237.6749725341797, "logps/rejected": -222.62771606445312, "loss": 228.3879, "rewards/accuracies": 0.484375, "rewards/chosen": 0.3344074785709381, "rewards/margins": -0.04275032505393028, "rewards/rejected": 0.3771578073501587, "step": 220 }, { "epoch": 0.48154933263543576, "grad_norm": 1021.1755452410538, "learning_rate": 3.0893973387735683e-07, "logits/chosen": 5326.5107421875, "logits/rejected": 4527.2001953125, "logps/chosen": -231.9087677001953, "logps/rejected": -209.6526641845703, "loss": 212.3704, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": 0.36360007524490356, "rewards/margins": 0.00580978998914361, "rewards/rejected": 0.3577902913093567, "step": 230 }, { "epoch": 0.5024862601413242, "grad_norm": 931.7653919091188, "learning_rate": 2.910060778827554e-07, "logits/chosen": 5480.38134765625, "logits/rejected": 4673.6044921875, "logps/chosen": -230.7592010498047, "logps/rejected": -211.5625457763672, "loss": 222.4007, "rewards/accuracies": 0.512499988079071, "rewards/chosen": 0.3481771647930145, "rewards/margins": -0.034006811678409576, "rewards/rejected": 0.3821839988231659, "step": 240 }, { "epoch": 0.5234231876472127, "grad_norm": 893.8551271831342, "learning_rate": 2.7285261601056697e-07, "logits/chosen": 5610.1064453125, "logits/rejected": 4743.2900390625, "logps/chosen": -247.9258270263672, "logps/rejected": -224.725341796875, "loss": 218.3257, "rewards/accuracies": 0.5, "rewards/chosen": 0.38022834062576294, "rewards/margins": 0.006729437503963709, "rewards/rejected": 0.37349894642829895, "step": 250 }, { "epoch": 0.5443601151531012, "grad_norm": 954.9590918842988, "learning_rate": 2.5457665670441937e-07, "logits/chosen": 5426.6806640625, "logits/rejected": 4926.78076171875, "logps/chosen": -248.59634399414062, "logps/rejected": -222.47549438476562, "loss": 216.7827, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": 0.37472763657569885, "rewards/margins": -0.011340203694999218, "rewards/rejected": 0.38606783747673035, "step": 260 }, { "epoch": 0.5652970426589898, "grad_norm": 980.2338179672311, "learning_rate": 2.3627616503391812e-07, "logits/chosen": 5509.77490234375, "logits/rejected": 5063.6748046875, "logps/chosen": -237.590576171875, "logps/rejected": -221.60397338867188, "loss": 214.9091, "rewards/accuracies": 0.515625, "rewards/chosen": 0.37023288011550903, "rewards/margins": -0.017692191526293755, "rewards/rejected": 0.38792508840560913, "step": 270 }, { "epoch": 0.5862339701648783, "grad_norm": 882.5545431851971, "learning_rate": 2.1804923757009882e-07, "logits/chosen": 5514.5966796875, "logits/rejected": 4599.09765625, "logps/chosen": -245.1168212890625, "logps/rejected": -208.6852569580078, "loss": 216.9057, "rewards/accuracies": 0.5062500238418579, "rewards/chosen": 0.35608822107315063, "rewards/margins": -0.02490018866956234, "rewards/rejected": 0.3809884190559387, "step": 280 }, { "epoch": 0.6071708976707668, "grad_norm": 1027.801450351746, "learning_rate": 1.9999357655598891e-07, "logits/chosen": 5770.27978515625, "logits/rejected": 5421.36083984375, "logps/chosen": -252.1714324951172, "logps/rejected": -246.72555541992188, "loss": 225.9217, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": 0.3926265239715576, "rewards/margins": -0.11933328956365585, "rewards/rejected": 0.5119598507881165, "step": 290 }, { "epoch": 0.6281078251766553, "grad_norm": 983.8896941735266, "learning_rate": 1.8220596619089573e-07, "logits/chosen": 5291.69921875, "logits/rejected": 4989.41064453125, "logps/chosen": -236.5386962890625, "logps/rejected": -229.07266235351562, "loss": 226.711, "rewards/accuracies": 0.48124998807907104, "rewards/chosen": 0.3481127917766571, "rewards/margins": -0.07034964859485626, "rewards/rejected": 0.4184624254703522, "step": 300 }, { "epoch": 0.6490447526825438, "grad_norm": 972.552350701513, "learning_rate": 1.647817538357072e-07, "logits/chosen": 5905.65380859375, "logits/rejected": 5196.03125, "logps/chosen": -250.03976440429688, "logps/rejected": -230.9469757080078, "loss": 216.3124, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.40789794921875, "rewards/margins": 0.021800601854920387, "rewards/rejected": 0.38609737157821655, "step": 310 }, { "epoch": 0.6699816801884323, "grad_norm": 923.1332436203306, "learning_rate": 1.478143389201113e-07, "logits/chosen": 5463.5693359375, "logits/rejected": 4749.58837890625, "logps/chosen": -235.2841796875, "logps/rejected": -219.2712860107422, "loss": 217.7832, "rewards/accuracies": 0.4593749940395355, "rewards/chosen": 0.3657926023006439, "rewards/margins": -0.039618875831365585, "rewards/rejected": 0.4054114818572998, "step": 320 }, { "epoch": 0.6909186076943209, "grad_norm": 965.0967343991006, "learning_rate": 1.3139467229135998e-07, "logits/chosen": 5287.45947265625, "logits/rejected": 4928.7021484375, "logps/chosen": -241.1490478515625, "logps/rejected": -224.2013397216797, "loss": 219.6799, "rewards/accuracies": 0.453125, "rewards/chosen": 0.35635194182395935, "rewards/margins": -0.044928766787052155, "rewards/rejected": 0.4012807309627533, "step": 330 }, { "epoch": 0.7118555352002094, "grad_norm": 915.8564126507746, "learning_rate": 1.1561076868822755e-07, "logits/chosen": 5126.6953125, "logits/rejected": 4468.2080078125, "logps/chosen": -218.98098754882812, "logps/rejected": -202.6676483154297, "loss": 211.1805, "rewards/accuracies": 0.515625, "rewards/chosen": 0.3858596980571747, "rewards/margins": 0.0005099147674627602, "rewards/rejected": 0.3853498101234436, "step": 340 }, { "epoch": 0.7327924627060979, "grad_norm": 914.8488378330155, "learning_rate": 1.0054723495346482e-07, "logits/chosen": 5789.267578125, "logits/rejected": 4720.3525390625, "logps/chosen": -260.73028564453125, "logps/rejected": -228.9587860107422, "loss": 222.0229, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.40184181928634644, "rewards/margins": 0.008045284077525139, "rewards/rejected": 0.39379650354385376, "step": 350 }, { "epoch": 0.7537293902119864, "grad_norm": 876.5437019392028, "learning_rate": 8.628481651367875e-08, "logits/chosen": 5485.41943359375, "logits/rejected": 4798.8369140625, "logps/chosen": -239.21206665039062, "logps/rejected": -218.1109161376953, "loss": 212.415, "rewards/accuracies": 0.5, "rewards/chosen": 0.3851151466369629, "rewards/margins": -0.016298171132802963, "rewards/rejected": 0.40141329169273376, "step": 360 }, { "epoch": 0.7746663177178749, "grad_norm": 862.6497945839665, "learning_rate": 7.289996455765748e-08, "logits/chosen": 5362.8544921875, "logits/rejected": 4595.103515625, "logps/chosen": -254.4463653564453, "logps/rejected": -221.9584197998047, "loss": 216.0785, "rewards/accuracies": 0.528124988079071, "rewards/chosen": 0.34346675872802734, "rewards/margins": -0.006593307945877314, "rewards/rejected": 0.3500600755214691, "step": 370 }, { "epoch": 0.7956032452237635, "grad_norm": 932.2858564856045, "learning_rate": 6.046442623320145e-08, "logits/chosen": 5608.267578125, "logits/rejected": 4909.3837890625, "logps/chosen": -249.43460083007812, "logps/rejected": -211.33786010742188, "loss": 222.1436, "rewards/accuracies": 0.47187501192092896, "rewards/chosen": 0.3526684641838074, "rewards/margins": -0.04252925515174866, "rewards/rejected": 0.39519768953323364, "step": 380 }, { "epoch": 0.816540172729652, "grad_norm": 957.8822198142358, "learning_rate": 4.904486005914027e-08, "logits/chosen": 5878.46630859375, "logits/rejected": 5202.1943359375, "logps/chosen": -251.19577026367188, "logps/rejected": -233.38088989257812, "loss": 223.1316, "rewards/accuracies": 0.49687498807907104, "rewards/chosen": 0.40553656220436096, "rewards/margins": -0.008421207778155804, "rewards/rejected": 0.41395774483680725, "step": 390 }, { "epoch": 0.8374771002355405, "grad_norm": 893.9334894466286, "learning_rate": 3.8702478614051345e-08, "logits/chosen": 5456.71435546875, "logits/rejected": 4864.9521484375, "logps/chosen": -250.51095581054688, "logps/rejected": -213.61160278320312, "loss": 220.8271, "rewards/accuracies": 0.4906249940395355, "rewards/chosen": 0.4039316773414612, "rewards/margins": -0.01853012666106224, "rewards/rejected": 0.4224618077278137, "step": 400 }, { "epoch": 0.8584140277414289, "grad_norm": 917.1537249021089, "learning_rate": 2.9492720416985e-08, "logits/chosen": 5479.6455078125, "logits/rejected": 4854.544921875, "logps/chosen": -236.3294677734375, "logps/rejected": -217.6251220703125, "loss": 221.5988, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.4553787112236023, "rewards/margins": -0.1794721484184265, "rewards/rejected": 0.6348508596420288, "step": 410 }, { "epoch": 0.8793509552473174, "grad_norm": 963.815826471618, "learning_rate": 2.1464952759020856e-08, "logits/chosen": 5807.9521484375, "logits/rejected": 5086.6845703125, "logps/chosen": -253.22854614257812, "logps/rejected": -232.03866577148438, "loss": 218.956, "rewards/accuracies": 0.4312500059604645, "rewards/chosen": 0.38750648498535156, "rewards/margins": -0.016989920288324356, "rewards/rejected": 0.40449637174606323, "step": 420 }, { "epoch": 0.9002878827532059, "grad_norm": 1577.2268425412074, "learning_rate": 1.4662207078575684e-08, "logits/chosen": 5714.33251953125, "logits/rejected": 4623.4052734375, "logps/chosen": -248.07272338867188, "logps/rejected": -219.09396362304688, "loss": 211.869, "rewards/accuracies": 0.53125, "rewards/chosen": 0.39529919624328613, "rewards/margins": 0.011650885455310345, "rewards/rejected": 0.3836483359336853, "step": 430 }, { "epoch": 0.9212248102590945, "grad_norm": 890.353373232557, "learning_rate": 9.12094829893642e-09, "logits/chosen": 5540.88134765625, "logits/rejected": 5293.84912109375, "logps/chosen": -243.41299438476562, "logps/rejected": -236.95791625976562, "loss": 216.8079, "rewards/accuracies": 0.4468750059604645, "rewards/chosen": 0.35130536556243896, "rewards/margins": -0.039575204253196716, "rewards/rejected": 0.3908805251121521, "step": 440 }, { "epoch": 0.942161737764983, "grad_norm": 910.2914323549969, "learning_rate": 4.8708793644441086e-09, "logits/chosen": 5690.5556640625, "logits/rejected": 4916.1923828125, "logps/chosen": -252.1903839111328, "logps/rejected": -218.7462158203125, "loss": 213.594, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": 0.3814484477043152, "rewards/margins": -0.04559388756752014, "rewards/rejected": 0.4270423352718353, "step": 450 }, { "epoch": 0.9630986652708715, "grad_norm": 930.7189662835303, "learning_rate": 1.9347820230782295e-09, "logits/chosen": 5878.43408203125, "logits/rejected": 4987.50341796875, "logps/chosen": -272.1065979003906, "logps/rejected": -240.8032989501953, "loss": 232.251, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": 0.4089243412017822, "rewards/margins": -0.040169842541217804, "rewards/rejected": 0.44909414649009705, "step": 460 }, { "epoch": 0.98403559277676, "grad_norm": 942.5169399812006, "learning_rate": 3.2839470889836627e-10, "logits/chosen": 5481.58837890625, "logits/rejected": 4879.6376953125, "logps/chosen": -240.8720245361328, "logps/rejected": -215.34402465820312, "loss": 214.6576, "rewards/accuracies": 0.4781250059604645, "rewards/chosen": 0.3996717035770416, "rewards/margins": -0.024916449561715126, "rewards/rejected": 0.424588143825531, "step": 470 }, { "epoch": 0.998691442030882, "step": 477, "total_flos": 0.0, "train_loss": 222.93432777132878, "train_runtime": 12805.5892, "train_samples_per_second": 4.774, "train_steps_per_second": 0.037 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }