{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -2.8386030197143555, "logits/rejected": -2.823939323425293, "logps/chosen": -324.3727722167969, "logps/rejected": -231.64634704589844, "loss": 0.2826, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.8247194290161133, "logits/rejected": -2.750765800476074, "logps/chosen": -275.7482604980469, "logps/rejected": -253.39404296875, "loss": 0.2847, "rewards/accuracies": 0.4513888955116272, "rewards/chosen": 0.00012852638610638678, "rewards/margins": -0.0004244056181050837, "rewards/rejected": 0.0005529320333153009, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.7973198890686035, "logits/rejected": -2.779845714569092, "logps/chosen": -261.89483642578125, "logps/rejected": -257.04736328125, "loss": 0.2856, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": 0.0005934558575972915, "rewards/margins": 0.0017298649763688445, "rewards/rejected": -0.001136409118771553, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.783583164215088, "logits/rejected": -2.777108907699585, "logps/chosen": -294.8003234863281, "logps/rejected": -259.10296630859375, "loss": 0.2889, "rewards/accuracies": 0.65625, "rewards/chosen": 0.0027175676077604294, "rewards/margins": 0.011478239670395851, "rewards/rejected": -0.008760671131312847, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.802429676055908, "logits/rejected": -2.7715487480163574, "logps/chosen": -284.63958740234375, "logps/rejected": -264.9128112792969, "loss": 0.2823, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.007285858038812876, "rewards/margins": 0.022248882800340652, "rewards/rejected": -0.029534736648201942, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.999733114418725e-07, "logits/chosen": -2.781130790710449, "logits/rejected": -2.718773126602173, "logps/chosen": -284.725341796875, "logps/rejected": -255.60073852539062, "loss": 0.2671, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.023446276783943176, "rewards/margins": 0.06585647165775299, "rewards/rejected": -0.08930274099111557, "step": 50 }, { "epoch": 0.13, "learning_rate": 4.990398100856366e-07, "logits/chosen": -2.8104348182678223, "logits/rejected": -2.788311243057251, "logps/chosen": -297.0313720703125, "logps/rejected": -266.0052795410156, "loss": 0.2428, "rewards/accuracies": 0.606249988079071, "rewards/chosen": -0.10381942987442017, "rewards/margins": 0.084610715508461, "rewards/rejected": -0.18843016028404236, "step": 60 }, { "epoch": 0.15, "learning_rate": 4.967775735898179e-07, "logits/chosen": -2.704342842102051, "logits/rejected": -2.6683297157287598, "logps/chosen": -276.36395263671875, "logps/rejected": -271.9848327636719, "loss": 0.2192, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.16314834356307983, "rewards/margins": 0.17039458453655243, "rewards/rejected": -0.33354294300079346, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.931986719649298e-07, "logits/chosen": -2.7222819328308105, "logits/rejected": -2.7045040130615234, "logps/chosen": -298.33831787109375, "logps/rejected": -293.718017578125, "loss": 0.1999, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30510228872299194, "rewards/margins": 0.1686253696680069, "rewards/rejected": -0.47372761368751526, "step": 80 }, { "epoch": 0.19, "learning_rate": 4.883222001996351e-07, "logits/chosen": -2.759632110595703, "logits/rejected": -2.734144449234009, "logps/chosen": -331.0855712890625, "logps/rejected": -346.59991455078125, "loss": 0.1682, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.45922285318374634, "rewards/margins": 0.3295659124851227, "rewards/rejected": -0.7887887954711914, "step": 90 }, { "epoch": 0.21, "learning_rate": 4.821741763807186e-07, "logits/chosen": -2.6711103916168213, "logits/rejected": -2.664060115814209, "logps/chosen": -336.68927001953125, "logps/rejected": -331.12799072265625, "loss": 0.1643, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.4235810339450836, "rewards/margins": 0.24597103893756866, "rewards/rejected": -0.6695520281791687, "step": 100 }, { "epoch": 0.21, "eval_logits/chosen": -2.790248394012451, "eval_logits/rejected": -2.7691245079040527, "eval_logps/chosen": -297.79962158203125, "eval_logps/rejected": -337.0708923339844, "eval_loss": 0.15584461390972137, "eval_rewards/accuracies": 0.74609375, "eval_rewards/chosen": -0.4075998365879059, "eval_rewards/margins": 0.38957637548446655, "eval_rewards/rejected": -0.79717618227005, "eval_runtime": 53.5413, "eval_samples_per_second": 37.354, "eval_steps_per_second": 0.598, "step": 100 }, { "epoch": 0.23, "learning_rate": 4.747874028753375e-07, "logits/chosen": -2.7711846828460693, "logits/rejected": -2.7162532806396484, "logps/chosen": -322.896484375, "logps/rejected": -321.31158447265625, "loss": 0.1423, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4842161536216736, "rewards/margins": 0.4429897367954254, "rewards/rejected": -0.9272058606147766, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.662012913161997e-07, "logits/chosen": -2.6857857704162598, "logits/rejected": -2.664361000061035, "logps/chosen": -340.3297119140625, "logps/rejected": -381.2372741699219, "loss": 0.1325, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7687980532646179, "rewards/margins": 0.4345701336860657, "rewards/rejected": -1.203368067741394, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.5646165232345103e-07, "logits/chosen": -2.679908037185669, "logits/rejected": -2.661154270172119, "logps/chosen": -350.47247314453125, "logps/rejected": -386.91656494140625, "loss": 0.1191, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.8761329650878906, "rewards/margins": 0.5328775644302368, "rewards/rejected": -1.4090105295181274, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.456204510851956e-07, "logits/chosen": -2.622180461883545, "logits/rejected": -2.604306697845459, "logps/chosen": -338.3455505371094, "logps/rejected": -356.08990478515625, "loss": 0.1244, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.8404749035835266, "rewards/margins": 0.39392346143722534, "rewards/rejected": -1.2343984842300415, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.337355301007335e-07, "logits/chosen": -2.541025400161743, "logits/rejected": -2.5166730880737305, "logps/chosen": -345.60760498046875, "logps/rejected": -372.7431335449219, "loss": 0.1258, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7515507936477661, "rewards/margins": 0.42334675788879395, "rewards/rejected": -1.17489755153656, "step": 150 }, { "epoch": 0.33, "learning_rate": 4.2087030056579986e-07, "logits/chosen": -2.5379557609558105, "logits/rejected": -2.528388261795044, "logps/chosen": -345.44384765625, "logps/rejected": -388.0000915527344, "loss": 0.1209, "rewards/accuracies": 0.65625, "rewards/chosen": -0.8115363121032715, "rewards/margins": 0.39176443219184875, "rewards/rejected": -1.2033007144927979, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.070934040463998e-07, "logits/chosen": -2.508551836013794, "logits/rejected": -2.4616193771362305, "logps/chosen": -371.34246826171875, "logps/rejected": -380.660888671875, "loss": 0.1105, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.9994179606437683, "rewards/margins": 0.4626193940639496, "rewards/rejected": -1.4620373249053955, "step": 170 }, { "epoch": 0.38, "learning_rate": 3.9247834624635404e-07, "logits/chosen": -2.51965594291687, "logits/rejected": -2.5132761001586914, "logps/chosen": -332.5484924316406, "logps/rejected": -384.0250549316406, "loss": 0.1124, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9845203161239624, "rewards/margins": 0.4795452654361725, "rewards/rejected": -1.4640657901763916, "step": 180 }, { "epoch": 0.4, "learning_rate": 3.7710310482256523e-07, "logits/chosen": -2.55594539642334, "logits/rejected": -2.5516602993011475, "logps/chosen": -353.2313537597656, "logps/rejected": -384.13861083984375, "loss": 0.1058, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.9792869687080383, "rewards/margins": 0.40680208802223206, "rewards/rejected": -1.3860890865325928, "step": 190 }, { "epoch": 0.42, "learning_rate": 3.610497133404795e-07, "logits/chosen": -2.5069775581359863, "logits/rejected": -2.5189363956451416, "logps/chosen": -398.85382080078125, "logps/rejected": -431.91455078125, "loss": 0.1003, "rewards/accuracies": 0.65625, "rewards/chosen": -1.2246520519256592, "rewards/margins": 0.3960326015949249, "rewards/rejected": -1.6206846237182617, "step": 200 }, { "epoch": 0.42, "eval_logits/chosen": -2.5340371131896973, "eval_logits/rejected": -2.513735294342041, "eval_logps/chosen": -384.15533447265625, "eval_logps/rejected": -450.7552185058594, "eval_loss": 0.0996941402554512, "eval_rewards/accuracies": 0.703125, "eval_rewards/chosen": -1.2711572647094727, "eval_rewards/margins": 0.6628624200820923, "eval_rewards/rejected": -1.934019684791565, "eval_runtime": 53.511, "eval_samples_per_second": 37.375, "eval_steps_per_second": 0.598, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.4440382358952115e-07, "logits/chosen": -2.455578565597534, "logits/rejected": -2.446720838546753, "logps/chosen": -391.07830810546875, "logps/rejected": -428.397705078125, "loss": 0.1038, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.3349438905715942, "rewards/margins": 0.5562185645103455, "rewards/rejected": -1.8911622762680054, "step": 210 }, { "epoch": 0.46, "learning_rate": 3.272542485937368e-07, "logits/chosen": -2.551090955734253, "logits/rejected": -2.529384136199951, "logps/chosen": -385.6699523925781, "logps/rejected": -405.87615966796875, "loss": 0.1138, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -1.1878398656845093, "rewards/margins": 0.44445449113845825, "rewards/rejected": -1.6322942972183228, "step": 220 }, { "epoch": 0.48, "learning_rate": 3.096924887558854e-07, "logits/chosen": -2.5678157806396484, "logits/rejected": -2.5255255699157715, "logps/chosen": -411.07745361328125, "logps/rejected": -404.2816467285156, "loss": 0.1149, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.9748584628105164, "rewards/margins": 0.47213855385780334, "rewards/rejected": -1.446997046470642, "step": 230 }, { "epoch": 0.5, "learning_rate": 2.9181224366319943e-07, "logits/chosen": -2.4429595470428467, "logits/rejected": -2.4049136638641357, "logps/chosen": -386.62530517578125, "logps/rejected": -397.7767028808594, "loss": 0.1092, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1572192907333374, "rewards/margins": 0.4687051773071289, "rewards/rejected": -1.6259244680404663, "step": 240 }, { "epoch": 0.52, "learning_rate": 2.7370891215954565e-07, "logits/chosen": -2.400578260421753, "logits/rejected": -2.3846592903137207, "logps/chosen": -413.29266357421875, "logps/rejected": -441.35748291015625, "loss": 0.0928, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.3619310855865479, "rewards/margins": 0.6331827044487, "rewards/rejected": -1.9951136112213135, "step": 250 }, { "epoch": 0.54, "learning_rate": 2.55479083351317e-07, "logits/chosen": -2.435859203338623, "logits/rejected": -2.4128081798553467, "logps/chosen": -418.8388671875, "logps/rejected": -462.96282958984375, "loss": 0.097, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.2928632497787476, "rewards/margins": 0.7572471499443054, "rewards/rejected": -2.050110340118408, "step": 260 }, { "epoch": 0.56, "learning_rate": 2.3722002126275822e-07, "logits/chosen": -2.3607535362243652, "logits/rejected": -2.3512327671051025, "logps/chosen": -393.47845458984375, "logps/rejected": -424.65692138671875, "loss": 0.0942, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.2448090314865112, "rewards/margins": 0.5817195177078247, "rewards/rejected": -1.8265281915664673, "step": 270 }, { "epoch": 0.59, "learning_rate": 2.19029145890313e-07, "logits/chosen": -2.384596586227417, "logits/rejected": -2.357322931289673, "logps/chosen": -401.50152587890625, "logps/rejected": -447.069580078125, "loss": 0.0894, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.463189721107483, "rewards/margins": 0.627885103225708, "rewards/rejected": -2.0910747051239014, "step": 280 }, { "epoch": 0.61, "learning_rate": 2.0100351342479216e-07, "logits/chosen": -2.3855137825012207, "logits/rejected": -2.334260940551758, "logps/chosen": -441.15118408203125, "logps/rejected": -456.8433532714844, "loss": 0.0895, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5582000017166138, "rewards/margins": 0.6062092185020447, "rewards/rejected": -2.1644091606140137, "step": 290 }, { "epoch": 0.63, "learning_rate": 1.8323929841460178e-07, "logits/chosen": -2.4264094829559326, "logits/rejected": -2.403550624847412, "logps/chosen": -412.9310607910156, "logps/rejected": -471.4112854003906, "loss": 0.0953, "rewards/accuracies": 0.71875, "rewards/chosen": -1.2313965559005737, "rewards/margins": 0.6434706449508667, "rewards/rejected": -1.8748672008514404, "step": 300 }, { "epoch": 0.63, "eval_logits/chosen": -2.4030282497406006, "eval_logits/rejected": -2.3836517333984375, "eval_logps/chosen": -377.3980712890625, "eval_logps/rejected": -449.78228759765625, "eval_loss": 0.10235561430454254, "eval_rewards/accuracies": 0.75390625, "eval_rewards/chosen": -1.2035841941833496, "eval_rewards/margins": 0.7207058072090149, "eval_rewards/rejected": -1.9242901802062988, "eval_runtime": 53.5723, "eval_samples_per_second": 37.333, "eval_steps_per_second": 0.597, "step": 300 }, { "epoch": 0.65, "learning_rate": 1.6583128063291573e-07, "logits/chosen": -2.3959908485412598, "logits/rejected": -2.366027593612671, "logps/chosen": -389.87841796875, "logps/rejected": -428.79150390625, "loss": 0.0967, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2791574001312256, "rewards/margins": 0.5353385806083679, "rewards/rejected": -1.8144958019256592, "step": 310 }, { "epoch": 0.67, "learning_rate": 1.488723393865766e-07, "logits/chosen": -2.315176010131836, "logits/rejected": -2.303180694580078, "logps/chosen": -419.81304931640625, "logps/rejected": -451.9205627441406, "loss": 0.0913, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.3704838752746582, "rewards/margins": 0.4932515621185303, "rewards/rejected": -1.8637354373931885, "step": 320 }, { "epoch": 0.69, "learning_rate": 1.3245295796480788e-07, "logits/chosen": -2.3155629634857178, "logits/rejected": -2.306206226348877, "logps/chosen": -373.34173583984375, "logps/rejected": -451.43304443359375, "loss": 0.094, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.2377197742462158, "rewards/margins": 0.7202552556991577, "rewards/rejected": -1.957975149154663, "step": 330 }, { "epoch": 0.71, "learning_rate": 1.1666074087171627e-07, "logits/chosen": -2.3178515434265137, "logits/rejected": -2.317112684249878, "logps/chosen": -421.288330078125, "logps/rejected": -464.2798767089844, "loss": 0.1012, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.3072739839553833, "rewards/margins": 0.6341418027877808, "rewards/rejected": -1.941415786743164, "step": 340 }, { "epoch": 0.73, "learning_rate": 1.0157994641835734e-07, "logits/chosen": -2.352154016494751, "logits/rejected": -2.310459852218628, "logps/chosen": -371.04180908203125, "logps/rejected": -418.411376953125, "loss": 0.0964, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2016589641571045, "rewards/margins": 0.6332089900970459, "rewards/rejected": -1.8348678350448608, "step": 350 }, { "epoch": 0.75, "learning_rate": 8.729103716819111e-08, "logits/chosen": -2.3340022563934326, "logits/rejected": -2.2888753414154053, "logps/chosen": -399.73870849609375, "logps/rejected": -433.62939453125, "loss": 0.103, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.3542587757110596, "rewards/margins": 0.6527735590934753, "rewards/rejected": -2.0070323944091797, "step": 360 }, { "epoch": 0.77, "learning_rate": 7.387025063449081e-08, "logits/chosen": -2.305725574493408, "logits/rejected": -2.2590928077697754, "logps/chosen": -424.70269775390625, "logps/rejected": -478.83160400390625, "loss": 0.0832, "rewards/accuracies": 0.71875, "rewards/chosen": -1.3823884725570679, "rewards/margins": 0.7607783079147339, "rewards/rejected": -2.143167018890381, "step": 370 }, { "epoch": 0.79, "learning_rate": 6.138919252022435e-08, "logits/chosen": -2.3276476860046387, "logits/rejected": -2.3130292892456055, "logps/chosen": -431.13568115234375, "logps/rejected": -477.88824462890625, "loss": 0.0903, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.4548090696334839, "rewards/margins": 0.671941876411438, "rewards/rejected": -2.126750946044922, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.991445467064689e-08, "logits/chosen": -2.2263472080230713, "logits/rejected": -2.1942696571350098, "logps/chosen": -418.37335205078125, "logps/rejected": -485.0545349121094, "loss": 0.0883, "rewards/accuracies": 0.75, "rewards/chosen": -1.488586187362671, "rewards/margins": 0.7860161662101746, "rewards/rejected": -2.2746024131774902, "step": 390 }, { "epoch": 0.84, "learning_rate": 3.9507259776993954e-08, "logits/chosen": -2.319228410720825, "logits/rejected": -2.2877087593078613, "logps/chosen": -417.96875, "logps/rejected": -461.0101623535156, "loss": 0.0811, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.4231641292572021, "rewards/margins": 0.647831916809082, "rewards/rejected": -2.0709962844848633, "step": 400 }, { "epoch": 0.84, "eval_logits/chosen": -2.3254384994506836, "eval_logits/rejected": -2.301893472671509, "eval_logps/chosen": -393.03472900390625, "eval_logps/rejected": -475.715087890625, "eval_loss": 0.09447792172431946, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -1.3599507808685303, "eval_rewards/margins": 0.8236675262451172, "eval_rewards/rejected": -2.1836180686950684, "eval_runtime": 53.5742, "eval_samples_per_second": 37.331, "eval_steps_per_second": 0.597, "step": 400 }, { "epoch": 0.86, "learning_rate": 3.022313472693447e-08, "logits/chosen": -2.3134891986846924, "logits/rejected": -2.2576441764831543, "logps/chosen": -405.07867431640625, "logps/rejected": -426.08770751953125, "loss": 0.088, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.304164171218872, "rewards/margins": 0.7416768074035645, "rewards/rejected": -2.0458409786224365, "step": 410 }, { "epoch": 0.88, "learning_rate": 2.2111614344599684e-08, "logits/chosen": -2.3239588737487793, "logits/rejected": -2.2752654552459717, "logps/chosen": -434.28118896484375, "logps/rejected": -482.84234619140625, "loss": 0.0896, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.4340513944625854, "rewards/margins": 0.8941879272460938, "rewards/rejected": -2.3282394409179688, "step": 420 }, { "epoch": 0.9, "learning_rate": 1.521597710086439e-08, "logits/chosen": -2.278296947479248, "logits/rejected": -2.2763679027557373, "logps/chosen": -423.744384765625, "logps/rejected": -485.7794494628906, "loss": 0.0868, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4879920482635498, "rewards/margins": 0.6670708656311035, "rewards/rejected": -2.1550629138946533, "step": 430 }, { "epoch": 0.92, "learning_rate": 9.57301420397924e-09, "logits/chosen": -2.26120924949646, "logits/rejected": -2.2485973834991455, "logps/chosen": -404.76959228515625, "logps/rejected": -461.03448486328125, "loss": 0.0892, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.4389055967330933, "rewards/margins": 0.6930050253868103, "rewards/rejected": -2.131910800933838, "step": 440 }, { "epoch": 0.94, "learning_rate": 5.212833302556258e-09, "logits/chosen": -2.2681469917297363, "logits/rejected": -2.275200366973877, "logps/chosen": -404.1940612792969, "logps/rejected": -463.80401611328125, "loss": 0.0902, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.5075231790542603, "rewards/margins": 0.6551094055175781, "rewards/rejected": -2.162632703781128, "step": 450 }, { "epoch": 0.96, "learning_rate": 2.158697848236607e-09, "logits/chosen": -2.2567198276519775, "logits/rejected": -2.215657949447632, "logps/chosen": -404.21527099609375, "logps/rejected": -441.24945068359375, "loss": 0.0867, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.5105773210525513, "rewards/margins": 0.5308315753936768, "rewards/rejected": -2.0414090156555176, "step": 460 }, { "epoch": 0.98, "learning_rate": 4.269029751107489e-10, "logits/chosen": -2.2435102462768555, "logits/rejected": -2.2021100521087646, "logps/chosen": -399.60418701171875, "logps/rejected": -474.943359375, "loss": 0.0902, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.4095227718353271, "rewards/margins": 0.8036805391311646, "rewards/rejected": -2.213203191757202, "step": 470 }, { "epoch": 1.0, "step": 478, "total_flos": 0.0, "train_loss": 0.13007899894375183, "train_runtime": 3956.3918, "train_samples_per_second": 15.452, "train_steps_per_second": 0.121 } ], "logging_steps": 10, "max_steps": 478, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "trial_name": null, "trial_params": null }