{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 478, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -2.847970962524414, "logits/rejected": -2.79160213470459, "logps/chosen": -284.9612731933594, "logps/rejected": -276.45928955078125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.754901647567749, "logits/rejected": -2.7529661655426025, "logps/chosen": -249.956298828125, "logps/rejected": -223.05245971679688, "loss": 0.6931, "rewards/accuracies": 0.3958333432674408, "rewards/chosen": -8.542059367755428e-05, "rewards/margins": -4.0294162317877635e-05, "rewards/rejected": -4.512643499765545e-05, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -2.7449066638946533, "logits/rejected": -2.745481014251709, "logps/chosen": -257.4268493652344, "logps/rejected": -247.520751953125, "loss": 0.6925, "rewards/accuracies": 0.4937500059604645, "rewards/chosen": 0.00028673160704784095, "rewards/margins": 0.0011877163778990507, "rewards/rejected": -0.0009009848581627011, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -2.8009085655212402, "logits/rejected": -2.7534918785095215, "logps/chosen": -300.4103088378906, "logps/rejected": -261.89532470703125, "loss": 0.6882, "rewards/accuracies": 0.71875, "rewards/chosen": 0.0016673363279551268, "rewards/margins": 0.009702490642666817, "rewards/rejected": -0.008035155013203621, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -2.7635364532470703, "logits/rejected": -2.751422882080078, "logps/chosen": -256.6298522949219, "logps/rejected": -274.86297607421875, "loss": 0.6805, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0019601243548095226, "rewards/margins": 0.025836413726210594, "rewards/rejected": -0.027796542271971703, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.999733114418725e-07, "logits/chosen": -2.7672626972198486, "logits/rejected": -2.7396867275238037, "logps/chosen": -284.4268798828125, "logps/rejected": -256.52667236328125, "loss": 0.6675, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.023474793881177902, "rewards/margins": 0.06475953012704849, "rewards/rejected": -0.0882343202829361, "step": 50 }, { "epoch": 0.13, "learning_rate": 4.990398100856366e-07, "logits/chosen": -2.7358150482177734, "logits/rejected": -2.724313259124756, "logps/chosen": -281.9308166503906, "logps/rejected": -256.6224670410156, "loss": 0.6443, "rewards/accuracies": 0.6875, "rewards/chosen": -0.060463108122348785, "rewards/margins": 0.1052827388048172, "rewards/rejected": -0.1657458394765854, "step": 60 }, { "epoch": 0.15, "learning_rate": 4.967775735898179e-07, "logits/chosen": -2.781935453414917, "logits/rejected": -2.739537000656128, "logps/chosen": -291.1555480957031, "logps/rejected": -273.9505920410156, "loss": 0.6246, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.24020154774188995, "rewards/margins": 0.17989788949489594, "rewards/rejected": -0.4200994074344635, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.931986719649298e-07, "logits/chosen": -2.782163143157959, "logits/rejected": -2.7544727325439453, "logps/chosen": -290.7063903808594, "logps/rejected": -333.33160400390625, "loss": 0.5953, "rewards/accuracies": 0.6875, "rewards/chosen": -0.30353400111198425, "rewards/margins": 0.3068069517612457, "rewards/rejected": -0.61034095287323, "step": 80 }, { "epoch": 0.19, "learning_rate": 4.883222001996351e-07, "logits/chosen": -2.8103935718536377, "logits/rejected": -2.7860381603240967, "logps/chosen": -309.4369201660156, "logps/rejected": -328.04937744140625, "loss": 0.5871, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.34070074558258057, "rewards/margins": 0.4278062880039215, "rewards/rejected": -0.7685070037841797, "step": 90 }, { "epoch": 0.21, "learning_rate": 4.821741763807186e-07, "logits/chosen": -2.775650978088379, "logits/rejected": -2.742344379425049, "logps/chosen": -354.2271423339844, "logps/rejected": -372.828369140625, "loss": 0.5691, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6526215672492981, "rewards/margins": 0.4535134732723236, "rewards/rejected": -1.1061351299285889, "step": 100 }, { "epoch": 0.21, "eval_logits/chosen": -2.7190756797790527, "eval_logits/rejected": -2.702101707458496, "eval_logps/chosen": -322.6109924316406, "eval_logps/rejected": -376.20880126953125, "eval_loss": 0.5829024910926819, "eval_rewards/accuracies": 0.7421875, "eval_rewards/chosen": -0.6557134985923767, "eval_rewards/margins": 0.5328419208526611, "eval_rewards/rejected": -1.188555359840393, "eval_runtime": 53.0851, "eval_samples_per_second": 37.675, "eval_steps_per_second": 0.603, "step": 100 }, { "epoch": 0.23, "learning_rate": 4.747874028753375e-07, "logits/chosen": -2.667227268218994, "logits/rejected": -2.6603758335113525, "logps/chosen": -321.42108154296875, "logps/rejected": -396.7526550292969, "loss": 0.5384, "rewards/accuracies": 0.78125, "rewards/chosen": -0.5715780258178711, "rewards/margins": 0.6688358187675476, "rewards/rejected": -1.2404139041900635, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.662012913161997e-07, "logits/chosen": -2.622821807861328, "logits/rejected": -2.583700656890869, "logps/chosen": -340.69219970703125, "logps/rejected": -375.4017333984375, "loss": 0.5579, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.7315243482589722, "rewards/margins": 0.5486994981765747, "rewards/rejected": -1.2802238464355469, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.5646165232345103e-07, "logits/chosen": -2.5822339057922363, "logits/rejected": -2.547309398651123, "logps/chosen": -359.7410583496094, "logps/rejected": -351.17999267578125, "loss": 0.5523, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.6760958433151245, "rewards/margins": 0.4332718849182129, "rewards/rejected": -1.1093676090240479, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.456204510851956e-07, "logits/chosen": -2.458064079284668, "logits/rejected": -2.434985637664795, "logps/chosen": -344.94622802734375, "logps/rejected": -373.15277099609375, "loss": 0.5431, "rewards/accuracies": 0.65625, "rewards/chosen": -0.9658713340759277, "rewards/margins": 0.568038821220398, "rewards/rejected": -1.5339101552963257, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.337355301007335e-07, "logits/chosen": -2.430382490158081, "logits/rejected": -2.411181926727295, "logps/chosen": -362.24664306640625, "logps/rejected": -394.7173767089844, "loss": 0.541, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.970133900642395, "rewards/margins": 0.5773912668228149, "rewards/rejected": -1.5475252866744995, "step": 150 }, { "epoch": 0.33, "learning_rate": 4.2087030056579986e-07, "logits/chosen": -2.3705012798309326, "logits/rejected": -2.3451476097106934, "logps/chosen": -340.9483947753906, "logps/rejected": -381.2392883300781, "loss": 0.5488, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7996856570243835, "rewards/margins": 0.6973718404769897, "rewards/rejected": -1.497057557106018, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.070934040463998e-07, "logits/chosen": -2.298063278198242, "logits/rejected": -2.2643802165985107, "logps/chosen": -356.18292236328125, "logps/rejected": -401.3460998535156, "loss": 0.5395, "rewards/accuracies": 0.75, "rewards/chosen": -0.8752641677856445, "rewards/margins": 0.6319175958633423, "rewards/rejected": -1.5071817636489868, "step": 170 }, { "epoch": 0.38, "learning_rate": 3.9247834624635404e-07, "logits/chosen": -2.3489673137664795, "logits/rejected": -2.294405937194824, "logps/chosen": -366.259765625, "logps/rejected": -413.059326171875, "loss": 0.5228, "rewards/accuracies": 0.75, "rewards/chosen": -0.8981040716171265, "rewards/margins": 0.7530413866043091, "rewards/rejected": -1.651145339012146, "step": 180 }, { "epoch": 0.4, "learning_rate": 3.7710310482256523e-07, "logits/chosen": -2.22472882270813, "logits/rejected": -2.1942319869995117, "logps/chosen": -390.96893310546875, "logps/rejected": -435.68634033203125, "loss": 0.5221, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3375661373138428, "rewards/margins": 0.6510864496231079, "rewards/rejected": -1.9886524677276611, "step": 190 }, { "epoch": 0.42, "learning_rate": 3.610497133404795e-07, "logits/chosen": -2.330658197402954, "logits/rejected": -2.253397226333618, "logps/chosen": -424.68511962890625, "logps/rejected": -460.4125061035156, "loss": 0.5446, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.9010859727859497, "rewards/margins": 0.9040181040763855, "rewards/rejected": -1.8051040172576904, "step": 200 }, { "epoch": 0.42, "eval_logits/chosen": -2.273806571960449, "eval_logits/rejected": -2.2433524131774902, "eval_logps/chosen": -338.0599365234375, "eval_logps/rejected": -420.1078186035156, "eval_loss": 0.5300609469413757, "eval_rewards/accuracies": 0.78125, "eval_rewards/chosen": -0.810202956199646, "eval_rewards/margins": 0.8173429369926453, "eval_rewards/rejected": -1.6275460720062256, "eval_runtime": 53.0552, "eval_samples_per_second": 37.697, "eval_steps_per_second": 0.603, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.4440382358952115e-07, "logits/chosen": -2.26928448677063, "logits/rejected": -2.201911449432373, "logps/chosen": -353.4331970214844, "logps/rejected": -383.96044921875, "loss": 0.5455, "rewards/accuracies": 0.75, "rewards/chosen": -0.8622655868530273, "rewards/margins": 0.5730525255203247, "rewards/rejected": -1.4353179931640625, "step": 210 }, { "epoch": 0.46, "learning_rate": 3.272542485937368e-07, "logits/chosen": -2.2439053058624268, "logits/rejected": -2.206618070602417, "logps/chosen": -370.7458190917969, "logps/rejected": -391.848388671875, "loss": 0.5253, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7618538737297058, "rewards/margins": 0.7462855577468872, "rewards/rejected": -1.5081393718719482, "step": 220 }, { "epoch": 0.48, "learning_rate": 3.096924887558854e-07, "logits/chosen": -2.1762518882751465, "logits/rejected": -2.1476693153381348, "logps/chosen": -382.38946533203125, "logps/rejected": -465.69561767578125, "loss": 0.5132, "rewards/accuracies": 0.71875, "rewards/chosen": -1.005793571472168, "rewards/margins": 0.7425030469894409, "rewards/rejected": -1.7482967376708984, "step": 230 }, { "epoch": 0.5, "learning_rate": 2.9181224366319943e-07, "logits/chosen": -2.1192374229431152, "logits/rejected": -2.0674259662628174, "logps/chosen": -391.3011474609375, "logps/rejected": -484.4254455566406, "loss": 0.5263, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.375982642173767, "rewards/margins": 0.8829982876777649, "rewards/rejected": -2.2589809894561768, "step": 240 }, { "epoch": 0.52, "learning_rate": 2.7370891215954565e-07, "logits/chosen": -2.1064059734344482, "logits/rejected": -2.0222904682159424, "logps/chosen": -397.3945007324219, "logps/rejected": -454.42340087890625, "loss": 0.5111, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3846924304962158, "rewards/margins": 0.8052938580513, "rewards/rejected": -2.18998646736145, "step": 250 }, { "epoch": 0.54, "learning_rate": 2.55479083351317e-07, "logits/chosen": -2.0734519958496094, "logits/rejected": -2.041645050048828, "logps/chosen": -403.8518371582031, "logps/rejected": -443.9764099121094, "loss": 0.5362, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.352430820465088, "rewards/margins": 0.6026407480239868, "rewards/rejected": -1.9550716876983643, "step": 260 }, { "epoch": 0.56, "learning_rate": 2.3722002126275822e-07, "logits/chosen": -2.0378193855285645, "logits/rejected": -2.006934881210327, "logps/chosen": -402.4918518066406, "logps/rejected": -457.62811279296875, "loss": 0.5152, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.2763839960098267, "rewards/margins": 0.744287371635437, "rewards/rejected": -2.0206713676452637, "step": 270 }, { "epoch": 0.59, "learning_rate": 2.19029145890313e-07, "logits/chosen": -2.033855438232422, "logits/rejected": -1.9725334644317627, "logps/chosen": -366.2498474121094, "logps/rejected": -433.2369079589844, "loss": 0.5284, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2484426498413086, "rewards/margins": 0.7818558812141418, "rewards/rejected": -2.0302984714508057, "step": 280 }, { "epoch": 0.61, "learning_rate": 2.0100351342479216e-07, "logits/chosen": -2.018475294113159, "logits/rejected": -1.949302077293396, "logps/chosen": -367.6812438964844, "logps/rejected": -429.4832458496094, "loss": 0.5041, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.210700273513794, "rewards/margins": 0.7417057752609253, "rewards/rejected": -1.9524061679840088, "step": 290 }, { "epoch": 0.63, "learning_rate": 1.8323929841460178e-07, "logits/chosen": -2.01090145111084, "logits/rejected": -1.9497419595718384, "logps/chosen": -396.8717956542969, "logps/rejected": -473.7056579589844, "loss": 0.5094, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.406031847000122, "rewards/margins": 0.7575126886367798, "rewards/rejected": -2.1635446548461914, "step": 300 }, { "epoch": 0.63, "eval_logits/chosen": -2.0371742248535156, "eval_logits/rejected": -1.9920138120651245, "eval_logps/chosen": -394.5289611816406, "eval_logps/rejected": -488.7168884277344, "eval_loss": 0.514569878578186, "eval_rewards/accuracies": 0.765625, "eval_rewards/chosen": -1.374893307685852, "eval_rewards/margins": 0.9387427568435669, "eval_rewards/rejected": -2.313636064529419, "eval_runtime": 53.0256, "eval_samples_per_second": 37.718, "eval_steps_per_second": 0.603, "step": 300 }, { "epoch": 0.65, "learning_rate": 1.6583128063291573e-07, "logits/chosen": -1.9743964672088623, "logits/rejected": -1.8795156478881836, "logps/chosen": -401.95098876953125, "logps/rejected": -473.07586669921875, "loss": 0.4934, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -1.2877211570739746, "rewards/margins": 0.9713341593742371, "rewards/rejected": -2.2590553760528564, "step": 310 }, { "epoch": 0.67, "learning_rate": 1.488723393865766e-07, "logits/chosen": -2.009753704071045, "logits/rejected": -1.9591827392578125, "logps/chosen": -424.99468994140625, "logps/rejected": -447.1941833496094, "loss": 0.5096, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.286123275756836, "rewards/margins": 0.8317530751228333, "rewards/rejected": -2.1178765296936035, "step": 320 }, { "epoch": 0.69, "learning_rate": 1.3245295796480788e-07, "logits/chosen": -2.021080493927002, "logits/rejected": -1.9558074474334717, "logps/chosen": -386.18670654296875, "logps/rejected": -441.7825622558594, "loss": 0.5108, "rewards/accuracies": 0.75, "rewards/chosen": -1.3397352695465088, "rewards/margins": 0.7417815923690796, "rewards/rejected": -2.081516742706299, "step": 330 }, { "epoch": 0.71, "learning_rate": 1.1666074087171627e-07, "logits/chosen": -1.9722802639007568, "logits/rejected": -1.9194387197494507, "logps/chosen": -390.5426330566406, "logps/rejected": -470.82958984375, "loss": 0.5234, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.3929929733276367, "rewards/margins": 0.8960745930671692, "rewards/rejected": -2.2890677452087402, "step": 340 }, { "epoch": 0.73, "learning_rate": 1.0157994641835734e-07, "logits/chosen": -1.9723567962646484, "logits/rejected": -1.9255586862564087, "logps/chosen": -353.8846740722656, "logps/rejected": -450.743408203125, "loss": 0.4932, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.279344916343689, "rewards/margins": 0.8211178779602051, "rewards/rejected": -2.1004626750946045, "step": 350 }, { "epoch": 0.75, "learning_rate": 8.729103716819111e-08, "logits/chosen": -1.9666541814804077, "logits/rejected": -1.8845767974853516, "logps/chosen": -398.8426818847656, "logps/rejected": -476.284912109375, "loss": 0.4746, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.3157447576522827, "rewards/margins": 1.0537182092666626, "rewards/rejected": -2.3694632053375244, "step": 360 }, { "epoch": 0.77, "learning_rate": 7.387025063449081e-08, "logits/chosen": -1.922550916671753, "logits/rejected": -1.8920552730560303, "logps/chosen": -385.36676025390625, "logps/rejected": -481.94219970703125, "loss": 0.4884, "rewards/accuracies": 0.75, "rewards/chosen": -1.3011926412582397, "rewards/margins": 0.9755498766899109, "rewards/rejected": -2.2767422199249268, "step": 370 }, { "epoch": 0.79, "learning_rate": 6.138919252022435e-08, "logits/chosen": -1.964270830154419, "logits/rejected": -1.9201478958129883, "logps/chosen": -422.5608825683594, "logps/rejected": -470.6983337402344, "loss": 0.4982, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.5673155784606934, "rewards/margins": 0.7714017629623413, "rewards/rejected": -2.338717460632324, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.991445467064689e-08, "logits/chosen": -1.9478000402450562, "logits/rejected": -1.9133468866348267, "logps/chosen": -396.41827392578125, "logps/rejected": -474.91168212890625, "loss": 0.4906, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.392188549041748, "rewards/margins": 0.9372695684432983, "rewards/rejected": -2.329457998275757, "step": 390 }, { "epoch": 0.84, "learning_rate": 3.9507259776993954e-08, "logits/chosen": -2.014727830886841, "logits/rejected": -1.9725955724716187, "logps/chosen": -433.93402099609375, "logps/rejected": -496.03948974609375, "loss": 0.5086, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.337740182876587, "rewards/margins": 0.89354008436203, "rewards/rejected": -2.231280565261841, "step": 400 }, { "epoch": 0.84, "eval_logits/chosen": -2.0268406867980957, "eval_logits/rejected": -1.9826929569244385, "eval_logps/chosen": -388.0500183105469, "eval_logps/rejected": -484.0532531738281, "eval_loss": 0.5034094452857971, "eval_rewards/accuracies": 0.76953125, "eval_rewards/chosen": -1.3101037740707397, "eval_rewards/margins": 0.9568960070610046, "eval_rewards/rejected": -2.2669999599456787, "eval_runtime": 53.0612, "eval_samples_per_second": 37.692, "eval_steps_per_second": 0.603, "step": 400 }, { "epoch": 0.86, "learning_rate": 3.022313472693447e-08, "logits/chosen": -1.9932317733764648, "logits/rejected": -1.9669653177261353, "logps/chosen": -391.12274169921875, "logps/rejected": -434.02191162109375, "loss": 0.5097, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3643336296081543, "rewards/margins": 0.6512311100959778, "rewards/rejected": -2.0155646800994873, "step": 410 }, { "epoch": 0.88, "learning_rate": 2.2111614344599684e-08, "logits/chosen": -2.064518928527832, "logits/rejected": -1.9801286458969116, "logps/chosen": -398.71868896484375, "logps/rejected": -479.0596618652344, "loss": 0.4848, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -1.2102010250091553, "rewards/margins": 1.0837668180465698, "rewards/rejected": -2.2939677238464355, "step": 420 }, { "epoch": 0.9, "learning_rate": 1.521597710086439e-08, "logits/chosen": -2.049975633621216, "logits/rejected": -1.996206521987915, "logps/chosen": -411.322509765625, "logps/rejected": -459.893798828125, "loss": 0.492, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.363693356513977, "rewards/margins": 0.7787196636199951, "rewards/rejected": -2.1424131393432617, "step": 430 }, { "epoch": 0.92, "learning_rate": 9.57301420397924e-09, "logits/chosen": -1.9763036966323853, "logits/rejected": -1.950627326965332, "logps/chosen": -419.8603515625, "logps/rejected": -458.17822265625, "loss": 0.4956, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.4159471988677979, "rewards/margins": 0.7630717158317566, "rewards/rejected": -2.179018974304199, "step": 440 }, { "epoch": 0.94, "learning_rate": 5.212833302556258e-09, "logits/chosen": -2.0032382011413574, "logits/rejected": -1.9466326236724854, "logps/chosen": -413.5555114746094, "logps/rejected": -492.5790100097656, "loss": 0.4873, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.3693794012069702, "rewards/margins": 0.907731831073761, "rewards/rejected": -2.277111291885376, "step": 450 }, { "epoch": 0.96, "learning_rate": 2.158697848236607e-09, "logits/chosen": -1.964643120765686, "logits/rejected": -1.9253301620483398, "logps/chosen": -396.19683837890625, "logps/rejected": -466.6449279785156, "loss": 0.4853, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.3089487552642822, "rewards/margins": 0.8880389332771301, "rewards/rejected": -2.1969876289367676, "step": 460 }, { "epoch": 0.98, "learning_rate": 4.269029751107489e-10, "logits/chosen": -2.0099263191223145, "logits/rejected": -1.9355924129486084, "logps/chosen": -420.68408203125, "logps/rejected": -471.353515625, "loss": 0.4977, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.3482139110565186, "rewards/margins": 1.0080922842025757, "rewards/rejected": -2.356306314468384, "step": 470 }, { "epoch": 1.0, "step": 478, "total_flos": 0.0, "train_loss": 0.5420855548092511, "train_runtime": 4282.9885, "train_samples_per_second": 14.274, "train_steps_per_second": 0.112 } ], "logging_steps": 10, "max_steps": 478, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "trial_name": null, "trial_params": null }