{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984301412872841, "eval_steps": 100, "global_step": 477, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -2.5889174938201904, "logits/rejected": -2.4813222885131836, "logps/chosen": -289.8450622558594, "logps/rejected": -264.9564514160156, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 1.0416666666666667e-06, "logits/chosen": -2.388092517852783, "logits/rejected": -2.4257497787475586, "logps/chosen": -260.3330078125, "logps/rejected": -219.36460876464844, "loss": 0.6925, "rewards/accuracies": 0.5277777910232544, "rewards/chosen": 0.006709899753332138, "rewards/margins": 0.001456400495953858, "rewards/rejected": 0.005253499373793602, "step": 10 }, { "epoch": 0.04, "learning_rate": 2.0833333333333334e-06, "logits/chosen": -2.4777274131774902, "logits/rejected": -2.4627153873443604, "logps/chosen": -269.85845947265625, "logps/rejected": -241.0397186279297, "loss": 0.6883, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.036987464874982834, "rewards/margins": 0.010376101359724998, "rewards/rejected": 0.026611363515257835, "step": 20 }, { "epoch": 0.06, "learning_rate": 3.125e-06, "logits/chosen": -2.4684128761291504, "logits/rejected": -2.4569170475006104, "logps/chosen": -248.72634887695312, "logps/rejected": -245.45999145507812, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": 0.04978276044130325, "rewards/margins": 0.02046312391757965, "rewards/rejected": 0.029319632798433304, "step": 30 }, { "epoch": 0.08, "learning_rate": 4.166666666666667e-06, "logits/chosen": -2.350592613220215, "logits/rejected": -2.3819007873535156, "logps/chosen": -286.8092041015625, "logps/rejected": -258.41522216796875, "loss": 0.6638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04874344915151596, "rewards/margins": 0.06839489936828613, "rewards/rejected": -0.019651446491479874, "step": 40 }, { "epoch": 0.1, "learning_rate": 4.999731868769027e-06, "logits/chosen": -2.371358871459961, "logits/rejected": -2.3549230098724365, "logps/chosen": -286.4510192871094, "logps/rejected": -292.4720153808594, "loss": 0.6532, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.040428485721349716, "rewards/margins": 0.11083565652370453, "rewards/rejected": -0.15126413106918335, "step": 50 }, { "epoch": 0.13, "learning_rate": 4.9903533134293035e-06, "logits/chosen": -2.3133959770202637, "logits/rejected": -2.2497916221618652, "logps/chosen": -266.2447204589844, "logps/rejected": -250.51513671875, "loss": 0.6381, "rewards/accuracies": 0.6875, "rewards/chosen": -0.177861288189888, "rewards/margins": 0.15977302193641663, "rewards/rejected": -0.33763426542282104, "step": 60 }, { "epoch": 0.15, "learning_rate": 4.967625656594782e-06, "logits/chosen": -2.359027624130249, "logits/rejected": -2.3221073150634766, "logps/chosen": -293.37640380859375, "logps/rejected": -272.8565979003906, "loss": 0.6222, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.28658825159072876, "rewards/margins": 0.1545184701681137, "rewards/rejected": -0.4411067068576813, "step": 70 }, { "epoch": 0.17, "learning_rate": 4.93167072587771e-06, "logits/chosen": -2.2765626907348633, "logits/rejected": -2.2555174827575684, "logps/chosen": -295.1297607421875, "logps/rejected": -313.0910339355469, "loss": 0.6196, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.2227451503276825, "rewards/margins": 0.2126256674528122, "rewards/rejected": -0.43537086248397827, "step": 80 }, { "epoch": 0.19, "learning_rate": 4.882681251368549e-06, "logits/chosen": -2.245281219482422, "logits/rejected": -2.227741241455078, "logps/chosen": -269.5973205566406, "logps/rejected": -291.8226318359375, "loss": 0.5989, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3465212285518646, "rewards/margins": 0.26312902569770813, "rewards/rejected": -0.6096502542495728, "step": 90 }, { "epoch": 0.21, "learning_rate": 4.8209198325401815e-06, "logits/chosen": -2.2300758361816406, "logits/rejected": -2.1660382747650146, "logps/chosen": -315.1083068847656, "logps/rejected": -333.6819763183594, "loss": 0.5985, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5296173691749573, "rewards/margins": 0.28275665640830994, "rewards/rejected": -0.8123741149902344, "step": 100 }, { "epoch": 0.21, "eval_logits/chosen": -1.999224066734314, "eval_logits/rejected": -2.0060811042785645, "eval_logps/chosen": -312.6618347167969, "eval_logps/rejected": -347.36639404296875, "eval_loss": 0.6166529059410095, "eval_rewards/accuracies": 0.703125, "eval_rewards/chosen": -0.6621690392494202, "eval_rewards/margins": 0.3358937203884125, "eval_rewards/rejected": -0.9980627298355103, "eval_runtime": 73.6215, "eval_samples_per_second": 27.166, "eval_steps_per_second": 0.435, "step": 100 }, { "epoch": 0.23, "learning_rate": 4.746717530629565e-06, "logits/chosen": -2.0477728843688965, "logits/rejected": -2.079465389251709, "logps/chosen": -331.4808349609375, "logps/rejected": -322.7523193359375, "loss": 0.603, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.47515854239463806, "rewards/margins": 0.4070638120174408, "rewards/rejected": -0.8822224736213684, "step": 110 }, { "epoch": 0.25, "learning_rate": 4.660472094042121e-06, "logits/chosen": -1.9152177572250366, "logits/rejected": -1.9233732223510742, "logps/chosen": -306.1752014160156, "logps/rejected": -293.25958251953125, "loss": 0.5842, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.44953522086143494, "rewards/margins": 0.3475509285926819, "rewards/rejected": -0.7970861196517944, "step": 120 }, { "epoch": 0.27, "learning_rate": 4.5626458262912745e-06, "logits/chosen": -1.70361328125, "logits/rejected": -1.6728603839874268, "logps/chosen": -318.77362060546875, "logps/rejected": -358.8970642089844, "loss": 0.5741, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.5537757873535156, "rewards/margins": 0.4414924681186676, "rewards/rejected": -0.9952683448791504, "step": 130 }, { "epoch": 0.29, "learning_rate": 4.453763107901676e-06, "logits/chosen": -1.389970302581787, "logits/rejected": -1.3908889293670654, "logps/chosen": -365.573974609375, "logps/rejected": -389.08026123046875, "loss": 0.5673, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.8941513895988464, "rewards/margins": 0.5348166823387146, "rewards/rejected": -1.4289681911468506, "step": 140 }, { "epoch": 0.31, "learning_rate": 4.33440758555951e-06, "logits/chosen": -1.2829688787460327, "logits/rejected": -1.1666593551635742, "logps/chosen": -331.8448181152344, "logps/rejected": -347.62164306640625, "loss": 0.5587, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.8844560384750366, "rewards/margins": 0.41986608505249023, "rewards/rejected": -1.304322361946106, "step": 150 }, { "epoch": 0.33, "learning_rate": 4.205219043576955e-06, "logits/chosen": -0.9949747323989868, "logits/rejected": -0.5801770687103271, "logps/chosen": -321.4789123535156, "logps/rejected": -394.52911376953125, "loss": 0.5409, "rewards/accuracies": 0.75, "rewards/chosen": -0.5671305060386658, "rewards/margins": 0.7361670732498169, "rewards/rejected": -1.3032976388931274, "step": 160 }, { "epoch": 0.36, "learning_rate": 4.066889974440757e-06, "logits/chosen": -0.5876813530921936, "logits/rejected": -0.3649616539478302, "logps/chosen": -293.5026550292969, "logps/rejected": -311.59423828125, "loss": 0.5916, "rewards/accuracies": 0.65625, "rewards/chosen": -0.5926370024681091, "rewards/margins": 0.5672849416732788, "rewards/rejected": -1.1599220037460327, "step": 170 }, { "epoch": 0.38, "learning_rate": 3.92016186682789e-06, "logits/chosen": -0.8265250325202942, "logits/rejected": -0.7326392531394958, "logps/chosen": -324.5079650878906, "logps/rejected": -388.0706481933594, "loss": 0.5533, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7355499863624573, "rewards/margins": 0.6323290467262268, "rewards/rejected": -1.3678789138793945, "step": 180 }, { "epoch": 0.4, "learning_rate": 3.7658212309857576e-06, "logits/chosen": -0.7887569665908813, "logits/rejected": -0.5450983643531799, "logps/chosen": -341.79412841796875, "logps/rejected": -386.400146484375, "loss": 0.5368, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.7941572666168213, "rewards/margins": 0.5156592130661011, "rewards/rejected": -1.309816598892212, "step": 190 }, { "epoch": 0.42, "learning_rate": 3.604695382782159e-06, "logits/chosen": -0.2513345181941986, "logits/rejected": -0.26383644342422485, "logps/chosen": -360.3321228027344, "logps/rejected": -372.03326416015625, "loss": 0.5302, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8325196504592896, "rewards/margins": 0.6745045185089111, "rewards/rejected": -1.5070240497589111, "step": 200 }, { "epoch": 0.42, "eval_logits/chosen": 0.4000808000564575, "eval_logits/rejected": 0.31158187985420227, "eval_logps/chosen": -334.02044677734375, "eval_logps/rejected": -407.42919921875, "eval_loss": 0.54950350522995, "eval_rewards/accuracies": 0.74609375, "eval_rewards/chosen": -0.8757554292678833, "eval_rewards/margins": 0.7229353189468384, "eval_rewards/rejected": -1.5986907482147217, "eval_runtime": 72.2579, "eval_samples_per_second": 27.679, "eval_steps_per_second": 0.443, "step": 200 }, { "epoch": 0.44, "learning_rate": 3.437648009023905e-06, "logits/chosen": -0.355979859828949, "logits/rejected": -0.13378120958805084, "logps/chosen": -354.21832275390625, "logps/rejected": -387.26171875, "loss": 0.5542, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7991288304328918, "rewards/margins": 0.5489404201507568, "rewards/rejected": -1.348069190979004, "step": 210 }, { "epoch": 0.46, "learning_rate": 3.265574537815398e-06, "logits/chosen": -0.2700692415237427, "logits/rejected": -0.08166289329528809, "logps/chosen": -290.8014831542969, "logps/rejected": -332.3136901855469, "loss": 0.569, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.608197033405304, "rewards/margins": 0.507323145866394, "rewards/rejected": -1.1155202388763428, "step": 220 }, { "epoch": 0.48, "learning_rate": 3.089397338773569e-06, "logits/chosen": -0.06517831236124039, "logits/rejected": 0.004061543848365545, "logps/chosen": -317.51495361328125, "logps/rejected": -371.9200439453125, "loss": 0.5576, "rewards/accuracies": 0.71875, "rewards/chosen": -0.7464595437049866, "rewards/margins": 0.5365883111953735, "rewards/rejected": -1.2830479145050049, "step": 230 }, { "epoch": 0.5, "learning_rate": 2.9100607788275547e-06, "logits/chosen": -0.13817472755908966, "logits/rejected": -0.04675758630037308, "logps/chosen": -334.0376281738281, "logps/rejected": -356.2500915527344, "loss": 0.5592, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.908129096031189, "rewards/margins": 0.4950867295265198, "rewards/rejected": -1.403215765953064, "step": 240 }, { "epoch": 0.52, "learning_rate": 2.72852616010567e-06, "logits/chosen": -0.0858619436621666, "logits/rejected": 0.23496215045452118, "logps/chosen": -347.7973327636719, "logps/rejected": -415.5159606933594, "loss": 0.539, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.9283957481384277, "rewards/margins": 0.7460072636604309, "rewards/rejected": -1.6744029521942139, "step": 250 }, { "epoch": 0.54, "learning_rate": 2.5457665670441937e-06, "logits/chosen": 0.35799893736839294, "logits/rejected": 0.7192636728286743, "logps/chosen": -364.86138916015625, "logps/rejected": -400.7781066894531, "loss": 0.5156, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.0014389753341675, "rewards/margins": 0.67198646068573, "rewards/rejected": -1.673425316810608, "step": 260 }, { "epoch": 0.57, "learning_rate": 2.3627616503391813e-06, "logits/chosen": 0.7610759735107422, "logits/rejected": 0.6359414458274841, "logps/chosen": -394.61541748046875, "logps/rejected": -451.08563232421875, "loss": 0.5183, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.1308982372283936, "rewards/margins": 0.7238563299179077, "rewards/rejected": -1.8547546863555908, "step": 270 }, { "epoch": 0.59, "learning_rate": 2.1804923757009885e-06, "logits/chosen": 1.2029451131820679, "logits/rejected": 1.3649919033050537, "logps/chosen": -380.61932373046875, "logps/rejected": -396.6190490722656, "loss": 0.5485, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.2345253229141235, "rewards/margins": 0.6180087327957153, "rewards/rejected": -1.8525340557098389, "step": 280 }, { "epoch": 0.61, "learning_rate": 1.9999357655598894e-06, "logits/chosen": 0.7018269300460815, "logits/rejected": 0.5022421479225159, "logps/chosen": -329.9044189453125, "logps/rejected": -380.2184143066406, "loss": 0.5464, "rewards/accuracies": 0.71875, "rewards/chosen": -0.9144694209098816, "rewards/margins": 0.6528812050819397, "rewards/rejected": -1.5673506259918213, "step": 290 }, { "epoch": 0.63, "learning_rate": 1.8220596619089576e-06, "logits/chosen": 0.12925171852111816, "logits/rejected": 0.4913701117038727, "logps/chosen": -334.60748291015625, "logps/rejected": -398.9501647949219, "loss": 0.533, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.7113243341445923, "rewards/margins": 0.6202197670936584, "rewards/rejected": -1.3315439224243164, "step": 300 }, { "epoch": 0.63, "eval_logits/chosen": 0.6809147000312805, "eval_logits/rejected": 0.5716233849525452, "eval_logps/chosen": -327.86053466796875, "eval_logps/rejected": -399.1313171386719, "eval_loss": 0.5384255051612854, "eval_rewards/accuracies": 0.76171875, "eval_rewards/chosen": -0.8141559362411499, "eval_rewards/margins": 0.7015555500984192, "eval_rewards/rejected": -1.5157114267349243, "eval_runtime": 72.3068, "eval_samples_per_second": 27.66, "eval_steps_per_second": 0.443, "step": 300 }, { "epoch": 0.65, "learning_rate": 1.647817538357072e-06, "logits/chosen": 0.2949855327606201, "logits/rejected": 0.6500759124755859, "logps/chosen": -364.088134765625, "logps/rejected": -378.37872314453125, "loss": 0.5199, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0496848821640015, "rewards/margins": 0.6011860966682434, "rewards/rejected": -1.6508712768554688, "step": 310 }, { "epoch": 0.67, "learning_rate": 1.4781433892011132e-06, "logits/chosen": 1.0437233448028564, "logits/rejected": 0.9824169278144836, "logps/chosen": -368.3329772949219, "logps/rejected": -443.03125, "loss": 0.5141, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.1911169290542603, "rewards/margins": 0.9159032702445984, "rewards/rejected": -2.107020139694214, "step": 320 }, { "epoch": 0.69, "learning_rate": 1.3139467229135999e-06, "logits/chosen": 0.43222084641456604, "logits/rejected": 0.5403100252151489, "logps/chosen": -406.03656005859375, "logps/rejected": -415.65667724609375, "loss": 0.5332, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.136765718460083, "rewards/margins": 0.6447176337242126, "rewards/rejected": -1.7814832925796509, "step": 330 }, { "epoch": 0.71, "learning_rate": 1.1561076868822756e-06, "logits/chosen": 0.783063530921936, "logits/rejected": 0.8185106515884399, "logps/chosen": -352.68560791015625, "logps/rejected": -389.525390625, "loss": 0.5327, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0253931283950806, "rewards/margins": 0.48898547887802124, "rewards/rejected": -1.514378547668457, "step": 340 }, { "epoch": 0.73, "learning_rate": 1.0054723495346484e-06, "logits/chosen": 0.8168965578079224, "logits/rejected": 0.7234944105148315, "logps/chosen": -320.1710510253906, "logps/rejected": -381.6952209472656, "loss": 0.5137, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.005994200706482, "rewards/margins": 0.8249969482421875, "rewards/rejected": -1.8309911489486694, "step": 350 }, { "epoch": 0.75, "learning_rate": 8.628481651367876e-07, "logits/chosen": 0.688895046710968, "logits/rejected": 1.0163129568099976, "logps/chosen": -380.68157958984375, "logps/rejected": -419.84234619140625, "loss": 0.5228, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.1236674785614014, "rewards/margins": 0.698872447013855, "rewards/rejected": -1.8225399255752563, "step": 360 }, { "epoch": 0.77, "learning_rate": 7.289996455765749e-07, "logits/chosen": 0.6197929382324219, "logits/rejected": 0.7506722807884216, "logps/chosen": -420.45263671875, "logps/rejected": -426.29107666015625, "loss": 0.5353, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.1160409450531006, "rewards/margins": 0.6991242170333862, "rewards/rejected": -1.8151648044586182, "step": 370 }, { "epoch": 0.8, "learning_rate": 6.046442623320145e-07, "logits/chosen": 0.5390521287918091, "logits/rejected": 0.5552736520767212, "logps/chosen": -365.60577392578125, "logps/rejected": -421.53564453125, "loss": 0.5076, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.0088932514190674, "rewards/margins": 0.6299774050712585, "rewards/rejected": -1.6388708353042603, "step": 380 }, { "epoch": 0.82, "learning_rate": 4.904486005914027e-07, "logits/chosen": 0.7070841789245605, "logits/rejected": 0.8798855543136597, "logps/chosen": -369.456787109375, "logps/rejected": -406.3539123535156, "loss": 0.5334, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.0536937713623047, "rewards/margins": 0.5137845277786255, "rewards/rejected": -1.5674784183502197, "step": 390 }, { "epoch": 0.84, "learning_rate": 3.8702478614051353e-07, "logits/chosen": 0.5031794905662537, "logits/rejected": 0.47197189927101135, "logps/chosen": -409.0977478027344, "logps/rejected": -434.04693603515625, "loss": 0.518, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -1.1779779195785522, "rewards/margins": 0.5962620973587036, "rewards/rejected": -1.7742401361465454, "step": 400 }, { "epoch": 0.84, "eval_logits/chosen": 1.195478081703186, "eval_logits/rejected": 1.1053041219711304, "eval_logps/chosen": -351.9891662597656, "eval_logps/rejected": -432.54376220703125, "eval_loss": 0.5275784730911255, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.0554425716400146, "eval_rewards/margins": 0.7943933010101318, "eval_rewards/rejected": -1.849835753440857, "eval_runtime": 72.3032, "eval_samples_per_second": 27.661, "eval_steps_per_second": 0.443, "step": 400 }, { "epoch": 0.86, "learning_rate": 2.9492720416985004e-07, "logits/chosen": 0.8437727093696594, "logits/rejected": 0.800951361656189, "logps/chosen": -389.89471435546875, "logps/rejected": -402.897705078125, "loss": 0.536, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.1922531127929688, "rewards/margins": 0.5932148694992065, "rewards/rejected": -1.7854681015014648, "step": 410 }, { "epoch": 0.88, "learning_rate": 2.1464952759020857e-07, "logits/chosen": 0.8531819581985474, "logits/rejected": 0.7011052966117859, "logps/chosen": -365.4083557128906, "logps/rejected": -429.77392578125, "loss": 0.5293, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -1.1621034145355225, "rewards/margins": 0.5474345684051514, "rewards/rejected": -1.7095378637313843, "step": 420 }, { "epoch": 0.9, "learning_rate": 1.4662207078575685e-07, "logits/chosen": 0.9001744389533997, "logits/rejected": 0.7595891356468201, "logps/chosen": -376.25653076171875, "logps/rejected": -414.7420959472656, "loss": 0.5296, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.0551135540008545, "rewards/margins": 0.7705513834953308, "rewards/rejected": -1.825664758682251, "step": 430 }, { "epoch": 0.92, "learning_rate": 9.120948298936422e-08, "logits/chosen": 0.6783544421195984, "logits/rejected": 0.9758931994438171, "logps/chosen": -374.05902099609375, "logps/rejected": -429.77825927734375, "loss": 0.5249, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.049117088317871, "rewards/margins": 0.7034090161323547, "rewards/rejected": -1.752526044845581, "step": 440 }, { "epoch": 0.94, "learning_rate": 4.870879364444109e-08, "logits/chosen": 0.8478315472602844, "logits/rejected": 0.7550174593925476, "logps/chosen": -369.22027587890625, "logps/rejected": -430.88873291015625, "loss": 0.5406, "rewards/accuracies": 0.71875, "rewards/chosen": -1.088853120803833, "rewards/margins": 0.6529593467712402, "rewards/rejected": -1.7418124675750732, "step": 450 }, { "epoch": 0.96, "learning_rate": 1.93478202307823e-08, "logits/chosen": 0.6956031918525696, "logits/rejected": 0.911687970161438, "logps/chosen": -368.9820251464844, "logps/rejected": -417.018310546875, "loss": 0.516, "rewards/accuracies": 0.78125, "rewards/chosen": -1.0993984937667847, "rewards/margins": 0.6686335206031799, "rewards/rejected": -1.7680320739746094, "step": 460 }, { "epoch": 0.98, "learning_rate": 3.283947088983663e-09, "logits/chosen": 0.7104119658470154, "logits/rejected": 0.7404820322990417, "logps/chosen": -359.8403625488281, "logps/rejected": -435.29803466796875, "loss": 0.509, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0412371158599854, "rewards/margins": 0.8249354362487793, "rewards/rejected": -1.8661725521087646, "step": 470 }, { "epoch": 1.0, "step": 477, "total_flos": 0.0, "train_loss": 0.5624207920498319, "train_runtime": 4926.1363, "train_samples_per_second": 12.41, "train_steps_per_second": 0.097 } ], "logging_steps": 10, "max_steps": 477, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }