{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 625, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.936507936507937e-08, "logits/chosen": -0.2010916769504547, "logits/rejected": 0.09005054831504822, "logps/chosen": -540.942626953125, "logps/rejected": -796.8775634765625, "loss": 0.2182, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.02, "learning_rate": 7.936507936507937e-07, "logits/chosen": -0.15247675776481628, "logits/rejected": -0.14707261323928833, "logps/chosen": -501.8849792480469, "logps/rejected": -774.5216064453125, "loss": 0.2086, "rewards/accuracies": 0.375, "rewards/chosen": 3.564198050298728e-05, "rewards/margins": -2.1308711438905448e-05, "rewards/rejected": 5.695069921785034e-05, "step": 10 }, { "epoch": 0.03, "learning_rate": 1.5873015873015873e-06, "logits/chosen": -0.1420287936925888, "logits/rejected": -0.10379602760076523, "logps/chosen": -489.9579162597656, "logps/rejected": -771.2081298828125, "loss": 0.2095, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0012544477358460426, "rewards/margins": 0.0017321283230558038, "rewards/rejected": -0.0029865759424865246, "step": 20 }, { "epoch": 0.05, "learning_rate": 2.380952380952381e-06, "logits/chosen": -0.16283434629440308, "logits/rejected": -0.14502206444740295, "logps/chosen": -532.1380615234375, "logps/rejected": -800.226806640625, "loss": 0.1996, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.007352855056524277, "rewards/margins": 0.009955727495253086, "rewards/rejected": -0.017308581620454788, "step": 30 }, { "epoch": 0.06, "learning_rate": 3.1746031746031746e-06, "logits/chosen": -0.19371934235095978, "logits/rejected": -0.15917012095451355, "logps/chosen": -529.4437255859375, "logps/rejected": -794.9968872070312, "loss": 0.1897, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.028293650597333908, "rewards/margins": 0.02773173525929451, "rewards/rejected": -0.05602538585662842, "step": 40 }, { "epoch": 0.08, "learning_rate": 3.968253968253968e-06, "logits/chosen": -0.16935278475284576, "logits/rejected": -0.12196620553731918, "logps/chosen": -556.66064453125, "logps/rejected": -913.4127197265625, "loss": 0.1623, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.06831763684749603, "rewards/margins": 0.09133367240428925, "rewards/rejected": -0.15965132415294647, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.761904761904762e-06, "logits/chosen": -0.18819646537303925, "logits/rejected": -0.21151557564735413, "logps/chosen": -641.8372802734375, "logps/rejected": -1064.3094482421875, "loss": 0.1532, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.14167475700378418, "rewards/margins": 0.16748657822608948, "rewards/rejected": -0.30916133522987366, "step": 60 }, { "epoch": 0.11, "learning_rate": 4.998086282661188e-06, "logits/chosen": -0.19726331532001495, "logits/rejected": -0.2387177050113678, "logps/chosen": -630.7346801757812, "logps/rejected": -1035.373046875, "loss": 0.1511, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.13452503085136414, "rewards/margins": 0.14926694333553314, "rewards/rejected": -0.2837919592857361, "step": 70 }, { "epoch": 0.13, "learning_rate": 4.988720025682995e-06, "logits/chosen": -0.23227183520793915, "logits/rejected": -0.15486109256744385, "logps/chosen": -612.7208862304688, "logps/rejected": -1018.8267822265625, "loss": 0.142, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.11648458242416382, "rewards/margins": 0.137832373380661, "rewards/rejected": -0.25431695580482483, "step": 80 }, { "epoch": 0.14, "learning_rate": 4.9715789537359126e-06, "logits/chosen": -0.269733726978302, "logits/rejected": -0.2005140334367752, "logps/chosen": -666.8603515625, "logps/rejected": -959.771484375, "loss": 0.1449, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.1299312561750412, "rewards/margins": 0.12538622319698334, "rewards/rejected": -0.25531744956970215, "step": 90 }, { "epoch": 0.16, "learning_rate": 4.946716615897932e-06, "logits/chosen": -0.21513569355010986, "logits/rejected": -0.22341570258140564, "logps/chosen": -636.3090209960938, "logps/rejected": -982.4715576171875, "loss": 0.1376, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.1491270661354065, "rewards/margins": 0.12507781386375427, "rewards/rejected": -0.27420490980148315, "step": 100 }, { "epoch": 0.18, "learning_rate": 4.9142106826480114e-06, "logits/chosen": -0.26347213983535767, "logits/rejected": -0.1899401843547821, "logps/chosen": -682.718017578125, "logps/rejected": -1051.3671875, "loss": 0.1456, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.1585748940706253, "rewards/margins": 0.13942097127437592, "rewards/rejected": -0.2979958653450012, "step": 110 }, { "epoch": 0.19, "learning_rate": 4.874162703221823e-06, "logits/chosen": -0.28930234909057617, "logits/rejected": -0.21862976253032684, "logps/chosen": -624.7803344726562, "logps/rejected": -1044.067626953125, "loss": 0.1322, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.1492077112197876, "rewards/margins": 0.15546968579292297, "rewards/rejected": -0.3046773374080658, "step": 120 }, { "epoch": 0.21, "learning_rate": 4.826697788369752e-06, "logits/chosen": -0.2196696251630783, "logits/rejected": -0.22875969111919403, "logps/chosen": -621.0065307617188, "logps/rejected": -944.654296875, "loss": 0.1432, "rewards/accuracies": 0.71875, "rewards/chosen": -0.15334677696228027, "rewards/margins": 0.11417926847934723, "rewards/rejected": -0.2675260007381439, "step": 130 }, { "epoch": 0.22, "learning_rate": 4.7719642195082224e-06, "logits/chosen": -0.23348715901374817, "logits/rejected": -0.21704678237438202, "logps/chosen": -599.64453125, "logps/rejected": -991.4964599609375, "loss": 0.1471, "rewards/accuracies": 0.75, "rewards/chosen": -0.13741596043109894, "rewards/margins": 0.13189604878425598, "rewards/rejected": -0.2693119943141937, "step": 140 }, { "epoch": 0.24, "learning_rate": 4.710132985485355e-06, "logits/chosen": -0.2200555056333542, "logits/rejected": -0.20334219932556152, "logps/chosen": -661.56884765625, "logps/rejected": -1054.980712890625, "loss": 0.1376, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.16635623574256897, "rewards/margins": 0.14565840363502502, "rewards/rejected": -0.312014639377594, "step": 150 }, { "epoch": 0.26, "learning_rate": 4.641397248408122e-06, "logits/chosen": -0.20752374827861786, "logits/rejected": -0.24948814511299133, "logps/chosen": -719.3741455078125, "logps/rejected": -1020.0384521484375, "loss": 0.1338, "rewards/accuracies": 0.78125, "rewards/chosen": -0.1749049723148346, "rewards/margins": 0.13863129913806915, "rewards/rejected": -0.31353622674942017, "step": 160 }, { "epoch": 0.27, "learning_rate": 4.5659717401997655e-06, "logits/chosen": -0.2512062191963196, "logits/rejected": -0.25224044919013977, "logps/chosen": -652.2489624023438, "logps/rejected": -1064.6927490234375, "loss": 0.1404, "rewards/accuracies": 0.78125, "rewards/chosen": -0.16943642497062683, "rewards/margins": 0.14737632870674133, "rewards/rejected": -0.3168127238750458, "step": 170 }, { "epoch": 0.29, "learning_rate": 4.4840920917726425e-06, "logits/chosen": -0.26564353704452515, "logits/rejected": -0.31829017400741577, "logps/chosen": -643.8062744140625, "logps/rejected": -1146.453857421875, "loss": 0.1238, "rewards/accuracies": 0.78125, "rewards/chosen": -0.1721421331167221, "rewards/margins": 0.17220233380794525, "rewards/rejected": -0.34434446692466736, "step": 180 }, { "epoch": 0.3, "learning_rate": 4.396014096912182e-06, "logits/chosen": -0.30384334921836853, "logits/rejected": -0.3049886226654053, "logps/chosen": -700.3992919921875, "logps/rejected": -1094.860107421875, "loss": 0.1293, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.190101757645607, "rewards/margins": 0.16493478417396545, "rewards/rejected": -0.35503652691841125, "step": 190 }, { "epoch": 0.32, "learning_rate": 4.302012913171584e-06, "logits/chosen": -0.22822122275829315, "logits/rejected": -0.21796974539756775, "logps/chosen": -703.4880981445312, "logps/rejected": -1098.6363525390625, "loss": 0.1291, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18932084739208221, "rewards/margins": 0.18700966238975525, "rewards/rejected": -0.37633052468299866, "step": 200 }, { "epoch": 0.34, "learning_rate": 4.202382202273702e-06, "logits/chosen": -0.2829930782318115, "logits/rejected": -0.2941269278526306, "logps/chosen": -712.0001220703125, "logps/rejected": -1121.9896240234375, "loss": 0.1212, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.19748489558696747, "rewards/margins": 0.1557844579219818, "rewards/rejected": -0.3532693684101105, "step": 210 }, { "epoch": 0.35, "learning_rate": 4.097433212705492e-06, "logits/chosen": -0.2563570737838745, "logits/rejected": -0.20130082964897156, "logps/chosen": -719.8380126953125, "logps/rejected": -1181.190185546875, "loss": 0.1349, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.2003975212574005, "rewards/margins": 0.1832597851753235, "rewards/rejected": -0.3836573362350464, "step": 220 }, { "epoch": 0.37, "learning_rate": 3.987493807371033e-06, "logits/chosen": -0.19761434197425842, "logits/rejected": -0.24285447597503662, "logps/chosen": -713.5037841796875, "logps/rejected": -1127.9371337890625, "loss": 0.1377, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.20124192535877228, "rewards/margins": 0.16942095756530762, "rewards/rejected": -0.3706628680229187, "step": 230 }, { "epoch": 0.38, "learning_rate": 3.872907439340758e-06, "logits/chosen": -0.22070392966270447, "logits/rejected": -0.2395116537809372, "logps/chosen": -689.1747436523438, "logps/rejected": -1246.12646484375, "loss": 0.1219, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.18724720180034637, "rewards/margins": 0.221848726272583, "rewards/rejected": -0.4090959429740906, "step": 240 }, { "epoch": 0.4, "learning_rate": 3.75403207889666e-06, "logits/chosen": -0.2952207028865814, "logits/rejected": -0.286382257938385, "logps/chosen": -680.5881958007812, "logps/rejected": -1075.620849609375, "loss": 0.1351, "rewards/accuracies": 0.78125, "rewards/chosen": -0.20417407155036926, "rewards/margins": 0.16537046432495117, "rewards/rejected": -0.36954453587532043, "step": 250 }, { "epoch": 0.42, "learning_rate": 3.631239095225417e-06, "logits/chosen": -0.28091198205947876, "logits/rejected": -0.25016146898269653, "logps/chosen": -717.04150390625, "logps/rejected": -1180.3306884765625, "loss": 0.1185, "rewards/accuracies": 0.78125, "rewards/chosen": -0.2006029188632965, "rewards/margins": 0.1787930279970169, "rewards/rejected": -0.3793959617614746, "step": 260 }, { "epoch": 0.43, "learning_rate": 3.5049120962530608e-06, "logits/chosen": -0.23318138718605042, "logits/rejected": -0.1964143067598343, "logps/chosen": -722.5912475585938, "logps/rejected": -1105.978759765625, "loss": 0.1251, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.20104451477527618, "rewards/margins": 0.16880543529987335, "rewards/rejected": -0.36984992027282715, "step": 270 }, { "epoch": 0.45, "learning_rate": 3.3754457302455464e-06, "logits/chosen": -0.21553221344947815, "logits/rejected": -0.2545991837978363, "logps/chosen": -708.6617431640625, "logps/rejected": -1200.54248046875, "loss": 0.1241, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1849481165409088, "rewards/margins": 0.22220449149608612, "rewards/rejected": -0.40715259313583374, "step": 280 }, { "epoch": 0.46, "learning_rate": 3.2432444529190714e-06, "logits/chosen": -0.2878536283969879, "logits/rejected": -0.240807443857193, "logps/chosen": -685.4102783203125, "logps/rejected": -1073.6112060546875, "loss": 0.1278, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.19928036630153656, "rewards/margins": 0.167768195271492, "rewards/rejected": -0.36704859137535095, "step": 290 }, { "epoch": 0.48, "learning_rate": 3.1087212639117057e-06, "logits/chosen": -0.22543080151081085, "logits/rejected": -0.25053372979164124, "logps/chosen": -713.0574340820312, "logps/rejected": -1165.50244140625, "loss": 0.1199, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20086824893951416, "rewards/margins": 0.19784381985664368, "rewards/rejected": -0.39871203899383545, "step": 300 }, { "epoch": 0.5, "learning_rate": 2.9722964165636263e-06, "logits/chosen": -0.2477823793888092, "logits/rejected": -0.23944005370140076, "logps/chosen": -741.0659790039062, "logps/rejected": -1150.4737548828125, "loss": 0.1334, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21726730465888977, "rewards/margins": 0.18077899515628815, "rewards/rejected": -0.3980463147163391, "step": 310 }, { "epoch": 0.51, "learning_rate": 2.8343961050366275e-06, "logits/chosen": -0.206426739692688, "logits/rejected": -0.294622004032135, "logps/chosen": -698.1879272460938, "logps/rejected": -1074.371337890625, "loss": 0.1279, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.1868043690919876, "rewards/margins": 0.1699267327785492, "rewards/rejected": -0.3567310869693756, "step": 320 }, { "epoch": 0.53, "learning_rate": 2.695451132874385e-06, "logits/chosen": -0.2724359631538391, "logits/rejected": -0.21938621997833252, "logps/chosen": -708.3114013671875, "logps/rejected": -1099.7318115234375, "loss": 0.1196, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.18547296524047852, "rewards/margins": 0.1704384684562683, "rewards/rejected": -0.3559114336967468, "step": 330 }, { "epoch": 0.54, "learning_rate": 2.5558955671628964e-06, "logits/chosen": -0.25434714555740356, "logits/rejected": -0.25090768933296204, "logps/chosen": -695.5904541015625, "logps/rejected": -1176.5301513671875, "loss": 0.131, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.21099016070365906, "rewards/margins": 0.20053663849830627, "rewards/rejected": -0.4115268290042877, "step": 340 }, { "epoch": 0.56, "learning_rate": 2.4161653824955654e-06, "logits/chosen": -0.28552037477493286, "logits/rejected": -0.24306419491767883, "logps/chosen": -764.7316284179688, "logps/rejected": -1231.4193115234375, "loss": 0.1166, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.24171674251556396, "rewards/margins": 0.21239931881427765, "rewards/rejected": -0.4541160464286804, "step": 350 }, { "epoch": 0.58, "learning_rate": 2.2766970989791697e-06, "logits/chosen": -0.25456491112709045, "logits/rejected": -0.21178500354290009, "logps/chosen": -765.3328247070312, "logps/rejected": -1261.208984375, "loss": 0.1103, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22941815853118896, "rewards/margins": 0.21526849269866943, "rewards/rejected": -0.4446867108345032, "step": 360 }, { "epoch": 0.59, "learning_rate": 2.1379264185356545e-06, "logits/chosen": -0.2724260687828064, "logits/rejected": -0.18538828194141388, "logps/chosen": -741.7236328125, "logps/rejected": -1078.375732421875, "loss": 0.1161, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.21514591574668884, "rewards/margins": 0.16624660789966583, "rewards/rejected": -0.38139253854751587, "step": 370 }, { "epoch": 0.61, "learning_rate": 2.000286863759934e-06, "logits/chosen": -0.27519670128822327, "logits/rejected": -0.2832408845424652, "logps/chosen": -682.8367309570312, "logps/rejected": -1131.5003662109375, "loss": 0.1198, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.20102819800376892, "rewards/margins": 0.18376831710338593, "rewards/rejected": -0.38479650020599365, "step": 380 }, { "epoch": 0.62, "learning_rate": 1.8642084235859764e-06, "logits/chosen": -0.2652140259742737, "logits/rejected": -0.2889960706233978, "logps/chosen": -731.1627807617188, "logps/rejected": -1137.467041015625, "loss": 0.1208, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19975660741329193, "rewards/margins": 0.21678335964679718, "rewards/rejected": -0.4165399968624115, "step": 390 }, { "epoch": 0.64, "learning_rate": 1.7301162099921013e-06, "logits/chosen": -0.26221686601638794, "logits/rejected": -0.2641231119632721, "logps/chosen": -755.5460205078125, "logps/rejected": -1093.4853515625, "loss": 0.1228, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.21798157691955566, "rewards/margins": 0.17120864987373352, "rewards/rejected": -0.3891902267932892, "step": 400 }, { "epoch": 0.66, "learning_rate": 1.5984291299420117e-06, "logits/chosen": -0.2737795114517212, "logits/rejected": -0.26781201362609863, "logps/chosen": -703.7815551757812, "logps/rejected": -1142.2947998046875, "loss": 0.1225, "rewards/accuracies": 0.78125, "rewards/chosen": -0.2116294652223587, "rewards/margins": 0.18610379099845886, "rewards/rejected": -0.39773327112197876, "step": 410 }, { "epoch": 0.67, "learning_rate": 1.4695585767104092e-06, "logits/chosen": -0.29544955492019653, "logits/rejected": -0.2773270308971405, "logps/chosen": -686.2037353515625, "logps/rejected": -1173.82958984375, "loss": 0.1319, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.22286900877952576, "rewards/margins": 0.183340385556221, "rewards/rejected": -0.40620937943458557, "step": 420 }, { "epoch": 0.69, "learning_rate": 1.3439071446815452e-06, "logits/chosen": -0.2415129840373993, "logits/rejected": -0.27339568734169006, "logps/chosen": -718.8626708984375, "logps/rejected": -1240.2945556640625, "loss": 0.1158, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.21683505177497864, "rewards/margins": 0.2326025515794754, "rewards/rejected": -0.44943755865097046, "step": 430 }, { "epoch": 0.7, "learning_rate": 1.2218673716356919e-06, "logits/chosen": -0.27473658323287964, "logits/rejected": -0.2448565512895584, "logps/chosen": -734.1240844726562, "logps/rejected": -1157.1583251953125, "loss": 0.1151, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.22244882583618164, "rewards/margins": 0.20606684684753418, "rewards/rejected": -0.4285156726837158, "step": 440 }, { "epoch": 0.72, "learning_rate": 1.103820512452661e-06, "logits/chosen": -0.24782344698905945, "logits/rejected": -0.26655644178390503, "logps/chosen": -712.7034912109375, "logps/rejected": -1142.8284912109375, "loss": 0.1273, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.21107585728168488, "rewards/margins": 0.19358885288238525, "rewards/rejected": -0.40466469526290894, "step": 450 }, { "epoch": 0.74, "learning_rate": 9.901353480633468e-07, "logits/chosen": -0.2719441056251526, "logits/rejected": -0.26369568705558777, "logps/chosen": -686.2820434570312, "logps/rejected": -1074.0950927734375, "loss": 0.1207, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.21289560198783875, "rewards/margins": 0.17246314883232117, "rewards/rejected": -0.3853587806224823, "step": 460 }, { "epoch": 0.75, "learning_rate": 8.811670333701544e-07, "logits/chosen": -0.2672122120857239, "logits/rejected": -0.2347395420074463, "logps/chosen": -752.274169921875, "logps/rejected": -1232.9998779296875, "loss": 0.1098, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -0.2272864133119583, "rewards/margins": 0.21149499714374542, "rewards/rejected": -0.4387814402580261, "step": 470 }, { "epoch": 0.77, "learning_rate": 7.772559877354341e-07, "logits/chosen": -0.24777153134346008, "logits/rejected": -0.2399456799030304, "logps/chosen": -736.2609252929688, "logps/rejected": -1130.094970703125, "loss": 0.1168, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.22519740462303162, "rewards/margins": 0.17224054038524628, "rewards/rejected": -0.3974379599094391, "step": 480 }, { "epoch": 0.78, "learning_rate": 6.787268315040604e-07, "logits/chosen": -0.2684328854084015, "logits/rejected": -0.24628722667694092, "logps/chosen": -689.9044799804688, "logps/rejected": -1155.0946044921875, "loss": 0.1174, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21575360000133514, "rewards/margins": 0.1959463357925415, "rewards/rejected": -0.41169995069503784, "step": 490 }, { "epoch": 0.8, "learning_rate": 5.858873718824829e-07, "logits/chosen": -0.26447051763534546, "logits/rejected": -0.26331770420074463, "logps/chosen": -707.0438842773438, "logps/rejected": -1152.1182861328125, "loss": 0.1172, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.20341674983501434, "rewards/margins": 0.19374600052833557, "rewards/rejected": -0.3971627652645111, "step": 500 }, { "epoch": 0.82, "learning_rate": 4.990276413423817e-07, "logits/chosen": -0.23977680504322052, "logits/rejected": -0.22561779618263245, "logps/chosen": -729.1976318359375, "logps/rejected": -1142.5386962890625, "loss": 0.1182, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.21028919517993927, "rewards/margins": 0.1970864236354828, "rewards/rejected": -0.40737563371658325, "step": 510 }, { "epoch": 0.83, "learning_rate": 4.184189915529796e-07, "logits/chosen": -0.26442182064056396, "logits/rejected": -0.25705209374427795, "logps/chosen": -737.6463623046875, "logps/rejected": -1247.0609130859375, "loss": 0.1102, "rewards/accuracies": 0.78125, "rewards/chosen": -0.2177223414182663, "rewards/margins": 0.20199593901634216, "rewards/rejected": -0.41971832513809204, "step": 520 }, { "epoch": 0.85, "learning_rate": 3.4431324567258176e-07, "logits/chosen": -0.26106202602386475, "logits/rejected": -0.2545424997806549, "logps/chosen": -700.1468505859375, "logps/rejected": -1189.3382568359375, "loss": 0.1211, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20997758209705353, "rewards/margins": 0.2070033997297287, "rewards/rejected": -0.4169809818267822, "step": 530 }, { "epoch": 0.86, "learning_rate": 2.769419116476052e-07, "logits/chosen": -0.2785791754722595, "logits/rejected": -0.26757797598838806, "logps/chosen": -753.193115234375, "logps/rejected": -1155.57763671875, "loss": 0.1094, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.21952767670154572, "rewards/margins": 0.1966555118560791, "rewards/rejected": -0.4161831736564636, "step": 540 }, { "epoch": 0.88, "learning_rate": 2.1651545897676512e-07, "logits/chosen": -0.21319365501403809, "logits/rejected": -0.20390382409095764, "logps/chosen": -736.0490112304688, "logps/rejected": -1217.077880859375, "loss": 0.1175, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21741120517253876, "rewards/margins": 0.21254892647266388, "rewards/rejected": -0.42996007204055786, "step": 550 }, { "epoch": 0.9, "learning_rate": 1.6322266119983222e-07, "logits/chosen": -0.24104240536689758, "logits/rejected": -0.2172488421201706, "logps/chosen": -765.8303833007812, "logps/rejected": -1205.9599609375, "loss": 0.11, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.21993395686149597, "rewards/margins": 0.19853533804416656, "rewards/rejected": -0.41846928000450134, "step": 560 }, { "epoch": 0.91, "learning_rate": 1.1723000616502167e-07, "logits/chosen": -0.26349014043807983, "logits/rejected": -0.3146611452102661, "logps/chosen": -755.0218505859375, "logps/rejected": -1200.8770751953125, "loss": 0.1129, "rewards/accuracies": 0.8125, "rewards/chosen": -0.21991780400276184, "rewards/margins": 0.22101625800132751, "rewards/rejected": -0.44093409180641174, "step": 570 }, { "epoch": 0.93, "learning_rate": 7.868117591737585e-08, "logits/chosen": -0.20703573524951935, "logits/rejected": -0.21116182208061218, "logps/chosen": -741.433837890625, "logps/rejected": -1168.3992919921875, "loss": 0.1149, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.2117561548948288, "rewards/margins": 0.19863612949848175, "rewards/rejected": -0.41039222478866577, "step": 580 }, { "epoch": 0.94, "learning_rate": 4.769659783295383e-08, "logits/chosen": -0.23616275191307068, "logits/rejected": -0.2213321179151535, "logps/chosen": -802.316650390625, "logps/rejected": -1224.220458984375, "loss": 0.1123, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.2234838753938675, "rewards/margins": 0.21354734897613525, "rewards/rejected": -0.43703120946884155, "step": 590 }, { "epoch": 0.96, "learning_rate": 2.4373068401120358e-08, "logits/chosen": -0.25830012559890747, "logits/rejected": -0.24621865153312683, "logps/chosen": -802.4405517578125, "logps/rejected": -1169.389892578125, "loss": 0.1136, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23679308593273163, "rewards/margins": 0.1784859597682953, "rewards/rejected": -0.4152790606021881, "step": 600 }, { "epoch": 0.98, "learning_rate": 8.78345083022425e-09, "logits/chosen": -0.24250511825084686, "logits/rejected": -0.24350687861442566, "logps/chosen": -742.060546875, "logps/rejected": -1132.1630859375, "loss": 0.1141, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.2040189504623413, "rewards/margins": 0.19322746992111206, "rewards/rejected": -0.39724642038345337, "step": 610 }, { "epoch": 0.99, "learning_rate": 9.764474213677654e-10, "logits/chosen": -0.2109394073486328, "logits/rejected": -0.22563035786151886, "logps/chosen": -744.6256103515625, "logps/rejected": -1185.656982421875, "loss": 0.1153, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23164710402488708, "rewards/margins": 0.20466098189353943, "rewards/rejected": -0.4363080859184265, "step": 620 }, { "epoch": 1.0, "step": 625, "total_flos": 0.0, "train_loss": 0.13047290132045747, "train_runtime": 8392.5195, "train_samples_per_second": 3.575, "train_steps_per_second": 0.074 } ], "logging_steps": 10, "max_steps": 625, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }