{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 3000, "global_step": 15284, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.270111183780249e-09, "logits/chosen": -3.2917370796203613, "logits/rejected": -3.2796809673309326, "logps/chosen": -336.192626953125, "logps/rejected": -310.9856872558594, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 3.270111183780249e-08, "logits/chosen": -3.1285088062286377, "logits/rejected": -3.18198299407959, "logps/chosen": -315.2669677734375, "logps/rejected": -272.7064514160156, "loss": 0.693, "rewards/accuracies": 0.5, "rewards/chosen": 0.0010706963948905468, "rewards/margins": 0.00015845504822209477, "rewards/rejected": 0.0009122414048761129, "step": 10 }, { "epoch": 0.0, "learning_rate": 6.540222367560497e-08, "logits/chosen": -3.0872440338134766, "logits/rejected": -3.0241026878356934, "logps/chosen": -414.28765869140625, "logps/rejected": -233.66372680664062, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.00017881770327221602, "rewards/margins": -5.1920022087870166e-05, "rewards/rejected": 0.00023073769989423454, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.810333551340746e-08, "logits/chosen": -2.8971962928771973, "logits/rejected": -2.972332000732422, "logps/chosen": -312.8296203613281, "logps/rejected": -326.06396484375, "loss": 0.6927, "rewards/accuracies": 0.5, "rewards/chosen": 0.0005883770063519478, "rewards/margins": 0.0005195619305595756, "rewards/rejected": 6.88152140355669e-05, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.3080444735120995e-07, "logits/chosen": -2.9546608924865723, "logits/rejected": -2.884795665740967, "logps/chosen": -282.99395751953125, "logps/rejected": -250.0284423828125, "loss": 0.6936, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.00084640400018543, "rewards/margins": 8.266828808700666e-05, "rewards/rejected": -0.0009290723246522248, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.6350555918901243e-07, "logits/chosen": -3.0107474327087402, "logits/rejected": -3.1580097675323486, "logps/chosen": -437.7754821777344, "logps/rejected": -369.67620849609375, "loss": 0.6932, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 9.289549052482471e-05, "rewards/margins": 0.0005522651481442153, "rewards/rejected": -0.0004593696794472635, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.9620667102681492e-07, "logits/chosen": -2.87144136428833, "logits/rejected": -2.7554101943969727, "logps/chosen": -183.0247802734375, "logps/rejected": -211.3839569091797, "loss": 0.6921, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0003684524563141167, "rewards/margins": 0.001609438331797719, "rewards/rejected": -0.0012409858172759414, "step": 60 }, { "epoch": 0.0, "learning_rate": 2.289077828646174e-07, "logits/chosen": -3.0719211101531982, "logits/rejected": -3.0397117137908936, "logps/chosen": -262.9385070800781, "logps/rejected": -270.0909423828125, "loss": 0.692, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.00010034188017016277, "rewards/margins": 0.003150531556457281, "rewards/rejected": -0.003050189930945635, "step": 70 }, { "epoch": 0.01, "learning_rate": 2.616088947024199e-07, "logits/chosen": -2.995681047439575, "logits/rejected": -2.925076961517334, "logps/chosen": -215.14419555664062, "logps/rejected": -248.43911743164062, "loss": 0.6937, "rewards/accuracies": 0.5, "rewards/chosen": -0.0004575929488055408, "rewards/margins": -0.0010273593943566084, "rewards/rejected": 0.0005697665037587285, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.943100065402224e-07, "logits/chosen": -3.0288403034210205, "logits/rejected": -3.10252046585083, "logps/chosen": -317.17596435546875, "logps/rejected": -326.1206359863281, "loss": 0.6932, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0005004165577702224, "rewards/margins": -0.0007878703763708472, "rewards/rejected": 0.00028745370218530297, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.2701111837802487e-07, "logits/chosen": -3.028860330581665, "logits/rejected": -3.1097171306610107, "logps/chosen": -290.9003601074219, "logps/rejected": -242.79989624023438, "loss": 0.6933, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 4.07161314797122e-05, "rewards/margins": 0.00024331473105121404, "rewards/rejected": -0.00020259855955373496, "step": 100 }, { "epoch": 0.01, "learning_rate": 3.5971223021582736e-07, "logits/chosen": -3.10758900642395, "logits/rejected": -3.1759090423583984, "logps/chosen": -319.63482666015625, "logps/rejected": -252.9918975830078, "loss": 0.6937, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.00032907718559727073, "rewards/margins": -0.0014111388009041548, "rewards/rejected": 0.0010820617899298668, "step": 110 }, { "epoch": 0.01, "learning_rate": 3.9241334205362984e-07, "logits/chosen": -2.9805614948272705, "logits/rejected": -3.066190242767334, "logps/chosen": -233.74319458007812, "logps/rejected": -261.84765625, "loss": 0.6925, "rewards/accuracies": 0.75, "rewards/chosen": 0.0013232993660494685, "rewards/margins": 0.0016237791860476136, "rewards/rejected": -0.0003004799073096365, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.251144538914324e-07, "logits/chosen": -2.8722615242004395, "logits/rejected": -2.882192373275757, "logps/chosen": -296.33563232421875, "logps/rejected": -232.06124877929688, "loss": 0.6932, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.001974537968635559, "rewards/margins": 0.0014208784559741616, "rewards/rejected": 0.0005536594544537365, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.578155657292348e-07, "logits/chosen": -3.083569288253784, "logits/rejected": -3.181466579437256, "logps/chosen": -403.43634033203125, "logps/rejected": -419.53668212890625, "loss": 0.6941, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0006010475335642695, "rewards/margins": -0.0011253413977101445, "rewards/rejected": 0.001726388931274414, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.905166775670374e-07, "logits/chosen": -3.0899462699890137, "logits/rejected": -3.0028645992279053, "logps/chosen": -336.4407653808594, "logps/rejected": -242.031005859375, "loss": 0.6935, "rewards/accuracies": 0.5, "rewards/chosen": -8.147531480062753e-05, "rewards/margins": -0.0008282337221316993, "rewards/rejected": 0.0007467584800906479, "step": 150 }, { "epoch": 0.01, "learning_rate": 5.232177894048398e-07, "logits/chosen": -2.966787338256836, "logits/rejected": -2.9958865642547607, "logps/chosen": -369.6310119628906, "logps/rejected": -245.83041381835938, "loss": 0.6936, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.0004910840652883053, "rewards/margins": -0.0005090809427201748, "rewards/rejected": 1.799676101654768e-05, "step": 160 }, { "epoch": 0.01, "learning_rate": 5.559189012426422e-07, "logits/chosen": -2.934314250946045, "logits/rejected": -2.9343628883361816, "logps/chosen": -267.1063232421875, "logps/rejected": -188.83877563476562, "loss": 0.6926, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0004375514108687639, "rewards/margins": 0.0008714391151443124, "rewards/rejected": -0.00043388744234107435, "step": 170 }, { "epoch": 0.01, "learning_rate": 5.886200130804448e-07, "logits/chosen": -3.018904209136963, "logits/rejected": -3.013699531555176, "logps/chosen": -162.3592987060547, "logps/rejected": -218.61184692382812, "loss": 0.693, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.00043491655378602445, "rewards/margins": -0.00012707136920653284, "rewards/rejected": 0.0005619878647848964, "step": 180 }, { "epoch": 0.01, "learning_rate": 6.213211249182473e-07, "logits/chosen": -3.078159809112549, "logits/rejected": -2.9432177543640137, "logps/chosen": -289.8063049316406, "logps/rejected": -177.1880340576172, "loss": 0.6934, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.00021298688079696149, "rewards/margins": 0.00023004722606856376, "rewards/rejected": -1.7060223399312235e-05, "step": 190 }, { "epoch": 0.01, "learning_rate": 6.540222367560497e-07, "logits/chosen": -2.8321168422698975, "logits/rejected": -2.8416965007781982, "logps/chosen": -281.98187255859375, "logps/rejected": -315.5621643066406, "loss": 0.6929, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.002115556737408042, "rewards/margins": 0.0005559118581004441, "rewards/rejected": 0.0015596445882692933, "step": 200 }, { "epoch": 0.01, "learning_rate": 6.867233485938523e-07, "logits/chosen": -3.202807664871216, "logits/rejected": -3.137254238128662, "logps/chosen": -376.1246643066406, "logps/rejected": -399.75103759765625, "loss": 0.6945, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.0005506344023160636, "rewards/margins": -0.004687961656600237, "rewards/rejected": 0.004137327428907156, "step": 210 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "logits/chosen": -2.784348726272583, "logits/rejected": -2.907197952270508, "logps/chosen": -316.67193603515625, "logps/rejected": -334.51568603515625, "loss": 0.6933, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0006333760684356093, "rewards/margins": -0.0016217123484238982, "rewards/rejected": 0.002255088882520795, "step": 220 }, { "epoch": 0.02, "learning_rate": 7.521255722694571e-07, "logits/chosen": -2.9172425270080566, "logits/rejected": -2.982654094696045, "logps/chosen": -246.4900665283203, "logps/rejected": -196.82415771484375, "loss": 0.693, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.002549818018451333, "rewards/margins": 0.0003842850273940712, "rewards/rejected": 0.0021655329037457705, "step": 230 }, { "epoch": 0.02, "learning_rate": 7.848266841072597e-07, "logits/chosen": -2.964061737060547, "logits/rejected": -2.9448022842407227, "logps/chosen": -244.23294067382812, "logps/rejected": -195.120361328125, "loss": 0.6932, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.002195965964347124, "rewards/margins": 0.0011466979049146175, "rewards/rejected": 0.001049267710186541, "step": 240 }, { "epoch": 0.02, "learning_rate": 8.175277959450622e-07, "logits/chosen": -2.729448080062866, "logits/rejected": -2.699052333831787, "logps/chosen": -286.503662109375, "logps/rejected": -290.0101623535156, "loss": 0.6926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.002826615469530225, "rewards/margins": 0.00432937266305089, "rewards/rejected": -0.0015027571935206652, "step": 250 }, { "epoch": 0.02, "learning_rate": 8.502289077828648e-07, "logits/chosen": -2.7071239948272705, "logits/rejected": -2.8652503490448, "logps/chosen": -195.9088897705078, "logps/rejected": -269.6458435058594, "loss": 0.6928, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0018324966076761484, "rewards/margins": 0.0005571646615862846, "rewards/rejected": 0.0012753319460898638, "step": 260 }, { "epoch": 0.02, "learning_rate": 8.829300196206672e-07, "logits/chosen": -2.9909119606018066, "logits/rejected": -2.9201207160949707, "logps/chosen": -193.13424682617188, "logps/rejected": -225.7820281982422, "loss": 0.6929, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0022729868069291115, "rewards/margins": 0.0003797583922278136, "rewards/rejected": 0.001893228618428111, "step": 270 }, { "epoch": 0.02, "learning_rate": 9.156311314584696e-07, "logits/chosen": -2.9982759952545166, "logits/rejected": -2.945038318634033, "logps/chosen": -219.11135864257812, "logps/rejected": -207.0333709716797, "loss": 0.6929, "rewards/accuracies": 0.5, "rewards/chosen": 0.0009125813958235085, "rewards/margins": -0.0010643262648954988, "rewards/rejected": 0.001976907718926668, "step": 280 }, { "epoch": 0.02, "learning_rate": 9.483322432962722e-07, "logits/chosen": -2.9865849018096924, "logits/rejected": -2.938310146331787, "logps/chosen": -243.83993530273438, "logps/rejected": -197.4394989013672, "loss": 0.6931, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.001292440458200872, "rewards/margins": -0.000284919748082757, "rewards/rejected": 0.001577360206283629, "step": 290 }, { "epoch": 0.02, "learning_rate": 9.810333551340747e-07, "logits/chosen": -2.7662692070007324, "logits/rejected": -2.9064176082611084, "logps/chosen": -311.7242431640625, "logps/rejected": -374.3269958496094, "loss": 0.693, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.004182077944278717, "rewards/margins": 0.00121305079665035, "rewards/rejected": 0.002969027729704976, "step": 300 }, { "epoch": 0.02, "learning_rate": 1.0137344669718771e-06, "logits/chosen": -2.9435641765594482, "logits/rejected": -2.9995861053466797, "logps/chosen": -256.0497741699219, "logps/rejected": -298.29425048828125, "loss": 0.6931, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.00030184269417077303, "rewards/margins": -0.0029051245655864477, "rewards/rejected": 0.0032069676090031862, "step": 310 }, { "epoch": 0.02, "learning_rate": 1.0464355788096796e-06, "logits/chosen": -2.8672854900360107, "logits/rejected": -2.7651145458221436, "logps/chosen": -325.3626708984375, "logps/rejected": -317.30511474609375, "loss": 0.6929, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0017262229230254889, "rewards/margins": 0.0012272644089534879, "rewards/rejected": 0.0004989585722796619, "step": 320 }, { "epoch": 0.02, "learning_rate": 1.079136690647482e-06, "logits/chosen": -3.002739429473877, "logits/rejected": -2.9137415885925293, "logps/chosen": -290.84130859375, "logps/rejected": -267.84649658203125, "loss": 0.6928, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.0031630732119083405, "rewards/margins": 0.0001625925360713154, "rewards/rejected": 0.00300048035569489, "step": 330 }, { "epoch": 0.02, "learning_rate": 1.1118378024852844e-06, "logits/chosen": -3.0424742698669434, "logits/rejected": -3.0423471927642822, "logps/chosen": -244.46572875976562, "logps/rejected": -247.05697631835938, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0033019818365573883, "rewards/margins": 0.00133128825109452, "rewards/rejected": 0.00197069370187819, "step": 340 }, { "epoch": 0.02, "learning_rate": 1.144538914323087e-06, "logits/chosen": -3.0653843879699707, "logits/rejected": -2.9315295219421387, "logps/chosen": -260.92156982421875, "logps/rejected": -227.8271026611328, "loss": 0.6914, "rewards/accuracies": 0.75, "rewards/chosen": 0.004591038916260004, "rewards/margins": 0.00426831841468811, "rewards/rejected": 0.00032272053067572415, "step": 350 }, { "epoch": 0.02, "learning_rate": 1.1772400261608895e-06, "logits/chosen": -2.91213059425354, "logits/rejected": -2.748988628387451, "logps/chosen": -259.0718078613281, "logps/rejected": -212.82266235351562, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0024882450234144926, "rewards/margins": 0.0006938829901628196, "rewards/rejected": 0.001794362091459334, "step": 360 }, { "epoch": 0.02, "learning_rate": 1.2099411379986922e-06, "logits/chosen": -2.947958469390869, "logits/rejected": -3.1207051277160645, "logps/chosen": -224.5135955810547, "logps/rejected": -237.42453002929688, "loss": 0.6929, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.003422154113650322, "rewards/margins": -0.0007809843518771231, "rewards/rejected": 0.004203137941658497, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.2426422498364946e-06, "logits/chosen": -2.9310691356658936, "logits/rejected": -3.126197338104248, "logps/chosen": -310.5855712890625, "logps/rejected": -207.7636260986328, "loss": 0.6915, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.006997501011937857, "rewards/margins": 0.004127983469516039, "rewards/rejected": 0.0028695182409137487, "step": 380 }, { "epoch": 0.03, "learning_rate": 1.2753433616742968e-06, "logits/chosen": -3.098378896713257, "logits/rejected": -3.0954604148864746, "logps/chosen": -311.447021484375, "logps/rejected": -359.23431396484375, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.007996728643774986, "rewards/margins": 0.002210235456004739, "rewards/rejected": 0.005786492954939604, "step": 390 }, { "epoch": 0.03, "learning_rate": 1.3080444735120995e-06, "logits/chosen": -2.9606876373291016, "logits/rejected": -2.8155083656311035, "logps/chosen": -328.29986572265625, "logps/rejected": -201.92807006835938, "loss": 0.6929, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.006145277991890907, "rewards/margins": 0.0019246510928496718, "rewards/rejected": 0.004220626782625914, "step": 400 }, { "epoch": 0.03, "learning_rate": 1.3407455853499021e-06, "logits/chosen": -3.044910430908203, "logits/rejected": -2.8663291931152344, "logps/chosen": -228.4242706298828, "logps/rejected": -193.3241729736328, "loss": 0.6918, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.00872000027447939, "rewards/margins": 0.003681318135932088, "rewards/rejected": 0.005038681905716658, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.3734466971877046e-06, "logits/chosen": -3.1175425052642822, "logits/rejected": -3.088022470474243, "logps/chosen": -310.7445983886719, "logps/rejected": -404.0841064453125, "loss": 0.6933, "rewards/accuracies": 0.5, "rewards/chosen": 0.00814331416040659, "rewards/margins": -0.0008761845529079437, "rewards/rejected": 0.009019499644637108, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.406147809025507e-06, "logits/chosen": -2.90427565574646, "logits/rejected": -2.9754254817962646, "logps/chosen": -296.58502197265625, "logps/rejected": -254.1155548095703, "loss": 0.6912, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.009763370268046856, "rewards/margins": 0.005822093226015568, "rewards/rejected": 0.0039412761107087135, "step": 430 }, { "epoch": 0.03, "learning_rate": 1.4388489208633094e-06, "logits/chosen": -2.9745967388153076, "logits/rejected": -3.056553363800049, "logps/chosen": -247.5413360595703, "logps/rejected": -257.79071044921875, "loss": 0.6929, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.007196736987680197, "rewards/margins": -0.0012092979159206152, "rewards/rejected": 0.008406035602092743, "step": 440 }, { "epoch": 0.03, "learning_rate": 1.471550032701112e-06, "logits/chosen": -2.9960484504699707, "logits/rejected": -3.1356592178344727, "logps/chosen": -264.06732177734375, "logps/rejected": -300.70721435546875, "loss": 0.6928, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.010084071196615696, "rewards/margins": -0.0002762650838121772, "rewards/rejected": 0.010360335931181908, "step": 450 }, { "epoch": 0.03, "learning_rate": 1.5042511445389143e-06, "logits/chosen": -2.8759608268737793, "logits/rejected": -2.946329116821289, "logps/chosen": -257.097412109375, "logps/rejected": -230.97933959960938, "loss": 0.691, "rewards/accuracies": 0.75, "rewards/chosen": 0.012902451679110527, "rewards/margins": 0.006056067533791065, "rewards/rejected": 0.006846384145319462, "step": 460 }, { "epoch": 0.03, "learning_rate": 1.536952256376717e-06, "logits/chosen": -2.8737595081329346, "logits/rejected": -2.858184337615967, "logps/chosen": -294.66680908203125, "logps/rejected": -299.099365234375, "loss": 0.6911, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.010799551382660866, "rewards/margins": 0.002541209105402231, "rewards/rejected": 0.008258342742919922, "step": 470 }, { "epoch": 0.03, "learning_rate": 1.5696533682145194e-06, "logits/chosen": -3.0661721229553223, "logits/rejected": -3.044696092605591, "logps/chosen": -355.2105407714844, "logps/rejected": -342.7800598144531, "loss": 0.6922, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.011583315208554268, "rewards/margins": 0.0039892070926725864, "rewards/rejected": 0.007594108581542969, "step": 480 }, { "epoch": 0.03, "learning_rate": 1.602354480052322e-06, "logits/chosen": -2.8834774494171143, "logits/rejected": -2.9143149852752686, "logps/chosen": -408.67523193359375, "logps/rejected": -356.30718994140625, "loss": 0.6921, "rewards/accuracies": 0.5, "rewards/chosen": 0.010796618647873402, "rewards/margins": 0.002751033054664731, "rewards/rejected": 0.008045585826039314, "step": 490 }, { "epoch": 0.03, "learning_rate": 1.6350555918901245e-06, "logits/chosen": -2.9682867527008057, "logits/rejected": -2.963850498199463, "logps/chosen": -264.46136474609375, "logps/rejected": -175.54135131835938, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.011705334298312664, "rewards/margins": 0.0035117301158607006, "rewards/rejected": 0.008193603716790676, "step": 500 }, { "epoch": 0.03, "learning_rate": 1.6677567037279269e-06, "logits/chosen": -2.9862213134765625, "logits/rejected": -2.9082834720611572, "logps/chosen": -236.3311309814453, "logps/rejected": -214.0072021484375, "loss": 0.6924, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012016872875392437, "rewards/margins": 0.003492117626592517, "rewards/rejected": 0.008524755015969276, "step": 510 }, { "epoch": 0.03, "learning_rate": 1.7004578155657295e-06, "logits/chosen": -2.89336895942688, "logits/rejected": -2.966418504714966, "logps/chosen": -427.30865478515625, "logps/rejected": -328.8701477050781, "loss": 0.691, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.011949967592954636, "rewards/margins": 0.0047841123305261135, "rewards/rejected": 0.007165855262428522, "step": 520 }, { "epoch": 0.03, "learning_rate": 1.7331589274035318e-06, "logits/chosen": -3.179882287979126, "logits/rejected": -3.214411497116089, "logps/chosen": -300.58074951171875, "logps/rejected": -305.2362976074219, "loss": 0.6913, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.011264842934906483, "rewards/margins": 0.0016359112923964858, "rewards/rejected": 0.009628929197788239, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.7658600392413344e-06, "logits/chosen": -3.1450603008270264, "logits/rejected": -3.1157948970794678, "logps/chosen": -300.3776550292969, "logps/rejected": -366.01019287109375, "loss": 0.6917, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.013677257113158703, "rewards/margins": 0.0008588259806856513, "rewards/rejected": 0.012818431481719017, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.7985611510791368e-06, "logits/chosen": -2.9594764709472656, "logits/rejected": -2.9382336139678955, "logps/chosen": -255.61941528320312, "logps/rejected": -195.72076416015625, "loss": 0.6907, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.012641459703445435, "rewards/margins": 0.004340741783380508, "rewards/rejected": 0.008300717920064926, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.8312622629169393e-06, "logits/chosen": -2.830198287963867, "logits/rejected": -2.8257110118865967, "logps/chosen": -336.34051513671875, "logps/rejected": -330.2576904296875, "loss": 0.6891, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.017110275104641914, "rewards/margins": 0.007845441810786724, "rewards/rejected": 0.009264834225177765, "step": 560 }, { "epoch": 0.04, "learning_rate": 1.8639633747547417e-06, "logits/chosen": -2.9448599815368652, "logits/rejected": -3.0247905254364014, "logps/chosen": -265.4458312988281, "logps/rejected": -292.64239501953125, "loss": 0.6907, "rewards/accuracies": 0.75, "rewards/chosen": 0.014689937233924866, "rewards/margins": 0.0031159906648099422, "rewards/rejected": 0.011573946103453636, "step": 570 }, { "epoch": 0.04, "learning_rate": 1.8966644865925443e-06, "logits/chosen": -2.987705707550049, "logits/rejected": -3.077376365661621, "logps/chosen": -253.3921356201172, "logps/rejected": -312.4097900390625, "loss": 0.6917, "rewards/accuracies": 0.5, "rewards/chosen": 0.016644762828946114, "rewards/margins": 0.0007716016261838377, "rewards/rejected": 0.01587316021323204, "step": 580 }, { "epoch": 0.04, "learning_rate": 1.9293655984303466e-06, "logits/chosen": -3.165412425994873, "logits/rejected": -2.9510579109191895, "logps/chosen": -256.69708251953125, "logps/rejected": -215.0804901123047, "loss": 0.6899, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.017473822459578514, "rewards/margins": 0.008105012588202953, "rewards/rejected": 0.009368810802698135, "step": 590 }, { "epoch": 0.04, "learning_rate": 1.9620667102681494e-06, "logits/chosen": -3.047368288040161, "logits/rejected": -3.020981788635254, "logps/chosen": -341.5492248535156, "logps/rejected": -232.7269744873047, "loss": 0.692, "rewards/accuracies": 0.75, "rewards/chosen": 0.02070160023868084, "rewards/margins": 0.008108451962471008, "rewards/rejected": 0.01259315200150013, "step": 600 }, { "epoch": 0.04, "learning_rate": 1.994767822105952e-06, "logits/chosen": -2.967050075531006, "logits/rejected": -2.8431944847106934, "logps/chosen": -286.22247314453125, "logps/rejected": -338.3892517089844, "loss": 0.6911, "rewards/accuracies": 0.5, "rewards/chosen": 0.018522150814533234, "rewards/margins": 0.0019176944624632597, "rewards/rejected": 0.016604457050561905, "step": 610 }, { "epoch": 0.04, "learning_rate": 2.0274689339437543e-06, "logits/chosen": -2.938920736312866, "logits/rejected": -2.9256796836853027, "logps/chosen": -335.93853759765625, "logps/rejected": -350.2706604003906, "loss": 0.6896, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.01635252870619297, "rewards/margins": 0.002685768064111471, "rewards/rejected": 0.013666761107742786, "step": 620 }, { "epoch": 0.04, "learning_rate": 2.0601700457815567e-06, "logits/chosen": -2.9339334964752197, "logits/rejected": -3.1059889793395996, "logps/chosen": -275.78179931640625, "logps/rejected": -298.95159912109375, "loss": 0.691, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02482684887945652, "rewards/margins": 0.006859474815428257, "rewards/rejected": 0.017967374995350838, "step": 630 }, { "epoch": 0.04, "learning_rate": 2.092871157619359e-06, "logits/chosen": -3.0149481296539307, "logits/rejected": -2.833794116973877, "logps/chosen": -318.1592712402344, "logps/rejected": -182.3878631591797, "loss": 0.6875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.023765167221426964, "rewards/margins": 0.015172283165156841, "rewards/rejected": 0.008592883124947548, "step": 640 }, { "epoch": 0.04, "learning_rate": 2.1255722694571616e-06, "logits/chosen": -3.138582944869995, "logits/rejected": -3.1221721172332764, "logps/chosen": -320.1607971191406, "logps/rejected": -356.0599670410156, "loss": 0.6914, "rewards/accuracies": 0.5, "rewards/chosen": 0.024007441475987434, "rewards/margins": 0.0019604742992669344, "rewards/rejected": 0.02204696647822857, "step": 650 }, { "epoch": 0.04, "learning_rate": 2.158273381294964e-06, "logits/chosen": -3.2076401710510254, "logits/rejected": -3.1157641410827637, "logps/chosen": -295.0274963378906, "logps/rejected": -313.6297912597656, "loss": 0.6884, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02200487069785595, "rewards/margins": 0.0068700313568115234, "rewards/rejected": 0.015134838409721851, "step": 660 }, { "epoch": 0.04, "learning_rate": 2.190974493132767e-06, "logits/chosen": -3.0300211906433105, "logits/rejected": -2.9287357330322266, "logps/chosen": -321.67987060546875, "logps/rejected": -299.279541015625, "loss": 0.6884, "rewards/accuracies": 0.75, "rewards/chosen": 0.02429506927728653, "rewards/margins": 0.011312566697597504, "rewards/rejected": 0.012982504442334175, "step": 670 }, { "epoch": 0.04, "learning_rate": 2.223675604970569e-06, "logits/chosen": -3.0303335189819336, "logits/rejected": -3.076803684234619, "logps/chosen": -443.90203857421875, "logps/rejected": -369.6851501464844, "loss": 0.6865, "rewards/accuracies": 0.75, "rewards/chosen": 0.0327363982796669, "rewards/margins": 0.011801688000559807, "rewards/rejected": 0.020934712141752243, "step": 680 }, { "epoch": 0.05, "learning_rate": 2.2563767168083718e-06, "logits/chosen": -3.053703784942627, "logits/rejected": -3.116055727005005, "logps/chosen": -245.5877227783203, "logps/rejected": -265.5740966796875, "loss": 0.6874, "rewards/accuracies": 0.75, "rewards/chosen": 0.02502378821372986, "rewards/margins": 0.010461007244884968, "rewards/rejected": 0.014562780037522316, "step": 690 }, { "epoch": 0.05, "learning_rate": 2.289077828646174e-06, "logits/chosen": -3.0341150760650635, "logits/rejected": -2.9276299476623535, "logps/chosen": -271.6872253417969, "logps/rejected": -285.0089416503906, "loss": 0.689, "rewards/accuracies": 0.5, "rewards/chosen": 0.025852534919977188, "rewards/margins": 0.0027749096043407917, "rewards/rejected": 0.023077625781297684, "step": 700 }, { "epoch": 0.05, "learning_rate": 2.3217789404839766e-06, "logits/chosen": -2.939833879470825, "logits/rejected": -2.881133556365967, "logps/chosen": -283.1850280761719, "logps/rejected": -267.50799560546875, "loss": 0.6891, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03299666941165924, "rewards/margins": 0.004511397797614336, "rewards/rejected": 0.028485268354415894, "step": 710 }, { "epoch": 0.05, "learning_rate": 2.354480052321779e-06, "logits/chosen": -2.884310483932495, "logits/rejected": -2.8266072273254395, "logps/chosen": -220.0033721923828, "logps/rejected": -203.66299438476562, "loss": 0.6868, "rewards/accuracies": 0.75, "rewards/chosen": 0.03351835533976555, "rewards/margins": 0.012706073932349682, "rewards/rejected": 0.020812280476093292, "step": 720 }, { "epoch": 0.05, "learning_rate": 2.3871811641595815e-06, "logits/chosen": -2.9537875652313232, "logits/rejected": -3.1177542209625244, "logps/chosen": -272.7933044433594, "logps/rejected": -263.432861328125, "loss": 0.6881, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03301476314663887, "rewards/margins": 0.009064620360732079, "rewards/rejected": 0.023950140923261642, "step": 730 }, { "epoch": 0.05, "learning_rate": 2.4198822759973843e-06, "logits/chosen": -2.9899003505706787, "logits/rejected": -3.02457332611084, "logps/chosen": -252.21902465820312, "logps/rejected": -266.54986572265625, "loss": 0.6882, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.034833673387765884, "rewards/margins": 0.009695864282548428, "rewards/rejected": 0.025137806311249733, "step": 740 }, { "epoch": 0.05, "learning_rate": 2.4525833878351864e-06, "logits/chosen": -3.0314903259277344, "logits/rejected": -3.094717502593994, "logps/chosen": -344.877197265625, "logps/rejected": -282.93853759765625, "loss": 0.6886, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.040870435535907745, "rewards/margins": 0.010108423419296741, "rewards/rejected": 0.03076201304793358, "step": 750 }, { "epoch": 0.05, "learning_rate": 2.4852844996729892e-06, "logits/chosen": -3.1002516746520996, "logits/rejected": -3.1319639682769775, "logps/chosen": -328.6524963378906, "logps/rejected": -280.7578430175781, "loss": 0.6884, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03915861248970032, "rewards/margins": 0.011181964538991451, "rewards/rejected": 0.02797664701938629, "step": 760 }, { "epoch": 0.05, "learning_rate": 2.5179856115107916e-06, "logits/chosen": -2.9437851905822754, "logits/rejected": -2.991842746734619, "logps/chosen": -392.2171936035156, "logps/rejected": -289.81976318359375, "loss": 0.6837, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.047296278178691864, "rewards/margins": 0.01911090686917305, "rewards/rejected": 0.028185371309518814, "step": 770 }, { "epoch": 0.05, "learning_rate": 2.5506867233485937e-06, "logits/chosen": -2.99171781539917, "logits/rejected": -2.9441757202148438, "logps/chosen": -385.4003601074219, "logps/rejected": -305.50103759765625, "loss": 0.6866, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.035985272377729416, "rewards/margins": 0.02009792998433113, "rewards/rejected": 0.015887338668107986, "step": 780 }, { "epoch": 0.05, "learning_rate": 2.5833878351863965e-06, "logits/chosen": -3.073352813720703, "logits/rejected": -3.058485507965088, "logps/chosen": -228.98611450195312, "logps/rejected": -188.19522094726562, "loss": 0.6887, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.03437228873372078, "rewards/margins": 0.005837684962898493, "rewards/rejected": 0.028534606099128723, "step": 790 }, { "epoch": 0.05, "learning_rate": 2.616088947024199e-06, "logits/chosen": -3.0605218410491943, "logits/rejected": -3.0869102478027344, "logps/chosen": -414.29150390625, "logps/rejected": -299.36578369140625, "loss": 0.6888, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.04800860956311226, "rewards/margins": 0.02571099065244198, "rewards/rejected": 0.02229761704802513, "step": 800 }, { "epoch": 0.05, "learning_rate": 2.6487900588620014e-06, "logits/chosen": -3.1698508262634277, "logits/rejected": -3.162097454071045, "logps/chosen": -293.2779846191406, "logps/rejected": -304.637451171875, "loss": 0.6872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04854557663202286, "rewards/margins": 0.015368210151791573, "rewards/rejected": 0.033177368342876434, "step": 810 }, { "epoch": 0.05, "learning_rate": 2.6814911706998042e-06, "logits/chosen": -3.058969020843506, "logits/rejected": -2.893124580383301, "logps/chosen": -290.9662170410156, "logps/rejected": -267.09930419921875, "loss": 0.6911, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03650563582777977, "rewards/margins": 0.011088610626757145, "rewards/rejected": 0.02541702426970005, "step": 820 }, { "epoch": 0.05, "learning_rate": 2.7141922825376067e-06, "logits/chosen": -3.001389265060425, "logits/rejected": -2.7879388332366943, "logps/chosen": -376.96087646484375, "logps/rejected": -301.4887390136719, "loss": 0.6913, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.042007606476545334, "rewards/margins": 0.006915568374097347, "rewards/rejected": 0.035092033445835114, "step": 830 }, { "epoch": 0.05, "learning_rate": 2.746893394375409e-06, "logits/chosen": -2.858647108078003, "logits/rejected": -2.8301475048065186, "logps/chosen": -421.82763671875, "logps/rejected": -453.30963134765625, "loss": 0.6887, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04595818370580673, "rewards/margins": 0.007595541886985302, "rewards/rejected": 0.038362644612789154, "step": 840 }, { "epoch": 0.06, "learning_rate": 2.779594506213211e-06, "logits/chosen": -3.091282844543457, "logits/rejected": -2.925567865371704, "logps/chosen": -290.15289306640625, "logps/rejected": -367.5709533691406, "loss": 0.6822, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.04890549182891846, "rewards/margins": 0.02194773219525814, "rewards/rejected": 0.026957765221595764, "step": 850 }, { "epoch": 0.06, "learning_rate": 2.812295618051014e-06, "logits/chosen": -2.9579873085021973, "logits/rejected": -3.16754412651062, "logps/chosen": -252.59719848632812, "logps/rejected": -273.73052978515625, "loss": 0.6858, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.041400037705898285, "rewards/margins": 0.010079341940581799, "rewards/rejected": 0.03132069483399391, "step": 860 }, { "epoch": 0.06, "learning_rate": 2.8449967298888164e-06, "logits/chosen": -3.0155527591705322, "logits/rejected": -3.0040760040283203, "logps/chosen": -336.65216064453125, "logps/rejected": -299.76556396484375, "loss": 0.6842, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.051180075854063034, "rewards/margins": 0.01540394313633442, "rewards/rejected": 0.035776130855083466, "step": 870 }, { "epoch": 0.06, "learning_rate": 2.877697841726619e-06, "logits/chosen": -3.0651357173919678, "logits/rejected": -3.054041624069214, "logps/chosen": -378.2223205566406, "logps/rejected": -358.79962158203125, "loss": 0.6847, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.06589755415916443, "rewards/margins": 0.025782927870750427, "rewards/rejected": 0.0401146337389946, "step": 880 }, { "epoch": 0.06, "learning_rate": 2.9103989535644217e-06, "logits/chosen": -2.899027109146118, "logits/rejected": -2.873547315597534, "logps/chosen": -273.908447265625, "logps/rejected": -287.2727355957031, "loss": 0.6822, "rewards/accuracies": 0.75, "rewards/chosen": 0.05353100225329399, "rewards/margins": 0.02129439450800419, "rewards/rejected": 0.032236598432064056, "step": 890 }, { "epoch": 0.06, "learning_rate": 2.943100065402224e-06, "logits/chosen": -2.897641658782959, "logits/rejected": -2.8532097339630127, "logps/chosen": -222.2373504638672, "logps/rejected": -259.68011474609375, "loss": 0.6869, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.036358319222927094, "rewards/margins": 0.0070862360298633575, "rewards/rejected": 0.029272085055708885, "step": 900 }, { "epoch": 0.06, "learning_rate": 2.9758011772400266e-06, "logits/chosen": -3.010063648223877, "logits/rejected": -2.952120542526245, "logps/chosen": -242.9051971435547, "logps/rejected": -153.533447265625, "loss": 0.6789, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.05027133971452713, "rewards/margins": 0.031339433044195175, "rewards/rejected": 0.018931902945041656, "step": 910 }, { "epoch": 0.06, "learning_rate": 3.0085022890778286e-06, "logits/chosen": -2.9196720123291016, "logits/rejected": -2.8785929679870605, "logps/chosen": -285.1444396972656, "logps/rejected": -218.13412475585938, "loss": 0.6793, "rewards/accuracies": 0.75, "rewards/chosen": 0.05185414478182793, "rewards/margins": 0.021058565005660057, "rewards/rejected": 0.030795583501458168, "step": 920 }, { "epoch": 0.06, "learning_rate": 3.0412034009156314e-06, "logits/chosen": -2.8984265327453613, "logits/rejected": -2.9644322395324707, "logps/chosen": -315.30517578125, "logps/rejected": -333.4654846191406, "loss": 0.6813, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.05165805667638779, "rewards/margins": 0.022742148488759995, "rewards/rejected": 0.028915906324982643, "step": 930 }, { "epoch": 0.06, "learning_rate": 3.073904512753434e-06, "logits/chosen": -3.0516133308410645, "logits/rejected": -3.0722031593322754, "logps/chosen": -376.10137939453125, "logps/rejected": -313.91754150390625, "loss": 0.6813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.055447887629270554, "rewards/margins": 0.014918354339897633, "rewards/rejected": 0.040529537945985794, "step": 940 }, { "epoch": 0.06, "learning_rate": 3.1066056245912363e-06, "logits/chosen": -2.8814139366149902, "logits/rejected": -2.923527479171753, "logps/chosen": -336.75439453125, "logps/rejected": -280.9449768066406, "loss": 0.6861, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.04855465143918991, "rewards/margins": 0.013142083771526814, "rewards/rejected": 0.03541256859898567, "step": 950 }, { "epoch": 0.06, "learning_rate": 3.1393067364290387e-06, "logits/chosen": -2.878671169281006, "logits/rejected": -2.9011781215667725, "logps/chosen": -237.6183624267578, "logps/rejected": -273.76824951171875, "loss": 0.6927, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03925769031047821, "rewards/margins": 0.007788626942783594, "rewards/rejected": 0.031469058245420456, "step": 960 }, { "epoch": 0.06, "learning_rate": 3.1720078482668416e-06, "logits/chosen": -3.128509521484375, "logits/rejected": -3.1664719581604004, "logps/chosen": -334.951904296875, "logps/rejected": -304.9898681640625, "loss": 0.675, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.0688043162226677, "rewards/margins": 0.044605594128370285, "rewards/rejected": 0.02419872209429741, "step": 970 }, { "epoch": 0.06, "learning_rate": 3.204708960104644e-06, "logits/chosen": -3.01989483833313, "logits/rejected": -2.9897732734680176, "logps/chosen": -215.50637817382812, "logps/rejected": -181.29798889160156, "loss": 0.6831, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.04856652021408081, "rewards/margins": 0.023396335542201996, "rewards/rejected": 0.025170186534523964, "step": 980 }, { "epoch": 0.06, "learning_rate": 3.237410071942446e-06, "logits/chosen": -3.1784377098083496, "logits/rejected": -3.093890905380249, "logps/chosen": -325.9986877441406, "logps/rejected": -321.07208251953125, "loss": 0.678, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.0696471557021141, "rewards/margins": 0.027126217260956764, "rewards/rejected": 0.04252093285322189, "step": 990 }, { "epoch": 0.07, "learning_rate": 3.270111183780249e-06, "logits/chosen": -2.994716167449951, "logits/rejected": -3.0044682025909424, "logps/chosen": -211.45425415039062, "logps/rejected": -274.2388000488281, "loss": 0.6852, "rewards/accuracies": 0.75, "rewards/chosen": 0.04428134113550186, "rewards/margins": 0.02824358269572258, "rewards/rejected": 0.016037756577134132, "step": 1000 }, { "epoch": 0.07, "learning_rate": 3.3028122956180513e-06, "logits/chosen": -3.0379650592803955, "logits/rejected": -2.964712142944336, "logps/chosen": -318.2586364746094, "logps/rejected": -241.7306671142578, "loss": 0.6803, "rewards/accuracies": 0.75, "rewards/chosen": 0.04623536020517349, "rewards/margins": 0.026243770495057106, "rewards/rejected": 0.019991587847471237, "step": 1010 }, { "epoch": 0.07, "learning_rate": 3.3355134074558538e-06, "logits/chosen": -2.9493517875671387, "logits/rejected": -2.9502549171447754, "logps/chosen": -335.3736572265625, "logps/rejected": -308.6483154296875, "loss": 0.6685, "rewards/accuracies": 0.75, "rewards/chosen": 0.07033336162567139, "rewards/margins": 0.03989529609680176, "rewards/rejected": 0.030438074842095375, "step": 1020 }, { "epoch": 0.07, "learning_rate": 3.368214519293656e-06, "logits/chosen": -3.053618907928467, "logits/rejected": -3.0059900283813477, "logps/chosen": -251.8245391845703, "logps/rejected": -264.260009765625, "loss": 0.6847, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.03353007882833481, "rewards/margins": 0.0037069707177579403, "rewards/rejected": 0.02982310950756073, "step": 1030 }, { "epoch": 0.07, "learning_rate": 3.400915631131459e-06, "logits/chosen": -2.9938790798187256, "logits/rejected": -2.943967342376709, "logps/chosen": -252.6847381591797, "logps/rejected": -252.75265502929688, "loss": 0.6747, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.05386337637901306, "rewards/margins": 0.03166833892464638, "rewards/rejected": 0.022195037454366684, "step": 1040 }, { "epoch": 0.07, "learning_rate": 3.4336167429692615e-06, "logits/chosen": -2.7841649055480957, "logits/rejected": -2.8269340991973877, "logps/chosen": -363.76641845703125, "logps/rejected": -302.6523742675781, "loss": 0.6722, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07321266829967499, "rewards/margins": 0.047675829380750656, "rewards/rejected": 0.025536835193634033, "step": 1050 }, { "epoch": 0.07, "learning_rate": 3.4663178548070635e-06, "logits/chosen": -3.1110241413116455, "logits/rejected": -3.0979695320129395, "logps/chosen": -289.6631774902344, "logps/rejected": -406.4927673339844, "loss": 0.6832, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.06365511566400528, "rewards/margins": 0.007427097763866186, "rewards/rejected": 0.05622801184654236, "step": 1060 }, { "epoch": 0.07, "learning_rate": 3.499018966644866e-06, "logits/chosen": -3.257605791091919, "logits/rejected": -3.072235345840454, "logps/chosen": -301.8328552246094, "logps/rejected": -260.7064208984375, "loss": 0.6811, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.07253851741552353, "rewards/margins": 0.016410846263170242, "rewards/rejected": 0.056127674877643585, "step": 1070 }, { "epoch": 0.07, "learning_rate": 3.531720078482669e-06, "logits/chosen": -2.932795524597168, "logits/rejected": -2.9286468029022217, "logps/chosen": -388.4093017578125, "logps/rejected": -298.8544921875, "loss": 0.6792, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.056833960115909576, "rewards/margins": 0.02518545091152191, "rewards/rejected": 0.031648509204387665, "step": 1080 }, { "epoch": 0.07, "learning_rate": 3.5644211903204712e-06, "logits/chosen": -3.0924220085144043, "logits/rejected": -3.116910457611084, "logps/chosen": -334.62994384765625, "logps/rejected": -260.61614990234375, "loss": 0.6777, "rewards/accuracies": 0.5, "rewards/chosen": 0.063229039311409, "rewards/margins": 0.02396319806575775, "rewards/rejected": 0.03926585242152214, "step": 1090 }, { "epoch": 0.07, "learning_rate": 3.5971223021582737e-06, "logits/chosen": -2.729091167449951, "logits/rejected": -2.701641082763672, "logps/chosen": -222.6670684814453, "logps/rejected": -199.57400512695312, "loss": 0.6692, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06342534720897675, "rewards/margins": 0.03789401054382324, "rewards/rejected": 0.02553134225308895, "step": 1100 }, { "epoch": 0.07, "learning_rate": 3.6298234139960765e-06, "logits/chosen": -3.0402450561523438, "logits/rejected": -3.112766981124878, "logps/chosen": -298.9159240722656, "logps/rejected": -251.3931884765625, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": 0.07815130054950714, "rewards/margins": 0.04820716381072998, "rewards/rejected": 0.029944131150841713, "step": 1110 }, { "epoch": 0.07, "learning_rate": 3.6625245258338785e-06, "logits/chosen": -2.9670872688293457, "logits/rejected": -3.015652656555176, "logps/chosen": -225.95596313476562, "logps/rejected": -209.48294067382812, "loss": 0.6714, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.06183785945177078, "rewards/margins": 0.041525088250637054, "rewards/rejected": 0.02031276375055313, "step": 1120 }, { "epoch": 0.07, "learning_rate": 3.695225637671681e-06, "logits/chosen": -2.734419584274292, "logits/rejected": -2.9420053958892822, "logps/chosen": -406.99700927734375, "logps/rejected": -385.1966552734375, "loss": 0.6652, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.06387855857610703, "rewards/margins": 0.052874863147735596, "rewards/rejected": 0.011003690771758556, "step": 1130 }, { "epoch": 0.07, "learning_rate": 3.7279267495094834e-06, "logits/chosen": -2.891770839691162, "logits/rejected": -2.858868360519409, "logps/chosen": -343.5257263183594, "logps/rejected": -374.30633544921875, "loss": 0.6808, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04993007704615593, "rewards/margins": 0.007034347392618656, "rewards/rejected": 0.04289572685956955, "step": 1140 }, { "epoch": 0.08, "learning_rate": 3.7606278613472863e-06, "logits/chosen": -2.916008472442627, "logits/rejected": -2.9267024993896484, "logps/chosen": -300.3416442871094, "logps/rejected": -254.60079956054688, "loss": 0.6699, "rewards/accuracies": 0.75, "rewards/chosen": 0.06914080679416656, "rewards/margins": 0.05962265655398369, "rewards/rejected": 0.009518155828118324, "step": 1150 }, { "epoch": 0.08, "learning_rate": 3.7933289731850887e-06, "logits/chosen": -3.080824851989746, "logits/rejected": -3.029308557510376, "logps/chosen": -468.10479736328125, "logps/rejected": -330.76226806640625, "loss": 0.6648, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.09183225780725479, "rewards/margins": 0.09566021710634232, "rewards/rejected": -0.0038279606960713863, "step": 1160 }, { "epoch": 0.08, "learning_rate": 3.826030085022891e-06, "logits/chosen": -2.990856647491455, "logits/rejected": -3.072279453277588, "logps/chosen": -289.534423828125, "logps/rejected": -258.0203857421875, "loss": 0.6807, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0335356742143631, "rewards/margins": 0.013129748404026031, "rewards/rejected": 0.020405925810337067, "step": 1170 }, { "epoch": 0.08, "learning_rate": 3.858731196860693e-06, "logits/chosen": -3.14605975151062, "logits/rejected": -3.1897926330566406, "logps/chosen": -330.21356201171875, "logps/rejected": -374.36376953125, "loss": 0.6679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.057777903974056244, "rewards/margins": 0.040721334517002106, "rewards/rejected": 0.017056571319699287, "step": 1180 }, { "epoch": 0.08, "learning_rate": 3.891432308698496e-06, "logits/chosen": -3.0566301345825195, "logits/rejected": -2.830443859100342, "logps/chosen": -398.5063171386719, "logps/rejected": -262.1009521484375, "loss": 0.6651, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.07374347746372223, "rewards/margins": 0.08343470841646194, "rewards/rejected": -0.009691240265965462, "step": 1190 }, { "epoch": 0.08, "learning_rate": 3.924133420536299e-06, "logits/chosen": -2.9240617752075195, "logits/rejected": -2.9756927490234375, "logps/chosen": -292.318359375, "logps/rejected": -272.2164001464844, "loss": 0.6574, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.04478208348155022, "rewards/margins": 0.09882897138595581, "rewards/rejected": -0.054046887904405594, "step": 1200 }, { "epoch": 0.08, "learning_rate": 3.956834532374101e-06, "logits/chosen": -2.7908530235290527, "logits/rejected": -2.7424261569976807, "logps/chosen": -262.8086853027344, "logps/rejected": -338.43011474609375, "loss": 0.6779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.008423469960689545, "rewards/margins": 0.039038076996803284, "rewards/rejected": -0.04746154695749283, "step": 1210 }, { "epoch": 0.08, "learning_rate": 3.989535644211904e-06, "logits/chosen": -3.07283878326416, "logits/rejected": -3.0677459239959717, "logps/chosen": -522.1658935546875, "logps/rejected": -308.08135986328125, "loss": 0.6565, "rewards/accuracies": 0.75, "rewards/chosen": 0.06819725036621094, "rewards/margins": 0.12000145018100739, "rewards/rejected": -0.051804203540086746, "step": 1220 }, { "epoch": 0.08, "learning_rate": 4.022236756049706e-06, "logits/chosen": -3.0104000568389893, "logits/rejected": -2.896672248840332, "logps/chosen": -339.0327453613281, "logps/rejected": -285.937744140625, "loss": 0.6618, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.02467339299619198, "rewards/margins": 0.024880170822143555, "rewards/rejected": -0.00020677558495663106, "step": 1230 }, { "epoch": 0.08, "learning_rate": 4.054937867887509e-06, "logits/chosen": -2.83773136138916, "logits/rejected": -2.8805081844329834, "logps/chosen": -299.5276794433594, "logps/rejected": -322.9584655761719, "loss": 0.6599, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.012974822893738747, "rewards/margins": 0.0379905179142952, "rewards/rejected": -0.0250156931579113, "step": 1240 }, { "epoch": 0.08, "learning_rate": 4.087638979725311e-06, "logits/chosen": -2.9484379291534424, "logits/rejected": -2.947218418121338, "logps/chosen": -354.7485046386719, "logps/rejected": -372.19720458984375, "loss": 0.6664, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0066092670895159245, "rewards/margins": 0.057695530354976654, "rewards/rejected": -0.051086265593767166, "step": 1250 }, { "epoch": 0.08, "learning_rate": 4.1203400915631135e-06, "logits/chosen": -2.855781078338623, "logits/rejected": -2.8165206909179688, "logps/chosen": -251.22183227539062, "logps/rejected": -235.73709106445312, "loss": 0.6698, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.011250384151935577, "rewards/margins": 0.05445141717791557, "rewards/rejected": -0.043201036751270294, "step": 1260 }, { "epoch": 0.08, "learning_rate": 4.153041203400916e-06, "logits/chosen": -3.0679378509521484, "logits/rejected": -2.857640504837036, "logps/chosen": -346.5669860839844, "logps/rejected": -250.6300506591797, "loss": 0.6505, "rewards/accuracies": 0.75, "rewards/chosen": 0.043904975056648254, "rewards/margins": 0.10966650396585464, "rewards/rejected": -0.06576154381036758, "step": 1270 }, { "epoch": 0.08, "learning_rate": 4.185742315238718e-06, "logits/chosen": -2.9014155864715576, "logits/rejected": -3.046358585357666, "logps/chosen": -319.28717041015625, "logps/rejected": -307.1815490722656, "loss": 0.6549, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1001446470618248, "rewards/margins": 0.11425294727087021, "rewards/rejected": -0.014108309522271156, "step": 1280 }, { "epoch": 0.08, "learning_rate": 4.218443427076521e-06, "logits/chosen": -2.9250168800354004, "logits/rejected": -2.9453256130218506, "logps/chosen": -322.75653076171875, "logps/rejected": -365.05224609375, "loss": 0.673, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.030087579041719437, "rewards/margins": 0.05422119423747063, "rewards/rejected": -0.024133607745170593, "step": 1290 }, { "epoch": 0.09, "learning_rate": 4.251144538914323e-06, "logits/chosen": -3.013686418533325, "logits/rejected": -3.0016348361968994, "logps/chosen": -302.3190002441406, "logps/rejected": -265.59869384765625, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": 0.006552050355821848, "rewards/margins": 0.08697822690010071, "rewards/rejected": -0.0804261788725853, "step": 1300 }, { "epoch": 0.09, "learning_rate": 4.283845650752126e-06, "logits/chosen": -3.0734734535217285, "logits/rejected": -3.084721803665161, "logps/chosen": -247.04159545898438, "logps/rejected": -196.4541473388672, "loss": 0.6785, "rewards/accuracies": 0.5, "rewards/chosen": 0.0005421757814474404, "rewards/margins": 0.04514998197555542, "rewards/rejected": -0.04460780695080757, "step": 1310 }, { "epoch": 0.09, "learning_rate": 4.316546762589928e-06, "logits/chosen": -3.0271167755126953, "logits/rejected": -3.0676238536834717, "logps/chosen": -274.3728942871094, "logps/rejected": -241.3634033203125, "loss": 0.6439, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.006143507547676563, "rewards/margins": 0.06333483010530472, "rewards/rejected": -0.057191312313079834, "step": 1320 }, { "epoch": 0.09, "learning_rate": 4.349247874427731e-06, "logits/chosen": -2.9338698387145996, "logits/rejected": -2.834819793701172, "logps/chosen": -179.49215698242188, "logps/rejected": -217.63473510742188, "loss": 0.6657, "rewards/accuracies": 0.75, "rewards/chosen": 0.00903988629579544, "rewards/margins": 0.05287107825279236, "rewards/rejected": -0.04383119195699692, "step": 1330 }, { "epoch": 0.09, "learning_rate": 4.381948986265534e-06, "logits/chosen": -2.994556427001953, "logits/rejected": -3.03247332572937, "logps/chosen": -291.14678955078125, "logps/rejected": -273.00640869140625, "loss": 0.6645, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.05871891975402832, "rewards/margins": 0.06984923779964447, "rewards/rejected": -0.011130331084132195, "step": 1340 }, { "epoch": 0.09, "learning_rate": 4.414650098103336e-06, "logits/chosen": -3.00968074798584, "logits/rejected": -3.035393238067627, "logps/chosen": -214.24575805664062, "logps/rejected": -244.91378784179688, "loss": 0.6518, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.05464862659573555, "rewards/margins": 0.08675499260425568, "rewards/rejected": -0.03210636228322983, "step": 1350 }, { "epoch": 0.09, "learning_rate": 4.447351209941138e-06, "logits/chosen": -2.6517372131347656, "logits/rejected": -2.600365161895752, "logps/chosen": -292.5060119628906, "logps/rejected": -240.5036163330078, "loss": 0.6443, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04661313816905022, "rewards/margins": 0.09707652777433395, "rewards/rejected": -0.05046338960528374, "step": 1360 }, { "epoch": 0.09, "learning_rate": 4.480052321778941e-06, "logits/chosen": -2.8098044395446777, "logits/rejected": -2.916383981704712, "logps/chosen": -235.16897583007812, "logps/rejected": -230.39395141601562, "loss": 0.6535, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04006228223443031, "rewards/margins": 0.0514717698097229, "rewards/rejected": -0.09153404831886292, "step": 1370 }, { "epoch": 0.09, "learning_rate": 4.5127534336167435e-06, "logits/chosen": -2.9116311073303223, "logits/rejected": -2.8839030265808105, "logps/chosen": -372.9744873046875, "logps/rejected": -271.71209716796875, "loss": 0.6497, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04008869081735611, "rewards/margins": 0.08455850183963776, "rewards/rejected": -0.12464718520641327, "step": 1380 }, { "epoch": 0.09, "learning_rate": 4.5454545454545455e-06, "logits/chosen": -2.9106247425079346, "logits/rejected": -2.7901999950408936, "logps/chosen": -322.4967346191406, "logps/rejected": -275.24359130859375, "loss": 0.6587, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.011935449205338955, "rewards/margins": 0.08074381202459335, "rewards/rejected": -0.09267926216125488, "step": 1390 }, { "epoch": 0.09, "learning_rate": 4.578155657292348e-06, "logits/chosen": -2.784376621246338, "logits/rejected": -2.7655889987945557, "logps/chosen": -238.74771118164062, "logps/rejected": -277.7735290527344, "loss": 0.6659, "rewards/accuracies": 0.75, "rewards/chosen": -0.01576855033636093, "rewards/margins": 0.09559451043605804, "rewards/rejected": -0.11136305332183838, "step": 1400 }, { "epoch": 0.09, "learning_rate": 4.610856769130151e-06, "logits/chosen": -2.8557660579681396, "logits/rejected": -2.841609477996826, "logps/chosen": -293.3002624511719, "logps/rejected": -261.0894470214844, "loss": 0.6288, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0980224460363388, "rewards/margins": 0.13837285339832306, "rewards/rejected": -0.04035038873553276, "step": 1410 }, { "epoch": 0.09, "learning_rate": 4.643557880967953e-06, "logits/chosen": -2.902876138687134, "logits/rejected": -2.8389651775360107, "logps/chosen": -301.04254150390625, "logps/rejected": -238.2169189453125, "loss": 0.6573, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.08447854965925217, "rewards/margins": 0.1570580005645752, "rewards/rejected": -0.07257945090532303, "step": 1420 }, { "epoch": 0.09, "learning_rate": 4.676258992805755e-06, "logits/chosen": -2.901109457015991, "logits/rejected": -2.942526340484619, "logps/chosen": -257.54705810546875, "logps/rejected": -282.9903259277344, "loss": 0.6491, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.004095321986824274, "rewards/margins": 0.07804937660694122, "rewards/rejected": -0.08214469999074936, "step": 1430 }, { "epoch": 0.09, "learning_rate": 4.708960104643558e-06, "logits/chosen": -2.820150852203369, "logits/rejected": -2.955181837081909, "logps/chosen": -300.74249267578125, "logps/rejected": -280.85546875, "loss": 0.636, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.032780297100543976, "rewards/margins": 0.1143231987953186, "rewards/rejected": -0.08154290169477463, "step": 1440 }, { "epoch": 0.09, "learning_rate": 4.741661216481361e-06, "logits/chosen": -2.9988656044006348, "logits/rejected": -3.0337607860565186, "logps/chosen": -321.4072570800781, "logps/rejected": -303.68927001953125, "loss": 0.6386, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.040198661386966705, "rewards/margins": 0.09094201028347015, "rewards/rejected": -0.13114067912101746, "step": 1450 }, { "epoch": 0.1, "learning_rate": 4.774362328319163e-06, "logits/chosen": -2.9808907508850098, "logits/rejected": -2.916173219680786, "logps/chosen": -385.5293884277344, "logps/rejected": -329.20361328125, "loss": 0.6366, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.050022225826978683, "rewards/margins": 0.1688729077577591, "rewards/rejected": -0.21889512240886688, "step": 1460 }, { "epoch": 0.1, "learning_rate": 4.807063440156966e-06, "logits/chosen": -2.7928836345672607, "logits/rejected": -2.7777328491210938, "logps/chosen": -277.9568786621094, "logps/rejected": -321.45867919921875, "loss": 0.6426, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10680846124887466, "rewards/margins": 0.09730565547943115, "rewards/rejected": -0.20411410927772522, "step": 1470 }, { "epoch": 0.1, "learning_rate": 4.839764551994769e-06, "logits/chosen": -2.968703269958496, "logits/rejected": -2.9341697692871094, "logps/chosen": -387.0003356933594, "logps/rejected": -307.2288513183594, "loss": 0.6598, "rewards/accuracies": 0.75, "rewards/chosen": -0.08723671734333038, "rewards/margins": 0.09109548479318619, "rewards/rejected": -0.17833219468593597, "step": 1480 }, { "epoch": 0.1, "learning_rate": 4.872465663832571e-06, "logits/chosen": -2.7156434059143066, "logits/rejected": -2.7810118198394775, "logps/chosen": -306.45513916015625, "logps/rejected": -310.51678466796875, "loss": 0.644, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07703594118356705, "rewards/margins": 0.19709235429763794, "rewards/rejected": -0.2741282880306244, "step": 1490 }, { "epoch": 0.1, "learning_rate": 4.905166775670373e-06, "logits/chosen": -2.6941978931427, "logits/rejected": -2.7313380241394043, "logps/chosen": -267.1389465332031, "logps/rejected": -262.7569580078125, "loss": 0.6278, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.120394766330719, "rewards/margins": 0.12228628247976303, "rewards/rejected": -0.24268105626106262, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.9378678875081756e-06, "logits/chosen": -2.829664945602417, "logits/rejected": -2.835359573364258, "logps/chosen": -191.07113647460938, "logps/rejected": -161.7421875, "loss": 0.6533, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02066131681203842, "rewards/margins": 0.06928695738315582, "rewards/rejected": -0.08994828164577484, "step": 1510 }, { "epoch": 0.1, "learning_rate": 4.9705689993459784e-06, "logits/chosen": -2.724827527999878, "logits/rejected": -2.694981813430786, "logps/chosen": -266.87750244140625, "logps/rejected": -233.462890625, "loss": 0.6835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0038128546439111233, "rewards/margins": 0.025530481711030006, "rewards/rejected": -0.02171761728823185, "step": 1520 }, { "epoch": 0.1, "learning_rate": 4.999999934793849e-06, "logits/chosen": -2.8814034461975098, "logits/rejected": -2.866196870803833, "logps/chosen": -244.37026977539062, "logps/rejected": -221.701416015625, "loss": 0.6557, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.006678455974906683, "rewards/margins": 0.09985242784023285, "rewards/rejected": -0.09317396581172943, "step": 1530 }, { "epoch": 0.1, "learning_rate": 4.999992110059814e-06, "logits/chosen": -2.891392469406128, "logits/rejected": -2.858283519744873, "logps/chosen": -229.2855682373047, "logps/rejected": -294.8144226074219, "loss": 0.6374, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03305545449256897, "rewards/margins": 0.15080586075782776, "rewards/rejected": -0.18386133015155792, "step": 1540 }, { "epoch": 0.1, "learning_rate": 4.999971244142299e-06, "logits/chosen": -2.71058988571167, "logits/rejected": -2.7885243892669678, "logps/chosen": -405.15472412109375, "logps/rejected": -338.3912658691406, "loss": 0.6308, "rewards/accuracies": 0.75, "rewards/chosen": -0.047473885118961334, "rewards/margins": 0.2326829433441162, "rewards/rejected": -0.28015682101249695, "step": 1550 }, { "epoch": 0.1, "learning_rate": 4.999937337150149e-06, "logits/chosen": -2.848661422729492, "logits/rejected": -2.7998948097229004, "logps/chosen": -347.9453125, "logps/rejected": -438.14276123046875, "loss": 0.6659, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.180062934756279, "rewards/margins": 0.05969248339533806, "rewards/rejected": -0.23975543677806854, "step": 1560 }, { "epoch": 0.1, "learning_rate": 4.99989038926024e-06, "logits/chosen": -2.8489651679992676, "logits/rejected": -2.834139347076416, "logps/chosen": -301.15411376953125, "logps/rejected": -357.87396240234375, "loss": 0.6506, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.01634044013917446, "rewards/margins": 0.14033670723438263, "rewards/rejected": -0.15667715668678284, "step": 1570 }, { "epoch": 0.1, "learning_rate": 4.999830400717476e-06, "logits/chosen": -2.781190872192383, "logits/rejected": -2.6491172313690186, "logps/chosen": -293.2514343261719, "logps/rejected": -223.9866943359375, "loss": 0.6801, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11608616262674332, "rewards/margins": 0.05661296099424362, "rewards/rejected": -0.17269913852214813, "step": 1580 }, { "epoch": 0.1, "learning_rate": 4.999757371834787e-06, "logits/chosen": -2.872457981109619, "logits/rejected": -2.804356098175049, "logps/chosen": -435.396484375, "logps/rejected": -372.4584045410156, "loss": 0.6367, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03728240728378296, "rewards/margins": 0.13812865316867828, "rewards/rejected": -0.17541104555130005, "step": 1590 }, { "epoch": 0.1, "learning_rate": 4.999671302993125e-06, "logits/chosen": -2.895263195037842, "logits/rejected": -2.8939125537872314, "logps/chosen": -387.8405456542969, "logps/rejected": -375.2184753417969, "loss": 0.6226, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17230334877967834, "rewards/margins": 0.1778700351715088, "rewards/rejected": -0.35017335414886475, "step": 1600 }, { "epoch": 0.11, "learning_rate": 4.999572194641471e-06, "logits/chosen": -3.000087261199951, "logits/rejected": -2.9178078174591064, "logps/chosen": -296.35284423828125, "logps/rejected": -271.0105285644531, "loss": 0.6437, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2482984960079193, "rewards/margins": 0.037916336208581924, "rewards/rejected": -0.28621482849121094, "step": 1610 }, { "epoch": 0.11, "learning_rate": 4.999460047296819e-06, "logits/chosen": -2.7535433769226074, "logits/rejected": -2.7120440006256104, "logps/chosen": -205.52490234375, "logps/rejected": -217.80032348632812, "loss": 0.663, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2501955032348633, "rewards/margins": -0.028146442025899887, "rewards/rejected": -0.22204908728599548, "step": 1620 }, { "epoch": 0.11, "learning_rate": 4.999334861544186e-06, "logits/chosen": -3.0465235710144043, "logits/rejected": -2.8982014656066895, "logps/chosen": -386.46673583984375, "logps/rejected": -325.98284912109375, "loss": 0.6366, "rewards/accuracies": 0.75, "rewards/chosen": -0.013998913578689098, "rewards/margins": 0.1589469611644745, "rewards/rejected": -0.17294588685035706, "step": 1630 }, { "epoch": 0.11, "learning_rate": 4.999196638036604e-06, "logits/chosen": -2.8301024436950684, "logits/rejected": -2.880171537399292, "logps/chosen": -243.76687622070312, "logps/rejected": -196.94740295410156, "loss": 0.648, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.04583977907896042, "rewards/margins": 0.11452523618936539, "rewards/rejected": -0.06868544965982437, "step": 1640 }, { "epoch": 0.11, "learning_rate": 4.999045377495111e-06, "logits/chosen": -3.003966808319092, "logits/rejected": -2.8065781593322754, "logps/chosen": -378.6763610839844, "logps/rejected": -311.1116027832031, "loss": 0.6677, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.014937293715775013, "rewards/margins": 0.05808718875050545, "rewards/rejected": -0.04314989596605301, "step": 1650 }, { "epoch": 0.11, "learning_rate": 4.998881080708759e-06, "logits/chosen": -2.5581583976745605, "logits/rejected": -2.461325168609619, "logps/chosen": -231.7208251953125, "logps/rejected": -194.53955078125, "loss": 0.648, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.10793449729681015, "rewards/margins": 0.05950494855642319, "rewards/rejected": 0.04842953756451607, "step": 1660 }, { "epoch": 0.11, "learning_rate": 4.998703748534599e-06, "logits/chosen": -2.7714176177978516, "logits/rejected": -2.815610885620117, "logps/chosen": -244.06005859375, "logps/rejected": -254.1628875732422, "loss": 0.6597, "rewards/accuracies": 0.75, "rewards/chosen": 0.10158028453588486, "rewards/margins": 0.06951704621315002, "rewards/rejected": 0.03206322342157364, "step": 1670 }, { "epoch": 0.11, "learning_rate": 4.998513381897683e-06, "logits/chosen": -2.9440979957580566, "logits/rejected": -2.7861945629119873, "logps/chosen": -403.1971130371094, "logps/rejected": -281.72564697265625, "loss": 0.6469, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.12582075595855713, "rewards/margins": 0.0909295454621315, "rewards/rejected": 0.03489120304584503, "step": 1680 }, { "epoch": 0.11, "learning_rate": 4.9983099817910565e-06, "logits/chosen": -2.793125867843628, "logits/rejected": -2.858919143676758, "logps/chosen": -347.7610778808594, "logps/rejected": -266.6481018066406, "loss": 0.6366, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.04975166171789169, "rewards/margins": 0.13270191848278046, "rewards/rejected": -0.08295025676488876, "step": 1690 }, { "epoch": 0.11, "learning_rate": 4.998093549275754e-06, "logits/chosen": -2.871772527694702, "logits/rejected": -2.8467118740081787, "logps/chosen": -377.32354736328125, "logps/rejected": -303.02301025390625, "loss": 0.6364, "rewards/accuracies": 0.75, "rewards/chosen": 0.06866715848445892, "rewards/margins": 0.15883667767047882, "rewards/rejected": -0.0901695117354393, "step": 1700 }, { "epoch": 0.11, "learning_rate": 4.997864085480794e-06, "logits/chosen": -2.9470508098602295, "logits/rejected": -2.959531307220459, "logps/chosen": -290.06195068359375, "logps/rejected": -299.32269287109375, "loss": 0.625, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.04539639502763748, "rewards/margins": 0.19093555212020874, "rewards/rejected": -0.14553913474082947, "step": 1710 }, { "epoch": 0.11, "learning_rate": 4.997621591603171e-06, "logits/chosen": -2.82224702835083, "logits/rejected": -2.720681667327881, "logps/chosen": -348.3783264160156, "logps/rejected": -381.4719543457031, "loss": 0.6764, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.027834400534629822, "rewards/margins": 0.12895603477954865, "rewards/rejected": -0.15679042041301727, "step": 1720 }, { "epoch": 0.11, "learning_rate": 4.997366068907853e-06, "logits/chosen": -2.795383930206299, "logits/rejected": -2.7165658473968506, "logps/chosen": -359.398193359375, "logps/rejected": -324.2231140136719, "loss": 0.6247, "rewards/accuracies": 0.75, "rewards/chosen": -0.011148473247885704, "rewards/margins": 0.14630728960037231, "rewards/rejected": -0.15745574235916138, "step": 1730 }, { "epoch": 0.11, "learning_rate": 4.997097518727771e-06, "logits/chosen": -2.8241539001464844, "logits/rejected": -2.837796211242676, "logps/chosen": -300.73577880859375, "logps/rejected": -249.60818481445312, "loss": 0.6365, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.07024487107992172, "rewards/margins": 0.11239133030176163, "rewards/rejected": -0.18263621628284454, "step": 1740 }, { "epoch": 0.11, "learning_rate": 4.9968159424638155e-06, "logits/chosen": -2.8376364707946777, "logits/rejected": -2.9506516456604004, "logps/chosen": -358.78302001953125, "logps/rejected": -407.3536071777344, "loss": 0.693, "rewards/accuracies": 0.75, "rewards/chosen": -0.07075332850217819, "rewards/margins": 0.07305476069450378, "rewards/rejected": -0.14380808174610138, "step": 1750 }, { "epoch": 0.12, "learning_rate": 4.9965213415848235e-06, "logits/chosen": -2.9004623889923096, "logits/rejected": -2.792384624481201, "logps/chosen": -309.20452880859375, "logps/rejected": -258.9640808105469, "loss": 0.6135, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08543910086154938, "rewards/margins": 0.14936544001102448, "rewards/rejected": -0.0639263391494751, "step": 1760 }, { "epoch": 0.12, "learning_rate": 4.9962137176275805e-06, "logits/chosen": -2.7845258712768555, "logits/rejected": -2.6718716621398926, "logps/chosen": -197.41293334960938, "logps/rejected": -231.66928100585938, "loss": 0.6562, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.14296093583106995, "rewards/margins": 0.001989279640838504, "rewards/rejected": -0.14495019614696503, "step": 1770 }, { "epoch": 0.12, "learning_rate": 4.9958930721968015e-06, "logits/chosen": -2.9959325790405273, "logits/rejected": -2.9684629440307617, "logps/chosen": -443.68572998046875, "logps/rejected": -327.4826965332031, "loss": 0.6618, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.020464541390538216, "rewards/margins": 0.18904823064804077, "rewards/rejected": -0.1685837060213089, "step": 1780 }, { "epoch": 0.12, "learning_rate": 4.995559406965132e-06, "logits/chosen": -2.945493221282959, "logits/rejected": -2.8600668907165527, "logps/chosen": -390.383056640625, "logps/rejected": -363.43438720703125, "loss": 0.6388, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.10935555398464203, "rewards/margins": 0.17368203401565552, "rewards/rejected": -0.28303760290145874, "step": 1790 }, { "epoch": 0.12, "learning_rate": 4.995212723673131e-06, "logits/chosen": -2.7073473930358887, "logits/rejected": -2.727189779281616, "logps/chosen": -297.14459228515625, "logps/rejected": -357.22979736328125, "loss": 0.6592, "rewards/accuracies": 0.5, "rewards/chosen": -0.19449344277381897, "rewards/margins": 0.01761249266564846, "rewards/rejected": -0.21210594475269318, "step": 1800 }, { "epoch": 0.12, "learning_rate": 4.99485302412927e-06, "logits/chosen": -2.6889750957489014, "logits/rejected": -2.811795711517334, "logps/chosen": -260.56134033203125, "logps/rejected": -371.4437561035156, "loss": 0.6356, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0422750785946846, "rewards/margins": 0.16190145909786224, "rewards/rejected": -0.20417654514312744, "step": 1810 }, { "epoch": 0.12, "learning_rate": 4.994480310209918e-06, "logits/chosen": -2.8943614959716797, "logits/rejected": -2.7965211868286133, "logps/chosen": -372.2635192871094, "logps/rejected": -317.09405517578125, "loss": 0.6225, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.16946710646152496, "rewards/margins": 0.18853087723255157, "rewards/rejected": -0.3579980134963989, "step": 1820 }, { "epoch": 0.12, "learning_rate": 4.994094583859332e-06, "logits/chosen": -2.867899179458618, "logits/rejected": -2.7022008895874023, "logps/chosen": -289.0708923339844, "logps/rejected": -269.9219665527344, "loss": 0.6542, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2944765090942383, "rewards/margins": 0.11893720924854279, "rewards/rejected": -0.4134137034416199, "step": 1830 }, { "epoch": 0.12, "learning_rate": 4.9936958470896525e-06, "logits/chosen": -2.9204633235931396, "logits/rejected": -2.831799268722534, "logps/chosen": -472.00341796875, "logps/rejected": -395.9357604980469, "loss": 0.6416, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.12566198408603668, "rewards/margins": 0.23625274002552032, "rewards/rejected": -0.3619146943092346, "step": 1840 }, { "epoch": 0.12, "learning_rate": 4.993284101980883e-06, "logits/chosen": -2.9902560710906982, "logits/rejected": -2.863290309906006, "logps/chosen": -338.646484375, "logps/rejected": -336.25482177734375, "loss": 0.6014, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1826198399066925, "rewards/margins": 0.1706969141960144, "rewards/rejected": -0.3533167243003845, "step": 1850 }, { "epoch": 0.12, "learning_rate": 4.9928593506808885e-06, "logits/chosen": -2.854135751724243, "logits/rejected": -2.769709587097168, "logps/chosen": -302.7193298339844, "logps/rejected": -304.617919921875, "loss": 0.6377, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.024336855858564377, "rewards/margins": 0.23395895957946777, "rewards/rejected": -0.2096220999956131, "step": 1860 }, { "epoch": 0.12, "learning_rate": 4.992421595405381e-06, "logits/chosen": -2.931744337081909, "logits/rejected": -2.862643241882324, "logps/chosen": -318.87103271484375, "logps/rejected": -319.90106201171875, "loss": 0.602, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0002502381685189903, "rewards/margins": 0.25090909004211426, "rewards/rejected": -0.2506588399410248, "step": 1870 }, { "epoch": 0.12, "learning_rate": 4.991970838437905e-06, "logits/chosen": -2.8745996952056885, "logits/rejected": -2.702986717224121, "logps/chosen": -230.1893310546875, "logps/rejected": -264.86920166015625, "loss": 0.6542, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.12010681629180908, "rewards/margins": 0.02200702764093876, "rewards/rejected": -0.1421138495206833, "step": 1880 }, { "epoch": 0.12, "learning_rate": 4.9915070821298294e-06, "logits/chosen": -2.8845481872558594, "logits/rejected": -2.8821284770965576, "logps/chosen": -284.79052734375, "logps/rejected": -272.1836853027344, "loss": 0.6228, "rewards/accuracies": 0.75, "rewards/chosen": 0.11445657163858414, "rewards/margins": 0.268448144197464, "rewards/rejected": -0.15399155020713806, "step": 1890 }, { "epoch": 0.12, "learning_rate": 4.991030328900336e-06, "logits/chosen": -2.974465847015381, "logits/rejected": -2.8674521446228027, "logps/chosen": -328.77899169921875, "logps/rejected": -249.0155792236328, "loss": 0.62, "rewards/accuracies": 0.75, "rewards/chosen": 0.1050812155008316, "rewards/margins": 0.21933884918689728, "rewards/rejected": -0.11425761878490448, "step": 1900 }, { "epoch": 0.12, "learning_rate": 4.9905405812364014e-06, "logits/chosen": -2.613386631011963, "logits/rejected": -2.7967910766601562, "logps/chosen": -275.4242858886719, "logps/rejected": -352.1298828125, "loss": 0.6447, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.11817902326583862, "rewards/margins": 0.15935374796390533, "rewards/rejected": -0.27753275632858276, "step": 1910 }, { "epoch": 0.13, "learning_rate": 4.990037841692791e-06, "logits/chosen": -2.7451908588409424, "logits/rejected": -2.581724166870117, "logps/chosen": -424.11279296875, "logps/rejected": -315.0185546875, "loss": 0.6417, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.0610206238925457, "rewards/margins": 0.1065296158194542, "rewards/rejected": -0.1675502508878708, "step": 1920 }, { "epoch": 0.13, "learning_rate": 4.989522112892039e-06, "logits/chosen": -2.661689281463623, "logits/rejected": -2.701464891433716, "logps/chosen": -270.7320861816406, "logps/rejected": -265.72564697265625, "loss": 0.5975, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.021038547158241272, "rewards/margins": 0.22835056483745575, "rewards/rejected": -0.20731201767921448, "step": 1930 }, { "epoch": 0.13, "learning_rate": 4.98899339752444e-06, "logits/chosen": -2.7815101146698, "logits/rejected": -2.878944158554077, "logps/chosen": -288.7119445800781, "logps/rejected": -282.08367919921875, "loss": 0.6303, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.1159503310918808, "rewards/margins": 0.09114055335521698, "rewards/rejected": -0.20709089934825897, "step": 1940 }, { "epoch": 0.13, "learning_rate": 4.988451698348033e-06, "logits/chosen": -2.9348340034484863, "logits/rejected": -3.0299041271209717, "logps/chosen": -339.9063415527344, "logps/rejected": -295.8576354980469, "loss": 0.6519, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05489187315106392, "rewards/margins": 0.06054762750864029, "rewards/rejected": -0.1154395118355751, "step": 1950 }, { "epoch": 0.13, "learning_rate": 4.987897018188585e-06, "logits/chosen": -2.618508815765381, "logits/rejected": -2.4888291358947754, "logps/chosen": -310.83551025390625, "logps/rejected": -265.2162170410156, "loss": 0.612, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.14666445553302765, "rewards/margins": 0.09787283092737198, "rewards/rejected": -0.24453727900981903, "step": 1960 }, { "epoch": 0.13, "learning_rate": 4.9873293599395814e-06, "logits/chosen": -2.6340200901031494, "logits/rejected": -2.6515004634857178, "logps/chosen": -306.9834899902344, "logps/rejected": -294.7528076171875, "loss": 0.6432, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10474874824285507, "rewards/margins": 0.20273160934448242, "rewards/rejected": -0.3074803352355957, "step": 1970 }, { "epoch": 0.13, "learning_rate": 4.986748726562203e-06, "logits/chosen": -2.706296682357788, "logits/rejected": -2.8979294300079346, "logps/chosen": -290.6112365722656, "logps/rejected": -338.37713623046875, "loss": 0.5807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03407129645347595, "rewards/margins": 0.20913465321063995, "rewards/rejected": -0.2432059496641159, "step": 1980 }, { "epoch": 0.13, "learning_rate": 4.98615512108532e-06, "logits/chosen": -2.7610926628112793, "logits/rejected": -2.870851993560791, "logps/chosen": -272.8517150878906, "logps/rejected": -366.29388427734375, "loss": 0.6786, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.01706174574792385, "rewards/margins": 0.029359757900238037, "rewards/rejected": -0.046421509236097336, "step": 1990 }, { "epoch": 0.13, "learning_rate": 4.985548546605469e-06, "logits/chosen": -2.651104688644409, "logits/rejected": -2.755127191543579, "logps/chosen": -310.54302978515625, "logps/rejected": -329.9986877441406, "loss": 0.6424, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.09717954695224762, "rewards/margins": 0.07522216439247131, "rewards/rejected": 0.02195737510919571, "step": 2000 }, { "epoch": 0.13, "learning_rate": 4.984929006286838e-06, "logits/chosen": -2.8356099128723145, "logits/rejected": -2.765662670135498, "logps/chosen": -339.5566101074219, "logps/rejected": -300.8033142089844, "loss": 0.6369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07670809328556061, "rewards/margins": 0.14976195991039276, "rewards/rejected": -0.07305385172367096, "step": 2010 }, { "epoch": 0.13, "learning_rate": 4.984296503361256e-06, "logits/chosen": -2.683217763900757, "logits/rejected": -2.7624807357788086, "logps/chosen": -248.154541015625, "logps/rejected": -254.16848754882812, "loss": 0.5923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.08094163239002228, "rewards/margins": 0.11836649477481842, "rewards/rejected": -0.037424858659505844, "step": 2020 }, { "epoch": 0.13, "learning_rate": 4.9836510411281645e-06, "logits/chosen": -2.602567195892334, "logits/rejected": -2.7688286304473877, "logps/chosen": -282.9025573730469, "logps/rejected": -257.9793395996094, "loss": 0.5823, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.016858983784914017, "rewards/margins": 0.3600960373878479, "rewards/rejected": -0.3432370126247406, "step": 2030 }, { "epoch": 0.13, "learning_rate": 4.982992622954613e-06, "logits/chosen": -2.650596857070923, "logits/rejected": -2.770486354827881, "logps/chosen": -414.5718688964844, "logps/rejected": -341.5304260253906, "loss": 0.6332, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.22947485744953156, "rewards/margins": 0.11173935234546661, "rewards/rejected": -0.34121423959732056, "step": 2040 }, { "epoch": 0.13, "learning_rate": 4.9823212522752325e-06, "logits/chosen": -2.7033188343048096, "logits/rejected": -2.6886463165283203, "logps/chosen": -316.0194396972656, "logps/rejected": -309.9945373535156, "loss": 0.6379, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08541987091302872, "rewards/margins": 0.1773550808429718, "rewards/rejected": -0.26277491450309753, "step": 2050 }, { "epoch": 0.13, "learning_rate": 4.981636932592222e-06, "logits/chosen": -2.928769826889038, "logits/rejected": -2.890620470046997, "logps/chosen": -458.3501892089844, "logps/rejected": -394.8435363769531, "loss": 0.607, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03149319440126419, "rewards/margins": 0.18963074684143066, "rewards/rejected": -0.15813757479190826, "step": 2060 }, { "epoch": 0.14, "learning_rate": 4.980939667475328e-06, "logits/chosen": -2.7920684814453125, "logits/rejected": -2.76005220413208, "logps/chosen": -267.3896484375, "logps/rejected": -304.2720947265625, "loss": 0.632, "rewards/accuracies": 0.75, "rewards/chosen": -0.17286111414432526, "rewards/margins": 0.17540349066257477, "rewards/rejected": -0.3482646048069, "step": 2070 }, { "epoch": 0.14, "learning_rate": 4.980229460561826e-06, "logits/chosen": -2.9371695518493652, "logits/rejected": -2.8353123664855957, "logps/chosen": -366.78912353515625, "logps/rejected": -339.2974548339844, "loss": 0.6588, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09282257407903671, "rewards/margins": 0.1689838320016861, "rewards/rejected": -0.2618064284324646, "step": 2080 }, { "epoch": 0.14, "learning_rate": 4.979506315556503e-06, "logits/chosen": -2.775759220123291, "logits/rejected": -2.7336509227752686, "logps/chosen": -212.32644653320312, "logps/rejected": -204.06906127929688, "loss": 0.5562, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02957214042544365, "rewards/margins": 0.33536648750305176, "rewards/rejected": -0.3649386167526245, "step": 2090 }, { "epoch": 0.14, "learning_rate": 4.9787702362316395e-06, "logits/chosen": -2.7911858558654785, "logits/rejected": -2.674790859222412, "logps/chosen": -354.57379150390625, "logps/rejected": -257.1429138183594, "loss": 0.6317, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.06640686839818954, "rewards/margins": 0.14485633373260498, "rewards/rejected": -0.21126320958137512, "step": 2100 }, { "epoch": 0.14, "learning_rate": 4.9780212264269835e-06, "logits/chosen": -2.5965216159820557, "logits/rejected": -2.6920247077941895, "logps/chosen": -236.93283081054688, "logps/rejected": -212.55197143554688, "loss": 0.6517, "rewards/accuracies": 0.75, "rewards/chosen": -0.06051822379231453, "rewards/margins": 0.05328022688627243, "rewards/rejected": -0.11379845440387726, "step": 2110 }, { "epoch": 0.14, "learning_rate": 4.977259290049739e-06, "logits/chosen": -2.8331427574157715, "logits/rejected": -2.8146824836730957, "logps/chosen": -298.81573486328125, "logps/rejected": -366.21905517578125, "loss": 0.6427, "rewards/accuracies": 0.75, "rewards/chosen": 0.03964535892009735, "rewards/margins": 0.1442907154560089, "rewards/rejected": -0.10464537143707275, "step": 2120 }, { "epoch": 0.14, "learning_rate": 4.976484431074538e-06, "logits/chosen": -2.7439489364624023, "logits/rejected": -2.6999521255493164, "logps/chosen": -335.0853576660156, "logps/rejected": -311.0948486328125, "loss": 0.6055, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.10866321623325348, "rewards/margins": 0.17203572392463684, "rewards/rejected": -0.2806989252567291, "step": 2130 }, { "epoch": 0.14, "learning_rate": 4.975696653543425e-06, "logits/chosen": -2.496903419494629, "logits/rejected": -2.6326394081115723, "logps/chosen": -227.9203643798828, "logps/rejected": -305.10430908203125, "loss": 0.627, "rewards/accuracies": 0.75, "rewards/chosen": -0.220001220703125, "rewards/margins": 0.21087773144245148, "rewards/rejected": -0.4308788776397705, "step": 2140 }, { "epoch": 0.14, "learning_rate": 4.974895961565835e-06, "logits/chosen": -3.0033822059631348, "logits/rejected": -2.862435817718506, "logps/chosen": -364.16748046875, "logps/rejected": -307.7515869140625, "loss": 0.6024, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.032728563994169235, "rewards/margins": 0.21506018936634064, "rewards/rejected": -0.24778875708580017, "step": 2150 }, { "epoch": 0.14, "learning_rate": 4.974082359318566e-06, "logits/chosen": -2.738600969314575, "logits/rejected": -2.770749092102051, "logps/chosen": -259.91302490234375, "logps/rejected": -257.9204406738281, "loss": 0.6264, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.08208741247653961, "rewards/margins": 0.2051773965358734, "rewards/rejected": -0.12308996915817261, "step": 2160 }, { "epoch": 0.14, "learning_rate": 4.973255851045769e-06, "logits/chosen": -2.687934398651123, "logits/rejected": -2.558964252471924, "logps/chosen": -245.05209350585938, "logps/rejected": -200.83056640625, "loss": 0.6101, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.13080234825611115, "rewards/margins": 0.2099367082118988, "rewards/rejected": -0.07913436740636826, "step": 2170 }, { "epoch": 0.14, "learning_rate": 4.972416441058915e-06, "logits/chosen": -2.744450330734253, "logits/rejected": -2.5733814239501953, "logps/chosen": -321.6153869628906, "logps/rejected": -291.11920166015625, "loss": 0.6304, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.025879329070448875, "rewards/margins": 0.2006806582212448, "rewards/rejected": -0.1748013198375702, "step": 2180 }, { "epoch": 0.14, "learning_rate": 4.971564133736777e-06, "logits/chosen": -2.665870189666748, "logits/rejected": -2.497673511505127, "logps/chosen": -248.264404296875, "logps/rejected": -253.1470184326172, "loss": 0.6669, "rewards/accuracies": 0.5, "rewards/chosen": -0.0837312713265419, "rewards/margins": 0.012899696826934814, "rewards/rejected": -0.09663096815347672, "step": 2190 }, { "epoch": 0.14, "learning_rate": 4.970698933525409e-06, "logits/chosen": -2.729921340942383, "logits/rejected": -2.6478610038757324, "logps/chosen": -236.1103057861328, "logps/rejected": -298.22674560546875, "loss": 0.6371, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.07094663381576538, "rewards/margins": 0.053258299827575684, "rewards/rejected": 0.017688335850834846, "step": 2200 }, { "epoch": 0.14, "learning_rate": 4.969820844938118e-06, "logits/chosen": -2.7145307064056396, "logits/rejected": -2.6865108013153076, "logps/chosen": -235.56124877929688, "logps/rejected": -294.7950439453125, "loss": 0.6113, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0399596281349659, "rewards/margins": 0.08694467693567276, "rewards/rejected": -0.04698503762483597, "step": 2210 }, { "epoch": 0.15, "learning_rate": 4.968929872555444e-06, "logits/chosen": -2.7173850536346436, "logits/rejected": -2.7645998001098633, "logps/chosen": -331.38848876953125, "logps/rejected": -282.1485595703125, "loss": 0.607, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04040694981813431, "rewards/margins": 0.31585150957107544, "rewards/rejected": -0.27544456720352173, "step": 2220 }, { "epoch": 0.15, "learning_rate": 4.968026021025137e-06, "logits/chosen": -2.807720899581909, "logits/rejected": -2.8572170734405518, "logps/chosen": -297.9932556152344, "logps/rejected": -284.1007385253906, "loss": 0.6067, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08410535752773285, "rewards/margins": 0.12690380215644836, "rewards/rejected": -0.2110091745853424, "step": 2230 }, { "epoch": 0.15, "learning_rate": 4.967109295062128e-06, "logits/chosen": -2.499629497528076, "logits/rejected": -2.497025728225708, "logps/chosen": -266.9873046875, "logps/rejected": -256.3060607910156, "loss": 0.6606, "rewards/accuracies": 0.75, "rewards/chosen": -0.037730611860752106, "rewards/margins": 0.12185688316822052, "rewards/rejected": -0.15958748757839203, "step": 2240 }, { "epoch": 0.15, "learning_rate": 4.966179699448509e-06, "logits/chosen": -2.9855830669403076, "logits/rejected": -2.822563648223877, "logps/chosen": -273.6982727050781, "logps/rejected": -236.4923858642578, "loss": 0.6039, "rewards/accuracies": 0.75, "rewards/chosen": 0.031817689538002014, "rewards/margins": 0.2563541531562805, "rewards/rejected": -0.22453641891479492, "step": 2250 }, { "epoch": 0.15, "learning_rate": 4.965237239033506e-06, "logits/chosen": -2.5667567253112793, "logits/rejected": -2.6416573524475098, "logps/chosen": -274.92852783203125, "logps/rejected": -335.0534362792969, "loss": 0.5869, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.012694315984845161, "rewards/margins": 0.16257108747959137, "rewards/rejected": -0.14987674355506897, "step": 2260 }, { "epoch": 0.15, "learning_rate": 4.964281918733453e-06, "logits/chosen": -2.5804924964904785, "logits/rejected": -2.684441566467285, "logps/chosen": -294.51617431640625, "logps/rejected": -315.2884521484375, "loss": 0.5968, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.06486682593822479, "rewards/margins": 0.2623649835586548, "rewards/rejected": -0.1974981129169464, "step": 2270 }, { "epoch": 0.15, "learning_rate": 4.9633137435317715e-06, "logits/chosen": -2.8421781063079834, "logits/rejected": -2.8614776134490967, "logps/chosen": -352.1127014160156, "logps/rejected": -288.5516357421875, "loss": 0.5829, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.009625107049942017, "rewards/margins": 0.184491366147995, "rewards/rejected": -0.17486627399921417, "step": 2280 }, { "epoch": 0.15, "learning_rate": 4.9623327184789355e-06, "logits/chosen": -2.869119644165039, "logits/rejected": -2.770078182220459, "logps/chosen": -267.7673645019531, "logps/rejected": -217.97592163085938, "loss": 0.5909, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.00918522197753191, "rewards/margins": 0.219018816947937, "rewards/rejected": -0.22820401191711426, "step": 2290 }, { "epoch": 0.15, "learning_rate": 4.9613388486924525e-06, "logits/chosen": -2.757904291152954, "logits/rejected": -2.7098991870880127, "logps/chosen": -266.7463684082031, "logps/rejected": -258.8957824707031, "loss": 0.6369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17312125861644745, "rewards/margins": 0.23959565162658691, "rewards/rejected": -0.4127168655395508, "step": 2300 }, { "epoch": 0.15, "learning_rate": 4.960332139356834e-06, "logits/chosen": -2.615774631500244, "logits/rejected": -2.499512195587158, "logps/chosen": -266.8538818359375, "logps/rejected": -282.8984680175781, "loss": 0.6625, "rewards/accuracies": 0.5, "rewards/chosen": -0.14361433684825897, "rewards/margins": 0.18731355667114258, "rewards/rejected": -0.33092790842056274, "step": 2310 }, { "epoch": 0.15, "learning_rate": 4.95931259572357e-06, "logits/chosen": -2.638256549835205, "logits/rejected": -2.6577975749969482, "logps/chosen": -284.3966979980469, "logps/rejected": -324.7958984375, "loss": 0.5841, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0298948734998703, "rewards/margins": 0.21776501834392548, "rewards/rejected": -0.18787017464637756, "step": 2320 }, { "epoch": 0.15, "learning_rate": 4.9582802231111e-06, "logits/chosen": -2.8069567680358887, "logits/rejected": -2.7043187618255615, "logps/chosen": -285.8518371582031, "logps/rejected": -340.26171875, "loss": 0.6535, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.023144256323575974, "rewards/margins": 0.12189881503582001, "rewards/rejected": -0.14504310488700867, "step": 2330 }, { "epoch": 0.15, "learning_rate": 4.957235026904782e-06, "logits/chosen": -2.7947421073913574, "logits/rejected": -2.693174362182617, "logps/chosen": -343.4107971191406, "logps/rejected": -279.5213623046875, "loss": 0.5414, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.07865247875452042, "rewards/margins": 0.2984381318092346, "rewards/rejected": -0.37709060311317444, "step": 2340 }, { "epoch": 0.15, "learning_rate": 4.956177012556875e-06, "logits/chosen": -2.6740355491638184, "logits/rejected": -2.555637836456299, "logps/chosen": -375.19744873046875, "logps/rejected": -320.8430480957031, "loss": 0.6106, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24865643680095673, "rewards/margins": 0.20838837325572968, "rewards/rejected": -0.45704489946365356, "step": 2350 }, { "epoch": 0.15, "learning_rate": 4.9551061855864976e-06, "logits/chosen": -2.813265800476074, "logits/rejected": -2.802703619003296, "logps/chosen": -361.70770263671875, "logps/rejected": -349.2850341796875, "loss": 0.6683, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2633809745311737, "rewards/margins": 0.19430817663669586, "rewards/rejected": -0.4576891362667084, "step": 2360 }, { "epoch": 0.16, "learning_rate": 4.95402255157961e-06, "logits/chosen": -2.565751552581787, "logits/rejected": -2.5812134742736816, "logps/chosen": -277.73876953125, "logps/rejected": -388.343505859375, "loss": 0.6804, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29647955298423767, "rewards/margins": 0.07615470886230469, "rewards/rejected": -0.37263426184654236, "step": 2370 }, { "epoch": 0.16, "learning_rate": 4.952926116188977e-06, "logits/chosen": -2.7091922760009766, "logits/rejected": -2.7243802547454834, "logps/chosen": -314.4618835449219, "logps/rejected": -292.33929443359375, "loss": 0.6367, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.07366470992565155, "rewards/margins": 0.19427324831485748, "rewards/rejected": -0.26793795824050903, "step": 2380 }, { "epoch": 0.16, "learning_rate": 4.951816885134143e-06, "logits/chosen": -2.9690959453582764, "logits/rejected": -2.8864049911499023, "logps/chosen": -389.62689208984375, "logps/rejected": -310.96044921875, "loss": 0.6694, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13698528707027435, "rewards/margins": 0.010475650429725647, "rewards/rejected": -0.1474609375, "step": 2390 }, { "epoch": 0.16, "learning_rate": 4.950694864201399e-06, "logits/chosen": -2.6521568298339844, "logits/rejected": -2.540308952331543, "logps/chosen": -199.8266143798828, "logps/rejected": -339.84564208984375, "loss": 0.686, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.01984802633523941, "rewards/margins": 0.10776877403259277, "rewards/rejected": -0.1276167929172516, "step": 2400 }, { "epoch": 0.16, "learning_rate": 4.9495600592437575e-06, "logits/chosen": -2.8433144092559814, "logits/rejected": -2.628474235534668, "logps/chosen": -336.7990417480469, "logps/rejected": -364.16583251953125, "loss": 0.6337, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.10713674873113632, "rewards/margins": 0.17330363392829895, "rewards/rejected": -0.06616689264774323, "step": 2410 }, { "epoch": 0.16, "learning_rate": 4.948412476180917e-06, "logits/chosen": -2.846092462539673, "logits/rejected": -2.807753324508667, "logps/chosen": -470.0384826660156, "logps/rejected": -318.007568359375, "loss": 0.563, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.1342833936214447, "rewards/margins": 0.33717745542526245, "rewards/rejected": -0.20289401710033417, "step": 2420 }, { "epoch": 0.16, "learning_rate": 4.947252120999232e-06, "logits/chosen": -2.7561464309692383, "logits/rejected": -2.737532138824463, "logps/chosen": -331.05694580078125, "logps/rejected": -276.02783203125, "loss": 0.5775, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10048478841781616, "rewards/margins": 0.3389813303947449, "rewards/rejected": -0.43946608901023865, "step": 2430 }, { "epoch": 0.16, "learning_rate": 4.946078999751683e-06, "logits/chosen": -2.927323341369629, "logits/rejected": -2.8564233779907227, "logps/chosen": -424.54608154296875, "logps/rejected": -322.99810791015625, "loss": 0.5726, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0023977644741535187, "rewards/margins": 0.43597611784935, "rewards/rejected": -0.4383738934993744, "step": 2440 }, { "epoch": 0.16, "learning_rate": 4.944893118557847e-06, "logits/chosen": -2.6823534965515137, "logits/rejected": -2.632371425628662, "logps/chosen": -273.3263854980469, "logps/rejected": -285.999755859375, "loss": 0.6475, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23725327849388123, "rewards/margins": 0.17456074059009552, "rewards/rejected": -0.41181403398513794, "step": 2450 }, { "epoch": 0.16, "learning_rate": 4.943694483603861e-06, "logits/chosen": -2.4171245098114014, "logits/rejected": -2.4290964603424072, "logps/chosen": -256.521240234375, "logps/rejected": -283.49176025390625, "loss": 0.5532, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.11423642933368683, "rewards/margins": 0.40349873900413513, "rewards/rejected": -0.5177351236343384, "step": 2460 }, { "epoch": 0.16, "learning_rate": 4.9424831011423914e-06, "logits/chosen": -2.8101084232330322, "logits/rejected": -2.8960862159729004, "logps/chosen": -365.55596923828125, "logps/rejected": -305.8233642578125, "loss": 0.61, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.1972278505563736, "rewards/margins": 0.22212381660938263, "rewards/rejected": -0.419351726770401, "step": 2470 }, { "epoch": 0.16, "learning_rate": 4.9412589774926015e-06, "logits/chosen": -2.8390262126922607, "logits/rejected": -2.7306559085845947, "logps/chosen": -395.5912170410156, "logps/rejected": -368.03997802734375, "loss": 0.6008, "rewards/accuracies": 0.75, "rewards/chosen": -0.22051730751991272, "rewards/margins": 0.2272726595401764, "rewards/rejected": -0.44778990745544434, "step": 2480 }, { "epoch": 0.16, "learning_rate": 4.940022119040121e-06, "logits/chosen": -2.8345274925231934, "logits/rejected": -2.4840381145477295, "logps/chosen": -249.48184204101562, "logps/rejected": -277.16351318359375, "loss": 0.611, "rewards/accuracies": 0.75, "rewards/chosen": -0.29384705424308777, "rewards/margins": 0.2915216088294983, "rewards/rejected": -0.5853686332702637, "step": 2490 }, { "epoch": 0.16, "learning_rate": 4.93877253223701e-06, "logits/chosen": -2.30358624458313, "logits/rejected": -2.2991833686828613, "logps/chosen": -239.4513702392578, "logps/rejected": -340.1556396484375, "loss": 0.6116, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2424122840166092, "rewards/margins": 0.2587777376174927, "rewards/rejected": -0.5011900067329407, "step": 2500 }, { "epoch": 0.16, "learning_rate": 4.937510223601725e-06, "logits/chosen": -2.683197498321533, "logits/rejected": -2.6224794387817383, "logps/chosen": -282.93170166015625, "logps/rejected": -275.8567810058594, "loss": 0.5768, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.19845330715179443, "rewards/margins": 0.25473955273628235, "rewards/rejected": -0.453192800283432, "step": 2510 }, { "epoch": 0.16, "learning_rate": 4.936235199719085e-06, "logits/chosen": -2.7041985988616943, "logits/rejected": -2.5971970558166504, "logps/chosen": -331.7781677246094, "logps/rejected": -321.8423767089844, "loss": 0.5466, "rewards/accuracies": 0.75, "rewards/chosen": -0.11719875037670135, "rewards/margins": 0.36858198046684265, "rewards/rejected": -0.4857807159423828, "step": 2520 }, { "epoch": 0.17, "learning_rate": 4.93494746724024e-06, "logits/chosen": -2.744253635406494, "logits/rejected": -2.6150527000427246, "logps/chosen": -371.90411376953125, "logps/rejected": -352.849365234375, "loss": 0.5948, "rewards/accuracies": 0.75, "rewards/chosen": -0.16774104535579681, "rewards/margins": 0.2554989755153656, "rewards/rejected": -0.4232400357723236, "step": 2530 }, { "epoch": 0.17, "learning_rate": 4.933647032882635e-06, "logits/chosen": -2.717115640640259, "logits/rejected": -2.6257479190826416, "logps/chosen": -252.213134765625, "logps/rejected": -235.54183959960938, "loss": 0.617, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20600995421409607, "rewards/margins": 0.17118848860263824, "rewards/rejected": -0.3771984875202179, "step": 2540 }, { "epoch": 0.17, "learning_rate": 4.932333903429969e-06, "logits/chosen": -2.7186102867126465, "logits/rejected": -2.658327579498291, "logps/chosen": -256.15576171875, "logps/rejected": -290.25372314453125, "loss": 0.6289, "rewards/accuracies": 0.75, "rewards/chosen": -0.1984180510044098, "rewards/margins": 0.1289817988872528, "rewards/rejected": -0.327399879693985, "step": 2550 }, { "epoch": 0.17, "learning_rate": 4.931008085732172e-06, "logits/chosen": -2.6936402320861816, "logits/rejected": -2.7055277824401855, "logps/chosen": -248.66830444335938, "logps/rejected": -329.27239990234375, "loss": 0.6649, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35135889053344727, "rewards/margins": 0.08967886865139008, "rewards/rejected": -0.44103774428367615, "step": 2560 }, { "epoch": 0.17, "learning_rate": 4.9296695867053565e-06, "logits/chosen": -2.7393367290496826, "logits/rejected": -2.673619508743286, "logps/chosen": -330.6111145019531, "logps/rejected": -382.0716857910156, "loss": 0.5591, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.056804366409778595, "rewards/margins": 0.4075342118740082, "rewards/rejected": -0.46433860063552856, "step": 2570 }, { "epoch": 0.17, "learning_rate": 4.928318413331791e-06, "logits/chosen": -2.8475029468536377, "logits/rejected": -2.6908457279205322, "logps/chosen": -213.46224975585938, "logps/rejected": -211.10586547851562, "loss": 0.6087, "rewards/accuracies": 0.75, "rewards/chosen": -0.0277628805488348, "rewards/margins": 0.3201954960823059, "rewards/rejected": -0.34795838594436646, "step": 2580 }, { "epoch": 0.17, "learning_rate": 4.926954572659855e-06, "logits/chosen": -2.6036033630371094, "logits/rejected": -2.5377697944641113, "logps/chosen": -282.39874267578125, "logps/rejected": -233.1647186279297, "loss": 0.6071, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04948263615369797, "rewards/margins": 0.3064866364002228, "rewards/rejected": -0.35596925020217896, "step": 2590 }, { "epoch": 0.17, "learning_rate": 4.925578071804013e-06, "logits/chosen": -2.7369303703308105, "logits/rejected": -2.7130982875823975, "logps/chosen": -270.61651611328125, "logps/rejected": -413.401611328125, "loss": 0.5794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21495899558067322, "rewards/margins": 0.24355199933052063, "rewards/rejected": -0.45851102471351624, "step": 2600 }, { "epoch": 0.17, "learning_rate": 4.924188917944763e-06, "logits/chosen": -2.52673077583313, "logits/rejected": -2.575197219848633, "logps/chosen": -222.6197509765625, "logps/rejected": -315.6336975097656, "loss": 0.6478, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.23872074484825134, "rewards/margins": 0.1525263786315918, "rewards/rejected": -0.39124712347984314, "step": 2610 }, { "epoch": 0.17, "learning_rate": 4.922787118328617e-06, "logits/chosen": -2.5672783851623535, "logits/rejected": -2.7630858421325684, "logps/chosen": -235.62557983398438, "logps/rejected": -324.2035827636719, "loss": 0.5879, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.14392860233783722, "rewards/margins": 0.2915584444999695, "rewards/rejected": -0.4354870915412903, "step": 2620 }, { "epoch": 0.17, "learning_rate": 4.921372680268045e-06, "logits/chosen": -2.8274261951446533, "logits/rejected": -2.6772689819335938, "logps/chosen": -379.2863464355469, "logps/rejected": -330.87335205078125, "loss": 0.5386, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.23995617032051086, "rewards/margins": 0.45680126547813416, "rewards/rejected": -0.696757435798645, "step": 2630 }, { "epoch": 0.17, "learning_rate": 4.919945611141451e-06, "logits/chosen": -2.4838151931762695, "logits/rejected": -2.41625714302063, "logps/chosen": -326.9600830078125, "logps/rejected": -331.88836669921875, "loss": 0.6019, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4891494810581207, "rewards/margins": 0.11712410300970078, "rewards/rejected": -0.6062735319137573, "step": 2640 }, { "epoch": 0.17, "learning_rate": 4.918505918393125e-06, "logits/chosen": -2.6945321559906006, "logits/rejected": -2.4596617221832275, "logps/chosen": -285.453857421875, "logps/rejected": -346.5177307128906, "loss": 0.5209, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18411032855510712, "rewards/margins": 0.47116097807884216, "rewards/rejected": -0.6552713513374329, "step": 2650 }, { "epoch": 0.17, "learning_rate": 4.91705360953321e-06, "logits/chosen": -2.7415881156921387, "logits/rejected": -2.708122491836548, "logps/chosen": -288.8190612792969, "logps/rejected": -325.80731201171875, "loss": 0.6134, "rewards/accuracies": 0.75, "rewards/chosen": -0.2988317310810089, "rewards/margins": 0.29410165548324585, "rewards/rejected": -0.5929334163665771, "step": 2660 }, { "epoch": 0.17, "learning_rate": 4.9155886921376615e-06, "logits/chosen": -2.342031955718994, "logits/rejected": -2.5404393672943115, "logps/chosen": -275.90142822265625, "logps/rejected": -270.68988037109375, "loss": 0.565, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34874653816223145, "rewards/margins": 0.1326829195022583, "rewards/rejected": -0.48142942786216736, "step": 2670 }, { "epoch": 0.18, "learning_rate": 4.914111173848205e-06, "logits/chosen": -2.7839322090148926, "logits/rejected": -2.637239694595337, "logps/chosen": -494.39410400390625, "logps/rejected": -440.8687438964844, "loss": 0.5636, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.36821606755256653, "rewards/margins": 0.5051993131637573, "rewards/rejected": -0.873415470123291, "step": 2680 }, { "epoch": 0.18, "learning_rate": 4.9126210623723e-06, "logits/chosen": -2.399345874786377, "logits/rejected": -2.5851237773895264, "logps/chosen": -318.40155029296875, "logps/rejected": -333.19683837890625, "loss": 0.597, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3792705535888672, "rewards/margins": 0.15724647045135498, "rewards/rejected": -0.5365170240402222, "step": 2690 }, { "epoch": 0.18, "learning_rate": 4.911118365483098e-06, "logits/chosen": -2.8516323566436768, "logits/rejected": -2.586472749710083, "logps/chosen": -379.7595520019531, "logps/rejected": -350.9483947753906, "loss": 0.5593, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3597323000431061, "rewards/margins": 0.36504340171813965, "rewards/rejected": -0.7247756719589233, "step": 2700 }, { "epoch": 0.18, "learning_rate": 4.909603091019403e-06, "logits/chosen": -2.684126853942871, "logits/rejected": -2.6924331188201904, "logps/chosen": -364.5967102050781, "logps/rejected": -351.02935791015625, "loss": 0.7345, "rewards/accuracies": 0.5, "rewards/chosen": -0.8244889974594116, "rewards/margins": -0.039155781269073486, "rewards/rejected": -0.7853331565856934, "step": 2710 }, { "epoch": 0.18, "learning_rate": 4.908075246885626e-06, "logits/chosen": -2.609663486480713, "logits/rejected": -2.403352737426758, "logps/chosen": -455.54742431640625, "logps/rejected": -342.30303955078125, "loss": 0.5849, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3363655209541321, "rewards/margins": 0.449228435754776, "rewards/rejected": -0.7855939865112305, "step": 2720 }, { "epoch": 0.18, "learning_rate": 4.906534841051755e-06, "logits/chosen": -2.559765577316284, "logits/rejected": -2.52396559715271, "logps/chosen": -336.14056396484375, "logps/rejected": -396.00921630859375, "loss": 0.5847, "rewards/accuracies": 0.75, "rewards/chosen": -0.4036168158054352, "rewards/margins": 0.4683077335357666, "rewards/rejected": -0.8719245195388794, "step": 2730 }, { "epoch": 0.18, "learning_rate": 4.904981881553297e-06, "logits/chosen": -2.4790966510772705, "logits/rejected": -2.6218953132629395, "logps/chosen": -280.62835693359375, "logps/rejected": -341.94757080078125, "loss": 0.6085, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5170210599899292, "rewards/margins": 0.39688968658447266, "rewards/rejected": -0.9139108657836914, "step": 2740 }, { "epoch": 0.18, "learning_rate": 4.903416376491252e-06, "logits/chosen": -2.4483070373535156, "logits/rejected": -2.463671922683716, "logps/chosen": -291.2413330078125, "logps/rejected": -357.9991760253906, "loss": 0.5845, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6890040636062622, "rewards/margins": 0.3651917278766632, "rewards/rejected": -1.054195761680603, "step": 2750 }, { "epoch": 0.18, "learning_rate": 4.90183833403206e-06, "logits/chosen": -2.2340424060821533, "logits/rejected": -2.195526599884033, "logps/chosen": -341.2486877441406, "logps/rejected": -319.59564208984375, "loss": 0.5644, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7643866539001465, "rewards/margins": 0.27745020389556885, "rewards/rejected": -1.0418369770050049, "step": 2760 }, { "epoch": 0.18, "learning_rate": 4.900247762407564e-06, "logits/chosen": -2.1294519901275635, "logits/rejected": -2.2961814403533936, "logps/chosen": -290.6664733886719, "logps/rejected": -384.6078796386719, "loss": 0.5743, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.493140310049057, "rewards/margins": 0.5134254693984985, "rewards/rejected": -1.0065659284591675, "step": 2770 }, { "epoch": 0.18, "learning_rate": 4.898644669914965e-06, "logits/chosen": -2.4749608039855957, "logits/rejected": -2.49005126953125, "logps/chosen": -377.3028259277344, "logps/rejected": -360.3443603515625, "loss": 0.5785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22494623064994812, "rewards/margins": 0.39548856019973755, "rewards/rejected": -0.6204348206520081, "step": 2780 }, { "epoch": 0.18, "learning_rate": 4.897029064916778e-06, "logits/chosen": -2.4899182319641113, "logits/rejected": -2.4811320304870605, "logps/chosen": -344.0760498046875, "logps/rejected": -380.71453857421875, "loss": 0.5613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36472102999687195, "rewards/margins": 0.4021889269351959, "rewards/rejected": -0.7669100165367126, "step": 2790 }, { "epoch": 0.18, "learning_rate": 4.895400955840791e-06, "logits/chosen": -2.688682794570923, "logits/rejected": -2.5415966510772705, "logps/chosen": -395.62225341796875, "logps/rejected": -340.3146057128906, "loss": 0.606, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3848732113838196, "rewards/margins": 0.2556595504283905, "rewards/rejected": -0.6405327916145325, "step": 2800 }, { "epoch": 0.18, "learning_rate": 4.893760351180018e-06, "logits/chosen": -2.3920750617980957, "logits/rejected": -2.2974207401275635, "logps/chosen": -307.5142822265625, "logps/rejected": -327.1595458984375, "loss": 0.6083, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8331905603408813, "rewards/margins": 0.14832532405853271, "rewards/rejected": -0.9815157651901245, "step": 2810 }, { "epoch": 0.18, "learning_rate": 4.892107259492657e-06, "logits/chosen": -2.618544101715088, "logits/rejected": -2.540144443511963, "logps/chosen": -382.5555114746094, "logps/rejected": -414.6402282714844, "loss": 0.6895, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.7530332207679749, "rewards/margins": 0.08182497322559357, "rewards/rejected": -0.834858238697052, "step": 2820 }, { "epoch": 0.19, "learning_rate": 4.890441689402042e-06, "logits/chosen": -2.2747488021850586, "logits/rejected": -2.4530391693115234, "logps/chosen": -303.0140686035156, "logps/rejected": -342.5342712402344, "loss": 0.567, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6708128452301025, "rewards/margins": 0.3363160490989685, "rewards/rejected": -1.0071289539337158, "step": 2830 }, { "epoch": 0.19, "learning_rate": 4.888763649596606e-06, "logits/chosen": -2.5664830207824707, "logits/rejected": -2.5413601398468018, "logps/chosen": -317.67974853515625, "logps/rejected": -373.93231201171875, "loss": 0.5778, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.509292483329773, "rewards/margins": 0.43065786361694336, "rewards/rejected": -0.9399503469467163, "step": 2840 }, { "epoch": 0.19, "learning_rate": 4.887073148829824e-06, "logits/chosen": -2.659855365753174, "logits/rejected": -2.4314472675323486, "logps/chosen": -371.5045471191406, "logps/rejected": -371.8569030761719, "loss": 0.4925, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6626033782958984, "rewards/margins": 0.57807856798172, "rewards/rejected": -1.2406818866729736, "step": 2850 }, { "epoch": 0.19, "learning_rate": 4.885370195920177e-06, "logits/chosen": -2.265190839767456, "logits/rejected": -2.2952256202697754, "logps/chosen": -221.4605255126953, "logps/rejected": -271.4645690917969, "loss": 0.5525, "rewards/accuracies": 0.75, "rewards/chosen": -0.4074770510196686, "rewards/margins": 0.4455071985721588, "rewards/rejected": -0.8529843091964722, "step": 2860 }, { "epoch": 0.19, "learning_rate": 4.883654799751101e-06, "logits/chosen": -2.5295357704162598, "logits/rejected": -2.604036808013916, "logps/chosen": -251.52352905273438, "logps/rejected": -317.5025939941406, "loss": 0.6774, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5625568628311157, "rewards/margins": 0.09554485976696014, "rewards/rejected": -0.6581017374992371, "step": 2870 }, { "epoch": 0.19, "learning_rate": 4.8819269692709435e-06, "logits/chosen": -2.506788969039917, "logits/rejected": -2.595655679702759, "logps/chosen": -302.5235290527344, "logps/rejected": -373.41839599609375, "loss": 0.5923, "rewards/accuracies": 0.75, "rewards/chosen": -0.6250518560409546, "rewards/margins": 0.585913360118866, "rewards/rejected": -1.2109653949737549, "step": 2880 }, { "epoch": 0.19, "learning_rate": 4.880186713492915e-06, "logits/chosen": -2.668774366378784, "logits/rejected": -2.2550742626190186, "logps/chosen": -292.2339172363281, "logps/rejected": -402.7520446777344, "loss": 0.5938, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5656948685646057, "rewards/margins": 0.446084201335907, "rewards/rejected": -1.0117790699005127, "step": 2890 }, { "epoch": 0.19, "learning_rate": 4.878434041495041e-06, "logits/chosen": -2.6363844871520996, "logits/rejected": -2.402409076690674, "logps/chosen": -326.28057861328125, "logps/rejected": -340.3365783691406, "loss": 0.4879, "rewards/accuracies": 1.0, "rewards/chosen": -0.20777079463005066, "rewards/margins": 0.8683158159255981, "rewards/rejected": -1.0760865211486816, "step": 2900 }, { "epoch": 0.19, "learning_rate": 4.876668962420117e-06, "logits/chosen": -2.0191640853881836, "logits/rejected": -2.336470365524292, "logps/chosen": -216.2163543701172, "logps/rejected": -426.68682861328125, "loss": 0.6081, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.577592134475708, "rewards/margins": 0.34442293643951416, "rewards/rejected": -0.9220150113105774, "step": 2910 }, { "epoch": 0.19, "learning_rate": 4.87489148547566e-06, "logits/chosen": -2.3271188735961914, "logits/rejected": -2.41279673576355, "logps/chosen": -427.1767578125, "logps/rejected": -423.5763244628906, "loss": 0.5848, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.738415539264679, "rewards/margins": 0.5254755020141602, "rewards/rejected": -1.2638909816741943, "step": 2920 }, { "epoch": 0.19, "learning_rate": 4.873101619933862e-06, "logits/chosen": -2.491079330444336, "logits/rejected": -2.249969244003296, "logps/chosen": -496.1459045410156, "logps/rejected": -414.140869140625, "loss": 0.5926, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9692713022232056, "rewards/margins": 0.413583368062973, "rewards/rejected": -1.382854700088501, "step": 2930 }, { "epoch": 0.19, "learning_rate": 4.8712993751315385e-06, "logits/chosen": -2.3056821823120117, "logits/rejected": -2.304752826690674, "logps/chosen": -371.10260009765625, "logps/rejected": -419.9462890625, "loss": 0.5782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.1785022020339966, "rewards/margins": 0.45382365584373474, "rewards/rejected": -1.6323257684707642, "step": 2940 }, { "epoch": 0.19, "learning_rate": 4.869484760470079e-06, "logits/chosen": -2.5141701698303223, "logits/rejected": -2.5063791275024414, "logps/chosen": -392.4580993652344, "logps/rejected": -448.1770935058594, "loss": 0.6687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1139910221099854, "rewards/margins": 0.1485811471939087, "rewards/rejected": -1.2625722885131836, "step": 2950 }, { "epoch": 0.19, "learning_rate": 4.867657785415404e-06, "logits/chosen": -2.4104926586151123, "logits/rejected": -2.4443955421447754, "logps/chosen": -403.44586181640625, "logps/rejected": -444.3780212402344, "loss": 0.5307, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.8458258509635925, "rewards/margins": 0.4578852653503418, "rewards/rejected": -1.303711175918579, "step": 2960 }, { "epoch": 0.19, "learning_rate": 4.865818459497911e-06, "logits/chosen": -2.6232340335845947, "logits/rejected": -2.461763858795166, "logps/chosen": -485.65423583984375, "logps/rejected": -356.5897521972656, "loss": 0.6082, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6778312921524048, "rewards/margins": 0.25565898418426514, "rewards/rejected": -0.9334903955459595, "step": 2970 }, { "epoch": 0.19, "learning_rate": 4.863966792312423e-06, "logits/chosen": -2.198486804962158, "logits/rejected": -2.3604507446289062, "logps/chosen": -354.3186340332031, "logps/rejected": -375.06231689453125, "loss": 0.6062, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.8335941433906555, "rewards/margins": 0.12950456142425537, "rewards/rejected": -0.9630987048149109, "step": 2980 }, { "epoch": 0.2, "learning_rate": 4.862102793518145e-06, "logits/chosen": -2.465055227279663, "logits/rejected": -2.45820951461792, "logps/chosen": -282.09307861328125, "logps/rejected": -328.89666748046875, "loss": 0.5157, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3737873435020447, "rewards/margins": 0.7718119621276855, "rewards/rejected": -1.1455992460250854, "step": 2990 }, { "epoch": 0.2, "learning_rate": 4.8602264728386075e-06, "logits/chosen": -2.371791124343872, "logits/rejected": -2.4465041160583496, "logps/chosen": -405.9463806152344, "logps/rejected": -443.180908203125, "loss": 0.5795, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.43187254667282104, "rewards/margins": 0.49086993932724, "rewards/rejected": -0.922742486000061, "step": 3000 }, { "epoch": 0.2, "eval_logits/chosen": -2.4471633434295654, "eval_logits/rejected": -2.368870735168457, "eval_logps/chosen": -373.0481872558594, "eval_logps/rejected": -392.3198547363281, "eval_loss": 0.5888271331787109, "eval_rewards/accuracies": 0.6830000281333923, "eval_rewards/chosen": -0.7760262489318848, "eval_rewards/margins": 0.3931134045124054, "eval_rewards/rejected": -1.1691396236419678, "eval_runtime": 465.2562, "eval_samples_per_second": 4.299, "eval_steps_per_second": 2.149, "step": 3000 }, { "epoch": 0.2, "learning_rate": 4.858337840061616e-06, "logits/chosen": -2.5099802017211914, "logits/rejected": -2.4437527656555176, "logps/chosen": -273.11676025390625, "logps/rejected": -373.9442443847656, "loss": 0.5426, "rewards/accuracies": 0.75, "rewards/chosen": -0.9518076181411743, "rewards/margins": 0.46014493703842163, "rewards/rejected": -1.4119526147842407, "step": 3010 }, { "epoch": 0.2, "learning_rate": 4.856436905039208e-06, "logits/chosen": -2.579927921295166, "logits/rejected": -2.2081363201141357, "logps/chosen": -420.5865173339844, "logps/rejected": -390.60870361328125, "loss": 0.5135, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6775982975959778, "rewards/margins": 0.4973832070827484, "rewards/rejected": -1.1749814748764038, "step": 3020 }, { "epoch": 0.2, "learning_rate": 4.854523677687588e-06, "logits/chosen": -2.3134303092956543, "logits/rejected": -2.064049243927002, "logps/chosen": -280.912353515625, "logps/rejected": -321.2112731933594, "loss": 0.5114, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49111509323120117, "rewards/margins": 0.5566691160202026, "rewards/rejected": -1.0477842092514038, "step": 3030 }, { "epoch": 0.2, "learning_rate": 4.85259816798709e-06, "logits/chosen": -2.618589401245117, "logits/rejected": -2.6263256072998047, "logps/chosen": -331.26641845703125, "logps/rejected": -321.235595703125, "loss": 0.6071, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.570954442024231, "rewards/margins": 0.10999935865402222, "rewards/rejected": -0.6809538006782532, "step": 3040 }, { "epoch": 0.2, "learning_rate": 4.850660385982114e-06, "logits/chosen": -2.4453115463256836, "logits/rejected": -2.613149881362915, "logps/chosen": -371.54034423828125, "logps/rejected": -444.22314453125, "loss": 0.6007, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5611933469772339, "rewards/margins": 0.351379930973053, "rewards/rejected": -0.9125733375549316, "step": 3050 }, { "epoch": 0.2, "learning_rate": 4.848710341781081e-06, "logits/chosen": -2.22904109954834, "logits/rejected": -2.3576712608337402, "logps/chosen": -308.20159912109375, "logps/rejected": -356.36212158203125, "loss": 0.6788, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49177709221839905, "rewards/margins": 0.14831292629241943, "rewards/rejected": -0.6400899887084961, "step": 3060 }, { "epoch": 0.2, "learning_rate": 4.846748045556377e-06, "logits/chosen": -2.3152804374694824, "logits/rejected": -2.2141315937042236, "logps/chosen": -386.8840026855469, "logps/rejected": -448.82989501953125, "loss": 0.5557, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.44822603464126587, "rewards/margins": 0.6427403092384338, "rewards/rejected": -1.0909662246704102, "step": 3070 }, { "epoch": 0.2, "learning_rate": 4.8447735075442995e-06, "logits/chosen": -2.2687716484069824, "logits/rejected": -2.4406607151031494, "logps/chosen": -229.04373168945312, "logps/rejected": -280.9591979980469, "loss": 0.6253, "rewards/accuracies": 0.75, "rewards/chosen": -0.41208410263061523, "rewards/margins": 0.21082070469856262, "rewards/rejected": -0.6229047775268555, "step": 3080 }, { "epoch": 0.2, "learning_rate": 4.8427867380450075e-06, "logits/chosen": -2.3034188747406006, "logits/rejected": -2.235747814178467, "logps/chosen": -299.91656494140625, "logps/rejected": -340.36090087890625, "loss": 0.5385, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5435574054718018, "rewards/margins": 0.48364514112472534, "rewards/rejected": -1.0272024869918823, "step": 3090 }, { "epoch": 0.2, "learning_rate": 4.840787747422462e-06, "logits/chosen": -2.0936059951782227, "logits/rejected": -2.0867063999176025, "logps/chosen": -317.1687316894531, "logps/rejected": -335.8619079589844, "loss": 0.5503, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3254963755607605, "rewards/margins": 0.6754858493804932, "rewards/rejected": -1.0009822845458984, "step": 3100 }, { "epoch": 0.2, "learning_rate": 4.838776546104378e-06, "logits/chosen": -2.322481155395508, "logits/rejected": -2.3373770713806152, "logps/chosen": -390.653076171875, "logps/rejected": -397.94122314453125, "loss": 0.5695, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6159372925758362, "rewards/margins": 0.4556799530982971, "rewards/rejected": -1.0716171264648438, "step": 3110 }, { "epoch": 0.2, "learning_rate": 4.836753144582168e-06, "logits/chosen": -2.4194235801696777, "logits/rejected": -2.2814254760742188, "logps/chosen": -395.2923278808594, "logps/rejected": -362.1666259765625, "loss": 0.477, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.32465457916259766, "rewards/margins": 0.8579540252685547, "rewards/rejected": -1.1826084852218628, "step": 3120 }, { "epoch": 0.2, "learning_rate": 4.834717553410884e-06, "logits/chosen": -2.35410475730896, "logits/rejected": -2.2035439014434814, "logps/chosen": -339.8991394042969, "logps/rejected": -425.37158203125, "loss": 0.678, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9469770193099976, "rewards/margins": 0.16363593935966492, "rewards/rejected": -1.1106128692626953, "step": 3130 }, { "epoch": 0.21, "learning_rate": 4.832669783209167e-06, "logits/chosen": -2.2493810653686523, "logits/rejected": -2.4438600540161133, "logps/chosen": -356.7527770996094, "logps/rejected": -357.9443054199219, "loss": 0.5837, "rewards/accuracies": 0.75, "rewards/chosen": -0.5280917882919312, "rewards/margins": 0.3985002934932709, "rewards/rejected": -0.9265921711921692, "step": 3140 }, { "epoch": 0.21, "learning_rate": 4.8306098446591895e-06, "logits/chosen": -2.6090426445007324, "logits/rejected": -2.6477925777435303, "logps/chosen": -463.68072509765625, "logps/rejected": -425.4881896972656, "loss": 0.6984, "rewards/accuracies": 0.5, "rewards/chosen": -0.5927342176437378, "rewards/margins": 0.3940940499305725, "rewards/rejected": -0.9868283271789551, "step": 3150 }, { "epoch": 0.21, "learning_rate": 4.828537748506601e-06, "logits/chosen": -2.613419771194458, "logits/rejected": -2.545729875564575, "logps/chosen": -398.8688049316406, "logps/rejected": -460.64581298828125, "loss": 0.6951, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.44156503677368164, "rewards/margins": 0.43986767530441284, "rewards/rejected": -0.8814327120780945, "step": 3160 }, { "epoch": 0.21, "learning_rate": 4.826453505560469e-06, "logits/chosen": -2.530033588409424, "logits/rejected": -2.5991828441619873, "logps/chosen": -325.0137023925781, "logps/rejected": -420.013671875, "loss": 0.5368, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40284332633018494, "rewards/margins": 0.6239348649978638, "rewards/rejected": -1.026778221130371, "step": 3170 }, { "epoch": 0.21, "learning_rate": 4.824357126693226e-06, "logits/chosen": -2.3249502182006836, "logits/rejected": -2.104342460632324, "logps/chosen": -466.13714599609375, "logps/rejected": -325.49383544921875, "loss": 0.4687, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.5315268635749817, "rewards/margins": 0.9683923721313477, "rewards/rejected": -1.4999191761016846, "step": 3180 }, { "epoch": 0.21, "learning_rate": 4.8222486228406105e-06, "logits/chosen": -2.0594241619110107, "logits/rejected": -2.126784324645996, "logps/chosen": -307.40728759765625, "logps/rejected": -328.8841247558594, "loss": 0.4993, "rewards/accuracies": 0.75, "rewards/chosen": -0.6959290504455566, "rewards/margins": 0.5490103363990784, "rewards/rejected": -1.2449394464492798, "step": 3190 }, { "epoch": 0.21, "learning_rate": 4.820128005001612e-06, "logits/chosen": -2.2309165000915527, "logits/rejected": -2.4116392135620117, "logps/chosen": -356.24578857421875, "logps/rejected": -441.033935546875, "loss": 0.6128, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.739788830280304, "rewards/margins": 0.4947684407234192, "rewards/rejected": -1.2345572710037231, "step": 3200 }, { "epoch": 0.21, "learning_rate": 4.817995284238412e-06, "logits/chosen": -2.317817211151123, "logits/rejected": -2.565655469894409, "logps/chosen": -386.046142578125, "logps/rejected": -382.74322509765625, "loss": 0.6374, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.47409671545028687, "rewards/margins": 0.45875558257102966, "rewards/rejected": -0.9328522682189941, "step": 3210 }, { "epoch": 0.21, "learning_rate": 4.815850471676327e-06, "logits/chosen": -2.107598066329956, "logits/rejected": -2.22800874710083, "logps/chosen": -308.1783142089844, "logps/rejected": -380.44512939453125, "loss": 0.6251, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0362741947174072, "rewards/margins": 0.3872363269329071, "rewards/rejected": -1.4235105514526367, "step": 3220 }, { "epoch": 0.21, "learning_rate": 4.813693578503751e-06, "logits/chosen": -2.0429303646087646, "logits/rejected": -2.1980860233306885, "logps/chosen": -394.8866882324219, "logps/rejected": -415.1434020996094, "loss": 0.6315, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9625520706176758, "rewards/margins": 0.28988438844680786, "rewards/rejected": -1.2524363994598389, "step": 3230 }, { "epoch": 0.21, "learning_rate": 4.811524615972093e-06, "logits/chosen": -2.6508214473724365, "logits/rejected": -2.228062391281128, "logps/chosen": -421.9400329589844, "logps/rejected": -352.90460205078125, "loss": 0.5991, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9435746073722839, "rewards/margins": 0.35201746225357056, "rewards/rejected": -1.2955920696258545, "step": 3240 }, { "epoch": 0.21, "learning_rate": 4.809343595395724e-06, "logits/chosen": -2.3315563201904297, "logits/rejected": -2.0697438716888428, "logps/chosen": -385.09320068359375, "logps/rejected": -379.77740478515625, "loss": 0.5256, "rewards/accuracies": 0.75, "rewards/chosen": -0.8119958639144897, "rewards/margins": 0.5800799131393433, "rewards/rejected": -1.3920756578445435, "step": 3250 }, { "epoch": 0.21, "learning_rate": 4.807150528151918e-06, "logits/chosen": -2.42061185836792, "logits/rejected": -2.132824659347534, "logps/chosen": -340.1400146484375, "logps/rejected": -332.33837890625, "loss": 0.5084, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.44665035605430603, "rewards/margins": 0.6681479811668396, "rewards/rejected": -1.1147983074188232, "step": 3260 }, { "epoch": 0.21, "learning_rate": 4.804945425680787e-06, "logits/chosen": -2.6440634727478027, "logits/rejected": -2.209912061691284, "logps/chosen": -351.9530029296875, "logps/rejected": -332.0872802734375, "loss": 0.4822, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35653167963027954, "rewards/margins": 0.6620830297470093, "rewards/rejected": -1.0186147689819336, "step": 3270 }, { "epoch": 0.21, "learning_rate": 4.802728299485225e-06, "logits/chosen": -2.2627944946289062, "logits/rejected": -2.4057581424713135, "logps/chosen": -400.33074951171875, "logps/rejected": -461.861572265625, "loss": 0.6489, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7264330983161926, "rewards/margins": 0.17443110048770905, "rewards/rejected": -0.9008641242980957, "step": 3280 }, { "epoch": 0.22, "learning_rate": 4.8004991611308495e-06, "logits/chosen": -2.164400100708008, "logits/rejected": -2.3146960735321045, "logps/chosen": -367.74267578125, "logps/rejected": -397.4012145996094, "loss": 0.6908, "rewards/accuracies": 0.5, "rewards/chosen": -0.5942524671554565, "rewards/margins": 0.31151682138442993, "rewards/rejected": -0.9057693481445312, "step": 3290 }, { "epoch": 0.22, "learning_rate": 4.798258022245937e-06, "logits/chosen": -2.2937357425689697, "logits/rejected": -2.0109310150146484, "logps/chosen": -253.11441040039062, "logps/rejected": -310.01715087890625, "loss": 0.4082, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.210373193025589, "rewards/margins": 0.7842254638671875, "rewards/rejected": -0.9945986866950989, "step": 3300 }, { "epoch": 0.22, "learning_rate": 4.796004894521365e-06, "logits/chosen": -2.49446964263916, "logits/rejected": -2.3719279766082764, "logps/chosen": -446.8296813964844, "logps/rejected": -426.35284423828125, "loss": 0.5862, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5281698107719421, "rewards/margins": 0.3869725167751312, "rewards/rejected": -0.9151424169540405, "step": 3310 }, { "epoch": 0.22, "learning_rate": 4.7937397897105545e-06, "logits/chosen": -2.5837979316711426, "logits/rejected": -2.4648940563201904, "logps/chosen": -295.7978515625, "logps/rejected": -295.44000244140625, "loss": 0.5636, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3310592472553253, "rewards/margins": 0.5538357496261597, "rewards/rejected": -0.8848949670791626, "step": 3320 }, { "epoch": 0.22, "learning_rate": 4.791462719629399e-06, "logits/chosen": -2.5264270305633545, "logits/rejected": -2.589407444000244, "logps/chosen": -407.0428771972656, "logps/rejected": -364.3183898925781, "loss": 0.6217, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4815065264701843, "rewards/margins": 0.15432050824165344, "rewards/rejected": -0.6358270645141602, "step": 3330 }, { "epoch": 0.22, "learning_rate": 4.789173696156212e-06, "logits/chosen": -2.4700942039489746, "logits/rejected": -2.3618836402893066, "logps/chosen": -350.07818603515625, "logps/rejected": -345.74725341796875, "loss": 0.6073, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7455496788024902, "rewards/margins": 0.35116735100746155, "rewards/rejected": -1.096717119216919, "step": 3340 }, { "epoch": 0.22, "learning_rate": 4.786872731231662e-06, "logits/chosen": -2.548314094543457, "logits/rejected": -2.4918599128723145, "logps/chosen": -446.01544189453125, "logps/rejected": -495.8675842285156, "loss": 0.5932, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17381136119365692, "rewards/margins": 0.887151837348938, "rewards/rejected": -1.0609632730484009, "step": 3350 }, { "epoch": 0.22, "learning_rate": 4.784559836858709e-06, "logits/chosen": -2.56068754196167, "logits/rejected": -2.5343821048736572, "logps/chosen": -420.57012939453125, "logps/rejected": -417.384765625, "loss": 0.5484, "rewards/accuracies": 0.75, "rewards/chosen": -0.4359206557273865, "rewards/margins": 0.4395936131477356, "rewards/rejected": -0.8755143284797668, "step": 3360 }, { "epoch": 0.22, "learning_rate": 4.782235025102542e-06, "logits/chosen": -2.3371522426605225, "logits/rejected": -2.6841964721679688, "logps/chosen": -430.0653381347656, "logps/rejected": -399.28131103515625, "loss": 0.5836, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3325575590133667, "rewards/margins": 0.5200690031051636, "rewards/rejected": -0.8526265025138855, "step": 3370 }, { "epoch": 0.22, "learning_rate": 4.779898308090519e-06, "logits/chosen": -2.445465564727783, "logits/rejected": -2.5201573371887207, "logps/chosen": -459.0187072753906, "logps/rejected": -477.300537109375, "loss": 0.5541, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5303617715835571, "rewards/margins": 0.4567131996154785, "rewards/rejected": -0.9870750308036804, "step": 3380 }, { "epoch": 0.22, "learning_rate": 4.777549698012101e-06, "logits/chosen": -2.197383403778076, "logits/rejected": -2.2855629920959473, "logps/chosen": -308.02545166015625, "logps/rejected": -471.09454345703125, "loss": 0.4765, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5677157044410706, "rewards/margins": 1.0973682403564453, "rewards/rejected": -1.6650841236114502, "step": 3390 }, { "epoch": 0.22, "learning_rate": 4.775189207118787e-06, "logits/chosen": -2.546276330947876, "logits/rejected": -2.33321475982666, "logps/chosen": -483.5270080566406, "logps/rejected": -526.8450927734375, "loss": 0.5009, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.328021377325058, "rewards/margins": 0.9916882514953613, "rewards/rejected": -1.3197096586227417, "step": 3400 }, { "epoch": 0.22, "learning_rate": 4.772816847724054e-06, "logits/chosen": -2.4170174598693848, "logits/rejected": -2.4794392585754395, "logps/chosen": -567.5896606445312, "logps/rejected": -457.197021484375, "loss": 0.6967, "rewards/accuracies": 0.75, "rewards/chosen": -0.722898006439209, "rewards/margins": 0.5282581448554993, "rewards/rejected": -1.251156210899353, "step": 3410 }, { "epoch": 0.22, "learning_rate": 4.770432632203294e-06, "logits/chosen": -2.2665069103240967, "logits/rejected": -2.404815673828125, "logps/chosen": -353.43902587890625, "logps/rejected": -422.36627197265625, "loss": 0.4706, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7308656573295593, "rewards/margins": 0.8495320081710815, "rewards/rejected": -1.5803978443145752, "step": 3420 }, { "epoch": 0.22, "learning_rate": 4.768036572993738e-06, "logits/chosen": -2.409658670425415, "logits/rejected": -2.3566925525665283, "logps/chosen": -419.1898498535156, "logps/rejected": -444.04180908203125, "loss": 0.5349, "rewards/accuracies": 0.75, "rewards/chosen": -1.0371206998825073, "rewards/margins": 0.6133378744125366, "rewards/rejected": -1.6504586935043335, "step": 3430 }, { "epoch": 0.23, "learning_rate": 4.765628682594409e-06, "logits/chosen": -2.053723096847534, "logits/rejected": -1.9276959896087646, "logps/chosen": -300.51214599609375, "logps/rejected": -445.822021484375, "loss": 0.4754, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.40936318039894104, "rewards/margins": 0.7937641143798828, "rewards/rejected": -1.2031272649765015, "step": 3440 }, { "epoch": 0.23, "learning_rate": 4.763208973566041e-06, "logits/chosen": -2.242048740386963, "logits/rejected": -2.1858267784118652, "logps/chosen": -421.215087890625, "logps/rejected": -487.296142578125, "loss": 0.5922, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6550520658493042, "rewards/margins": 0.769637942314148, "rewards/rejected": -1.4246900081634521, "step": 3450 }, { "epoch": 0.23, "learning_rate": 4.76077745853102e-06, "logits/chosen": -2.4033076763153076, "logits/rejected": -2.1319174766540527, "logps/chosen": -442.29925537109375, "logps/rejected": -400.6534423828125, "loss": 0.5929, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0011038780212402, "rewards/margins": 0.5951480269432068, "rewards/rejected": -1.5962518453598022, "step": 3460 }, { "epoch": 0.23, "learning_rate": 4.758334150173322e-06, "logits/chosen": -1.8501081466674805, "logits/rejected": -1.8074462413787842, "logps/chosen": -341.0419006347656, "logps/rejected": -442.912353515625, "loss": 0.4994, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8858741521835327, "rewards/margins": 0.6651721000671387, "rewards/rejected": -1.5510464906692505, "step": 3470 }, { "epoch": 0.23, "learning_rate": 4.755879061238439e-06, "logits/chosen": -2.0849146842956543, "logits/rejected": -1.8035848140716553, "logps/chosen": -452.26275634765625, "logps/rejected": -470.74822998046875, "loss": 0.5805, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1448858976364136, "rewards/margins": 0.30576300621032715, "rewards/rejected": -1.4506490230560303, "step": 3480 }, { "epoch": 0.23, "learning_rate": 4.753412204533317e-06, "logits/chosen": -1.9051125049591064, "logits/rejected": -1.9653847217559814, "logps/chosen": -321.68829345703125, "logps/rejected": -493.9693908691406, "loss": 0.6153, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5572867393493652, "rewards/margins": 0.18797598779201508, "rewards/rejected": -1.745262861251831, "step": 3490 }, { "epoch": 0.23, "learning_rate": 4.750933592926292e-06, "logits/chosen": -2.239838123321533, "logits/rejected": -2.150252342224121, "logps/chosen": -430.62799072265625, "logps/rejected": -448.97747802734375, "loss": 0.6151, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.0551931858062744, "rewards/margins": 0.3509371280670166, "rewards/rejected": -1.4061301946640015, "step": 3500 }, { "epoch": 0.23, "learning_rate": 4.7484432393470124e-06, "logits/chosen": -2.284513235092163, "logits/rejected": -2.1681177616119385, "logps/chosen": -414.47314453125, "logps/rejected": -401.70819091796875, "loss": 0.5902, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0691416263580322, "rewards/margins": 0.7813543081283569, "rewards/rejected": -1.8504959344863892, "step": 3510 }, { "epoch": 0.23, "learning_rate": 4.745941156786385e-06, "logits/chosen": -2.242554187774658, "logits/rejected": -1.9078760147094727, "logps/chosen": -298.891845703125, "logps/rejected": -330.5885009765625, "loss": 0.6425, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.9184421300888062, "rewards/margins": 0.2459874153137207, "rewards/rejected": -1.164429783821106, "step": 3520 }, { "epoch": 0.23, "learning_rate": 4.743427358296497e-06, "logits/chosen": -2.1518125534057617, "logits/rejected": -1.8860008716583252, "logps/chosen": -389.6554870605469, "logps/rejected": -460.02960205078125, "loss": 0.4746, "rewards/accuracies": 0.75, "rewards/chosen": -0.7955155372619629, "rewards/margins": 0.6738079190254211, "rewards/rejected": -1.4693235158920288, "step": 3530 }, { "epoch": 0.23, "learning_rate": 4.740901856990553e-06, "logits/chosen": -2.2190632820129395, "logits/rejected": -2.1639111042022705, "logps/chosen": -367.6224060058594, "logps/rejected": -394.49090576171875, "loss": 0.7616, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.097391128540039, "rewards/margins": 0.1312669813632965, "rewards/rejected": -1.2286580801010132, "step": 3540 }, { "epoch": 0.23, "learning_rate": 4.738364666042804e-06, "logits/chosen": -2.3005948066711426, "logits/rejected": -1.7008392810821533, "logps/chosen": -389.313232421875, "logps/rejected": -368.5771484375, "loss": 0.6126, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.3324612379074097, "rewards/margins": 0.3842897117137909, "rewards/rejected": -1.7167507410049438, "step": 3550 }, { "epoch": 0.23, "learning_rate": 4.735815798688483e-06, "logits/chosen": -1.9981590509414673, "logits/rejected": -2.083627462387085, "logps/chosen": -366.39874267578125, "logps/rejected": -425.00469970703125, "loss": 0.4667, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2873318195343018, "rewards/margins": 0.6296716332435608, "rewards/rejected": -1.9170036315917969, "step": 3560 }, { "epoch": 0.23, "learning_rate": 4.7332552682237285e-06, "logits/chosen": -2.1684536933898926, "logits/rejected": -2.210479497909546, "logps/chosen": -428.04638671875, "logps/rejected": -458.6451110839844, "loss": 0.5565, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.248167872428894, "rewards/margins": 0.5324001312255859, "rewards/rejected": -1.7805678844451904, "step": 3570 }, { "epoch": 0.23, "learning_rate": 4.7306830880055234e-06, "logits/chosen": -2.2435035705566406, "logits/rejected": -2.0245437622070312, "logps/chosen": -425.3526306152344, "logps/rejected": -438.34307861328125, "loss": 0.4326, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29618892073631287, "rewards/margins": 1.0744205713272095, "rewards/rejected": -1.3706094026565552, "step": 3580 }, { "epoch": 0.23, "learning_rate": 4.728099271451619e-06, "logits/chosen": -2.253227710723877, "logits/rejected": -2.200775623321533, "logps/chosen": -394.0977478027344, "logps/rejected": -383.7187805175781, "loss": 0.6185, "rewards/accuracies": 0.75, "rewards/chosen": -0.43818607926368713, "rewards/margins": 0.6707245707511902, "rewards/rejected": -1.1089107990264893, "step": 3590 }, { "epoch": 0.24, "learning_rate": 4.725503832040466e-06, "logits/chosen": -2.3103957176208496, "logits/rejected": -2.211575984954834, "logps/chosen": -375.616943359375, "logps/rejected": -480.480224609375, "loss": 0.6166, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.8503471612930298, "rewards/margins": 0.2555815577507019, "rewards/rejected": -1.1059287786483765, "step": 3600 }, { "epoch": 0.24, "learning_rate": 4.722896783311152e-06, "logits/chosen": -2.03578519821167, "logits/rejected": -2.1414883136749268, "logps/chosen": -342.0790100097656, "logps/rejected": -416.62762451171875, "loss": 0.6199, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6153795123100281, "rewards/margins": 1.017171025276184, "rewards/rejected": -1.632550597190857, "step": 3610 }, { "epoch": 0.24, "learning_rate": 4.720278138863318e-06, "logits/chosen": -2.362046718597412, "logits/rejected": -1.7298154830932617, "logps/chosen": -337.70806884765625, "logps/rejected": -404.6884765625, "loss": 0.461, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8324246406555176, "rewards/margins": 1.3992846012115479, "rewards/rejected": -2.2317090034484863, "step": 3620 }, { "epoch": 0.24, "learning_rate": 4.717647912357095e-06, "logits/chosen": -2.1297411918640137, "logits/rejected": -2.0725905895233154, "logps/chosen": -417.9518127441406, "logps/rejected": -386.58245849609375, "loss": 0.5315, "rewards/accuracies": 0.75, "rewards/chosen": -0.5126937627792358, "rewards/margins": 0.6966745853424072, "rewards/rejected": -1.209368348121643, "step": 3630 }, { "epoch": 0.24, "learning_rate": 4.715006117513035e-06, "logits/chosen": -2.3161420822143555, "logits/rejected": -2.2902309894561768, "logps/chosen": -437.8321838378906, "logps/rejected": -506.87744140625, "loss": 0.5374, "rewards/accuracies": 0.75, "rewards/chosen": -0.5351322293281555, "rewards/margins": 0.9479573965072632, "rewards/rejected": -1.4830896854400635, "step": 3640 }, { "epoch": 0.24, "learning_rate": 4.7123527681120326e-06, "logits/chosen": -2.1307809352874756, "logits/rejected": -2.225717782974243, "logps/chosen": -424.44268798828125, "logps/rejected": -457.6105041503906, "loss": 0.5055, "rewards/accuracies": 0.75, "rewards/chosen": -0.948150634765625, "rewards/margins": 0.9346704483032227, "rewards/rejected": -1.8828208446502686, "step": 3650 }, { "epoch": 0.24, "learning_rate": 4.7096878779952594e-06, "logits/chosen": -2.139453887939453, "logits/rejected": -2.0068986415863037, "logps/chosen": -498.299072265625, "logps/rejected": -551.0009155273438, "loss": 0.687, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0130908489227295, "rewards/margins": 0.6879249811172485, "rewards/rejected": -1.701015830039978, "step": 3660 }, { "epoch": 0.24, "learning_rate": 4.707011461064086e-06, "logits/chosen": -2.2592787742614746, "logits/rejected": -2.0789124965667725, "logps/chosen": -453.88763427734375, "logps/rejected": -462.21533203125, "loss": 0.6584, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.4273847341537476, "rewards/margins": 0.19941839575767517, "rewards/rejected": -1.6268031597137451, "step": 3670 }, { "epoch": 0.24, "learning_rate": 4.704323531280016e-06, "logits/chosen": -2.475243330001831, "logits/rejected": -2.24717378616333, "logps/chosen": -546.4675903320312, "logps/rejected": -525.025390625, "loss": 0.5654, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.911401093006134, "rewards/margins": 0.4049322009086609, "rewards/rejected": -1.3163334131240845, "step": 3680 }, { "epoch": 0.24, "learning_rate": 4.701624102664606e-06, "logits/chosen": -2.4433531761169434, "logits/rejected": -2.279224395751953, "logps/chosen": -578.2789916992188, "logps/rejected": -490.89068603515625, "loss": 0.5225, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0811493396759033, "rewards/margins": 0.6341270804405212, "rewards/rejected": -1.7152763605117798, "step": 3690 }, { "epoch": 0.24, "learning_rate": 4.698913189299399e-06, "logits/chosen": -1.9927383661270142, "logits/rejected": -1.8205230236053467, "logps/chosen": -437.2293395996094, "logps/rejected": -482.68701171875, "loss": 0.4716, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2083475589752197, "rewards/margins": 1.060872197151184, "rewards/rejected": -2.2692196369171143, "step": 3700 }, { "epoch": 0.24, "learning_rate": 4.696190805325847e-06, "logits/chosen": -2.0010743141174316, "logits/rejected": -2.033720016479492, "logps/chosen": -365.48394775390625, "logps/rejected": -443.25347900390625, "loss": 0.5951, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3819520473480225, "rewards/margins": 0.6150561571121216, "rewards/rejected": -1.9970080852508545, "step": 3710 }, { "epoch": 0.24, "learning_rate": 4.693456964945239e-06, "logits/chosen": -2.0450572967529297, "logits/rejected": -2.0645382404327393, "logps/chosen": -420.50335693359375, "logps/rejected": -503.34454345703125, "loss": 0.534, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9044291377067566, "rewards/margins": 0.6475400924682617, "rewards/rejected": -1.551969289779663, "step": 3720 }, { "epoch": 0.24, "learning_rate": 4.6907116824186245e-06, "logits/chosen": -2.178130626678467, "logits/rejected": -2.186275005340576, "logps/chosen": -364.72845458984375, "logps/rejected": -462.02618408203125, "loss": 0.5147, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6570572257041931, "rewards/margins": 0.6763592958450317, "rewards/rejected": -1.3334165811538696, "step": 3730 }, { "epoch": 0.24, "learning_rate": 4.687954972066742e-06, "logits/chosen": -2.0935654640197754, "logits/rejected": -1.9112679958343506, "logps/chosen": -394.4638977050781, "logps/rejected": -397.63812255859375, "loss": 0.6238, "rewards/accuracies": 0.5, "rewards/chosen": -0.7246359586715698, "rewards/margins": 0.2681662440299988, "rewards/rejected": -0.9928021430969238, "step": 3740 }, { "epoch": 0.25, "learning_rate": 4.685186848269944e-06, "logits/chosen": -1.9995677471160889, "logits/rejected": -1.9223219156265259, "logps/chosen": -397.94915771484375, "logps/rejected": -484.4215393066406, "loss": 0.4696, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6363323330879211, "rewards/margins": 1.0407795906066895, "rewards/rejected": -1.6771119832992554, "step": 3750 }, { "epoch": 0.25, "learning_rate": 4.682407325468119e-06, "logits/chosen": -2.4718260765075684, "logits/rejected": -2.0210213661193848, "logps/chosen": -443.21990966796875, "logps/rejected": -384.62713623046875, "loss": 0.7026, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.257415771484375, "rewards/margins": 0.09741837531328201, "rewards/rejected": -1.3548343181610107, "step": 3760 }, { "epoch": 0.25, "learning_rate": 4.67961641816062e-06, "logits/chosen": -2.411287784576416, "logits/rejected": -2.1336147785186768, "logps/chosen": -371.8212585449219, "logps/rejected": -365.04302978515625, "loss": 0.6053, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.4813748896121979, "rewards/margins": 0.3684390187263489, "rewards/rejected": -0.8498139381408691, "step": 3770 }, { "epoch": 0.25, "learning_rate": 4.676814140906188e-06, "logits/chosen": -2.2237911224365234, "logits/rejected": -1.9375216960906982, "logps/chosen": -399.607666015625, "logps/rejected": -406.94879150390625, "loss": 0.4214, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.43446213006973267, "rewards/margins": 1.0578770637512207, "rewards/rejected": -1.4923391342163086, "step": 3780 }, { "epoch": 0.25, "learning_rate": 4.674000508322872e-06, "logits/chosen": -2.434276580810547, "logits/rejected": -2.393608570098877, "logps/chosen": -457.020263671875, "logps/rejected": -479.9658203125, "loss": 0.54, "rewards/accuracies": 0.75, "rewards/chosen": -0.4180832505226135, "rewards/margins": 0.8254992365837097, "rewards/rejected": -1.2435824871063232, "step": 3790 }, { "epoch": 0.25, "learning_rate": 4.671175535087959e-06, "logits/chosen": -2.320284605026245, "logits/rejected": -1.8357927799224854, "logps/chosen": -394.5904846191406, "logps/rejected": -348.63140869140625, "loss": 0.6604, "rewards/accuracies": 0.75, "rewards/chosen": -0.7477728724479675, "rewards/margins": 0.5096405744552612, "rewards/rejected": -1.2574135065078735, "step": 3800 }, { "epoch": 0.25, "learning_rate": 4.6683392359378924e-06, "logits/chosen": -2.4195501804351807, "logits/rejected": -2.113314151763916, "logps/chosen": -315.3486633300781, "logps/rejected": -385.1060485839844, "loss": 0.6387, "rewards/accuracies": 0.5, "rewards/chosen": -0.5793839693069458, "rewards/margins": 0.2212570160627365, "rewards/rejected": -0.8006409406661987, "step": 3810 }, { "epoch": 0.25, "learning_rate": 4.665491625668198e-06, "logits/chosen": -2.0303101539611816, "logits/rejected": -1.8734495639801025, "logps/chosen": -309.8327941894531, "logps/rejected": -332.2201232910156, "loss": 0.5457, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.7786539793014526, "rewards/margins": 0.4457494616508484, "rewards/rejected": -1.2244032621383667, "step": 3820 }, { "epoch": 0.25, "learning_rate": 4.662632719133407e-06, "logits/chosen": -2.1171882152557373, "logits/rejected": -1.7437130212783813, "logps/chosen": -300.73651123046875, "logps/rejected": -407.58721923828125, "loss": 0.6046, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.04213285446167, "rewards/margins": 0.2609824240207672, "rewards/rejected": -1.3031153678894043, "step": 3830 }, { "epoch": 0.25, "learning_rate": 4.659762531246974e-06, "logits/chosen": -2.126164197921753, "logits/rejected": -2.0261282920837402, "logps/chosen": -279.12664794921875, "logps/rejected": -399.71173095703125, "loss": 0.5652, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.49107131361961365, "rewards/margins": 0.6926654577255249, "rewards/rejected": -1.1837369203567505, "step": 3840 }, { "epoch": 0.25, "learning_rate": 4.656881076981207e-06, "logits/chosen": -2.259749174118042, "logits/rejected": -2.4762749671936035, "logps/chosen": -387.292236328125, "logps/rejected": -439.84100341796875, "loss": 0.5908, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5123671293258667, "rewards/margins": 0.43006739020347595, "rewards/rejected": -0.9424344897270203, "step": 3850 }, { "epoch": 0.25, "learning_rate": 4.653988371367183e-06, "logits/chosen": -2.232792854309082, "logits/rejected": -1.897709608078003, "logps/chosen": -378.61553955078125, "logps/rejected": -530.4464721679688, "loss": 0.5279, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6617268323898315, "rewards/margins": 0.752768874168396, "rewards/rejected": -1.4144957065582275, "step": 3860 }, { "epoch": 0.25, "learning_rate": 4.651084429494671e-06, "logits/chosen": -2.2239532470703125, "logits/rejected": -1.796750783920288, "logps/chosen": -314.5279235839844, "logps/rejected": -361.30572509765625, "loss": 0.5363, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9054681658744812, "rewards/margins": 0.5881915092468262, "rewards/rejected": -1.4936596155166626, "step": 3870 }, { "epoch": 0.25, "learning_rate": 4.648169266512053e-06, "logits/chosen": -2.2404286861419678, "logits/rejected": -2.038695812225342, "logps/chosen": -469.52740478515625, "logps/rejected": -552.247802734375, "loss": 0.6447, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0587666034698486, "rewards/margins": 0.5267407298088074, "rewards/rejected": -1.5855072736740112, "step": 3880 }, { "epoch": 0.25, "learning_rate": 4.6452428976262505e-06, "logits/chosen": -2.192521572113037, "logits/rejected": -2.2139482498168945, "logps/chosen": -397.2904968261719, "logps/rejected": -423.5071716308594, "loss": 0.5414, "rewards/accuracies": 0.75, "rewards/chosen": -0.8510065078735352, "rewards/margins": 0.9575133323669434, "rewards/rejected": -1.808519721031189, "step": 3890 }, { "epoch": 0.26, "learning_rate": 4.642305338102633e-06, "logits/chosen": -2.1184463500976562, "logits/rejected": -2.1783528327941895, "logps/chosen": -439.29248046875, "logps/rejected": -539.232666015625, "loss": 0.5006, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8429673910140991, "rewards/margins": 1.2577102184295654, "rewards/rejected": -2.100677490234375, "step": 3900 }, { "epoch": 0.26, "learning_rate": 4.639356603264953e-06, "logits/chosen": -2.2874608039855957, "logits/rejected": -2.227522373199463, "logps/chosen": -382.64434814453125, "logps/rejected": -534.5256958007812, "loss": 0.5064, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9097906351089478, "rewards/margins": 1.0155916213989258, "rewards/rejected": -1.9253822565078735, "step": 3910 }, { "epoch": 0.26, "learning_rate": 4.636396708495255e-06, "logits/chosen": -1.881720781326294, "logits/rejected": -2.0848631858825684, "logps/chosen": -413.80279541015625, "logps/rejected": -475.28692626953125, "loss": 0.6035, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.131103754043579, "rewards/margins": 0.6518651247024536, "rewards/rejected": -1.7829687595367432, "step": 3920 }, { "epoch": 0.26, "learning_rate": 4.633425669233799e-06, "logits/chosen": -2.0936412811279297, "logits/rejected": -2.0829267501831055, "logps/chosen": -506.6336975097656, "logps/rejected": -541.3096923828125, "loss": 0.5988, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5224252939224243, "rewards/margins": 0.4728141725063324, "rewards/rejected": -1.995239496231079, "step": 3930 }, { "epoch": 0.26, "learning_rate": 4.6304435009789825e-06, "logits/chosen": -2.333017110824585, "logits/rejected": -2.3485379219055176, "logps/chosen": -378.2872619628906, "logps/rejected": -490.08038330078125, "loss": 0.5252, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.9699506759643555, "rewards/margins": 0.65302973985672, "rewards/rejected": -1.6229804754257202, "step": 3940 }, { "epoch": 0.26, "learning_rate": 4.627450219287256e-06, "logits/chosen": -2.0185725688934326, "logits/rejected": -1.6690444946289062, "logps/chosen": -388.4188232421875, "logps/rejected": -435.01483154296875, "loss": 0.6317, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3685195446014404, "rewards/margins": 0.7547028064727783, "rewards/rejected": -2.123222589492798, "step": 3950 }, { "epoch": 0.26, "learning_rate": 4.624445839773042e-06, "logits/chosen": -1.7887862920761108, "logits/rejected": -1.7998504638671875, "logps/chosen": -403.81365966796875, "logps/rejected": -547.1272583007812, "loss": 0.482, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2239279747009277, "rewards/margins": 1.0462610721588135, "rewards/rejected": -2.270189046859741, "step": 3960 }, { "epoch": 0.26, "learning_rate": 4.621430378108656e-06, "logits/chosen": -2.050891637802124, "logits/rejected": -1.8938401937484741, "logps/chosen": -389.00299072265625, "logps/rejected": -550.5980224609375, "loss": 0.5963, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2409727573394775, "rewards/margins": 0.8711128234863281, "rewards/rejected": -2.1120858192443848, "step": 3970 }, { "epoch": 0.26, "learning_rate": 4.618403850024223e-06, "logits/chosen": -2.2062628269195557, "logits/rejected": -2.1440682411193848, "logps/chosen": -441.54205322265625, "logps/rejected": -556.6419067382812, "loss": 0.5051, "rewards/accuracies": 0.75, "rewards/chosen": -1.2554422616958618, "rewards/margins": 0.930446982383728, "rewards/rejected": -2.18588924407959, "step": 3980 }, { "epoch": 0.26, "learning_rate": 4.615366271307598e-06, "logits/chosen": -2.1400365829467773, "logits/rejected": -1.9636625051498413, "logps/chosen": -427.9474182128906, "logps/rejected": -428.34844970703125, "loss": 0.5877, "rewards/accuracies": 0.5, "rewards/chosen": -1.8203452825546265, "rewards/margins": 0.3573678433895111, "rewards/rejected": -2.177712917327881, "step": 3990 }, { "epoch": 0.26, "learning_rate": 4.612317657804277e-06, "logits/chosen": -1.7337572574615479, "logits/rejected": -1.5541400909423828, "logps/chosen": -336.13958740234375, "logps/rejected": -405.3411560058594, "loss": 0.5326, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.7155799865722656, "rewards/margins": 0.8053289651870728, "rewards/rejected": -2.520909070968628, "step": 4000 }, { "epoch": 0.26, "learning_rate": 4.6092580254173236e-06, "logits/chosen": -2.1133389472961426, "logits/rejected": -2.1598751544952393, "logps/chosen": -502.5541076660156, "logps/rejected": -561.4671630859375, "loss": 0.622, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6341140270233154, "rewards/margins": 0.7851090431213379, "rewards/rejected": -2.4192230701446533, "step": 4010 }, { "epoch": 0.26, "learning_rate": 4.606187390107277e-06, "logits/chosen": -1.842321753501892, "logits/rejected": -1.8627601861953735, "logps/chosen": -441.71307373046875, "logps/rejected": -457.519775390625, "loss": 0.6325, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.5453346967697144, "rewards/margins": 0.3694360852241516, "rewards/rejected": -1.9147708415985107, "step": 4020 }, { "epoch": 0.26, "learning_rate": 4.603105767892077e-06, "logits/chosen": -2.073103427886963, "logits/rejected": -2.0986146926879883, "logps/chosen": -459.52081298828125, "logps/rejected": -571.8239135742188, "loss": 0.5492, "rewards/accuracies": 0.75, "rewards/chosen": -1.6996116638183594, "rewards/margins": 0.6912637948989868, "rewards/rejected": -2.3908753395080566, "step": 4030 }, { "epoch": 0.26, "learning_rate": 4.6000131748469725e-06, "logits/chosen": -2.240130662918091, "logits/rejected": -2.3786003589630127, "logps/chosen": -482.5987854003906, "logps/rejected": -516.04931640625, "loss": 0.434, "rewards/accuracies": 0.75, "rewards/chosen": -0.6731864213943481, "rewards/margins": 0.9360666275024414, "rewards/rejected": -1.6092529296875, "step": 4040 }, { "epoch": 0.26, "learning_rate": 4.596909627104445e-06, "logits/chosen": -2.0946714878082275, "logits/rejected": -1.9713408946990967, "logps/chosen": -421.6636657714844, "logps/rejected": -420.95709228515625, "loss": 0.5574, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.4437962770462036, "rewards/margins": 0.46572622656822205, "rewards/rejected": -1.9095224142074585, "step": 4050 }, { "epoch": 0.27, "learning_rate": 4.5937951408541215e-06, "logits/chosen": -2.180326461791992, "logits/rejected": -1.5821549892425537, "logps/chosen": -513.3946533203125, "logps/rejected": -469.1312561035156, "loss": 0.5073, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.408052682876587, "rewards/margins": 0.7202036380767822, "rewards/rejected": -2.128256320953369, "step": 4060 }, { "epoch": 0.27, "learning_rate": 4.590669732342685e-06, "logits/chosen": -1.8135654926300049, "logits/rejected": -2.0680251121520996, "logps/chosen": -355.2364807128906, "logps/rejected": -480.3870544433594, "loss": 0.5478, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2912461757659912, "rewards/margins": 0.7113165855407715, "rewards/rejected": -2.002562999725342, "step": 4070 }, { "epoch": 0.27, "learning_rate": 4.587533417873799e-06, "logits/chosen": -1.9205124378204346, "logits/rejected": -1.9332654476165771, "logps/chosen": -368.07330322265625, "logps/rejected": -443.73223876953125, "loss": 0.5676, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0717012882232666, "rewards/margins": 0.7870392203330994, "rewards/rejected": -1.8587405681610107, "step": 4080 }, { "epoch": 0.27, "learning_rate": 4.584386213808016e-06, "logits/chosen": -1.9750468730926514, "logits/rejected": -1.7371772527694702, "logps/chosen": -380.44927978515625, "logps/rejected": -362.88751220703125, "loss": 0.6002, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1538901329040527, "rewards/margins": 0.6567569971084595, "rewards/rejected": -1.8106470108032227, "step": 4090 }, { "epoch": 0.27, "learning_rate": 4.581228136562693e-06, "logits/chosen": -2.6001052856445312, "logits/rejected": -2.1924033164978027, "logps/chosen": -488.16082763671875, "logps/rejected": -533.7776489257812, "loss": 0.5543, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1459815502166748, "rewards/margins": 0.7997251749038696, "rewards/rejected": -1.9457066059112549, "step": 4100 }, { "epoch": 0.27, "learning_rate": 4.578059202611909e-06, "logits/chosen": -2.046915054321289, "logits/rejected": -2.2624149322509766, "logps/chosen": -352.61962890625, "logps/rejected": -454.8382263183594, "loss": 0.4396, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0706390142440796, "rewards/margins": 0.870526134967804, "rewards/rejected": -1.9411652088165283, "step": 4110 }, { "epoch": 0.27, "learning_rate": 4.574879428486376e-06, "logits/chosen": -1.712337851524353, "logits/rejected": -1.7488536834716797, "logps/chosen": -363.9149169921875, "logps/rejected": -437.3489685058594, "loss": 0.5609, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.2686556577682495, "rewards/margins": 0.8758034706115723, "rewards/rejected": -2.1444592475891113, "step": 4120 }, { "epoch": 0.27, "learning_rate": 4.571688830773352e-06, "logits/chosen": -2.3221523761749268, "logits/rejected": -2.22611927986145, "logps/chosen": -467.30377197265625, "logps/rejected": -583.3150634765625, "loss": 0.4784, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.326136589050293, "rewards/margins": 0.8334660530090332, "rewards/rejected": -2.159602642059326, "step": 4130 }, { "epoch": 0.27, "learning_rate": 4.568487426116559e-06, "logits/chosen": -2.140794277191162, "logits/rejected": -2.1583950519561768, "logps/chosen": -437.3667907714844, "logps/rejected": -428.0228576660156, "loss": 0.5807, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.687368392944336, "rewards/margins": 0.315930038690567, "rewards/rejected": -2.00329852104187, "step": 4140 }, { "epoch": 0.27, "learning_rate": 4.565275231216092e-06, "logits/chosen": -1.6297184228897095, "logits/rejected": -1.4589264392852783, "logps/chosen": -418.3526306152344, "logps/rejected": -456.468017578125, "loss": 0.5298, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8031368255615234, "rewards/margins": 0.7424917817115784, "rewards/rejected": -2.545628309249878, "step": 4150 }, { "epoch": 0.27, "learning_rate": 4.562052262828331e-06, "logits/chosen": -1.6205329895019531, "logits/rejected": -1.862931489944458, "logps/chosen": -362.370361328125, "logps/rejected": -370.2030334472656, "loss": 0.6984, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.7531535625457764, "rewards/margins": -0.05112253874540329, "rewards/rejected": -1.702031135559082, "step": 4160 }, { "epoch": 0.27, "learning_rate": 4.558818537765861e-06, "logits/chosen": -2.1181905269622803, "logits/rejected": -2.2233452796936035, "logps/chosen": -423.51629638671875, "logps/rejected": -519.7943115234375, "loss": 0.4127, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3734153509140015, "rewards/margins": 0.9970091581344604, "rewards/rejected": -2.370424509048462, "step": 4170 }, { "epoch": 0.27, "learning_rate": 4.555574072897374e-06, "logits/chosen": -2.010232448577881, "logits/rejected": -1.2434194087982178, "logps/chosen": -479.2718811035156, "logps/rejected": -380.8519592285156, "loss": 0.5867, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -1.7946832180023193, "rewards/margins": 0.40616196393966675, "rewards/rejected": -2.200845241546631, "step": 4180 }, { "epoch": 0.27, "learning_rate": 4.552318885147589e-06, "logits/chosen": -2.271369457244873, "logits/rejected": -1.6737544536590576, "logps/chosen": -410.2076721191406, "logps/rejected": -420.95220947265625, "loss": 0.6536, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.0419671535491943, "rewards/margins": 0.33758771419525146, "rewards/rejected": -1.3795549869537354, "step": 4190 }, { "epoch": 0.27, "learning_rate": 4.549052991497159e-06, "logits/chosen": -1.976317048072815, "logits/rejected": -2.192378044128418, "logps/chosen": -286.4415283203125, "logps/rejected": -399.6639404296875, "loss": 0.4888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8588165044784546, "rewards/margins": 0.7741298675537109, "rewards/rejected": -1.6329463720321655, "step": 4200 }, { "epoch": 0.28, "learning_rate": 4.545776408982585e-06, "logits/chosen": -1.9261176586151123, "logits/rejected": -1.5991665124893188, "logps/chosen": -292.0099792480469, "logps/rejected": -431.8436584472656, "loss": 0.5102, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.317204475402832, "rewards/margins": 1.204416036605835, "rewards/rejected": -2.521620750427246, "step": 4210 }, { "epoch": 0.28, "learning_rate": 4.542489154696128e-06, "logits/chosen": -2.0769829750061035, "logits/rejected": -1.9520477056503296, "logps/chosen": -391.1345520019531, "logps/rejected": -382.9508972167969, "loss": 0.6132, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.9532631635665894, "rewards/margins": 0.8000235557556152, "rewards/rejected": -1.753286600112915, "step": 4220 }, { "epoch": 0.28, "learning_rate": 4.5391912457857145e-06, "logits/chosen": -1.8980144262313843, "logits/rejected": -1.5454305410385132, "logps/chosen": -319.369873046875, "logps/rejected": -440.15936279296875, "loss": 0.5566, "rewards/accuracies": 0.75, "rewards/chosen": -0.8968728184700012, "rewards/margins": 1.1255744695663452, "rewards/rejected": -2.022447347640991, "step": 4230 }, { "epoch": 0.28, "learning_rate": 4.535882699454854e-06, "logits/chosen": -2.2944552898406982, "logits/rejected": -1.9272899627685547, "logps/chosen": -397.7351379394531, "logps/rejected": -526.8030395507812, "loss": 0.4981, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0970957279205322, "rewards/margins": 1.2821524143218994, "rewards/rejected": -2.3792481422424316, "step": 4240 }, { "epoch": 0.28, "learning_rate": 4.532563532962546e-06, "logits/chosen": -2.3536887168884277, "logits/rejected": -1.7265129089355469, "logps/chosen": -443.1983947753906, "logps/rejected": -416.35797119140625, "loss": 0.6158, "rewards/accuracies": 0.75, "rewards/chosen": -0.9765421152114868, "rewards/margins": 1.1191794872283936, "rewards/rejected": -2.09572172164917, "step": 4250 }, { "epoch": 0.28, "learning_rate": 4.529233763623187e-06, "logits/chosen": -2.1754398345947266, "logits/rejected": -2.0757946968078613, "logps/chosen": -416.9156188964844, "logps/rejected": -435.5135192871094, "loss": 0.5907, "rewards/accuracies": 0.75, "rewards/chosen": -0.8619926571846008, "rewards/margins": 0.7766445875167847, "rewards/rejected": -1.6386373043060303, "step": 4260 }, { "epoch": 0.28, "learning_rate": 4.5258934088064854e-06, "logits/chosen": -2.361896514892578, "logits/rejected": -2.3284542560577393, "logps/chosen": -416.2744140625, "logps/rejected": -625.7243041992188, "loss": 0.4612, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0023585557937622, "rewards/margins": 1.0771377086639404, "rewards/rejected": -2.079496383666992, "step": 4270 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.9742462635040283, "logits/rejected": -1.6750361919403076, "logps/chosen": -322.1027526855469, "logps/rejected": -342.66912841796875, "loss": 0.7162, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.497710943222046, "rewards/margins": 0.40949931740760803, "rewards/rejected": -1.907210111618042, "step": 4280 }, { "epoch": 0.28, "learning_rate": 4.519181012495892e-06, "logits/chosen": -2.279792308807373, "logits/rejected": -1.960753083229065, "logps/chosen": -444.9163513183594, "logps/rejected": -554.8828125, "loss": 0.5329, "rewards/accuracies": 0.75, "rewards/chosen": -1.734828233718872, "rewards/margins": 0.7434547543525696, "rewards/rejected": -2.4782826900482178, "step": 4290 }, { "epoch": 0.28, "learning_rate": 4.515809006017147e-06, "logits/chosen": -1.8855798244476318, "logits/rejected": -1.7125604152679443, "logps/chosen": -436.1876525878906, "logps/rejected": -519.3978271484375, "loss": 0.6427, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2301485538482666, "rewards/margins": 0.7057285308837891, "rewards/rejected": -2.9358768463134766, "step": 4300 }, { "epoch": 0.28, "learning_rate": 4.512426484091171e-06, "logits/chosen": -1.9551712274551392, "logits/rejected": -2.109748363494873, "logps/chosen": -495.01007080078125, "logps/rejected": -530.8716430664062, "loss": 0.4651, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.095219373703003, "rewards/margins": 0.6353042125701904, "rewards/rejected": -2.7305235862731934, "step": 4310 }, { "epoch": 0.28, "learning_rate": 4.509033464362858e-06, "logits/chosen": -2.250704765319824, "logits/rejected": -2.059197187423706, "logps/chosen": -586.2399291992188, "logps/rejected": -715.1597900390625, "loss": 0.4659, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1677844524383545, "rewards/margins": 1.3767186403274536, "rewards/rejected": -3.5445029735565186, "step": 4320 }, { "epoch": 0.28, "learning_rate": 4.505629964531857e-06, "logits/chosen": -2.2147953510284424, "logits/rejected": -1.9781885147094727, "logps/chosen": -441.72674560546875, "logps/rejected": -460.2294006347656, "loss": 0.4821, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.527851939201355, "rewards/margins": 0.9044572710990906, "rewards/rejected": -2.432309150695801, "step": 4330 }, { "epoch": 0.28, "learning_rate": 4.502216002352492e-06, "logits/chosen": -2.224872350692749, "logits/rejected": -2.1673412322998047, "logps/chosen": -551.7815551757812, "logps/rejected": -583.5092163085938, "loss": 0.6369, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7062419652938843, "rewards/margins": 0.8081586956977844, "rewards/rejected": -2.5144007205963135, "step": 4340 }, { "epoch": 0.28, "learning_rate": 4.498791595633663e-06, "logits/chosen": -1.6142504215240479, "logits/rejected": -1.7114217281341553, "logps/chosen": -358.05645751953125, "logps/rejected": -460.80279541015625, "loss": 0.5532, "rewards/accuracies": 0.75, "rewards/chosen": -1.6065235137939453, "rewards/margins": 0.5826237201690674, "rewards/rejected": -2.189147472381592, "step": 4350 }, { "epoch": 0.29, "learning_rate": 4.495356762238751e-06, "logits/chosen": -1.9433860778808594, "logits/rejected": -1.9632737636566162, "logps/chosen": -393.555908203125, "logps/rejected": -529.794677734375, "loss": 0.5459, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6980907917022705, "rewards/margins": 0.9175578355789185, "rewards/rejected": -2.6156485080718994, "step": 4360 }, { "epoch": 0.29, "learning_rate": 4.491911520085532e-06, "logits/chosen": -2.3992745876312256, "logits/rejected": -2.0180933475494385, "logps/chosen": -497.4437561035156, "logps/rejected": -598.4129638671875, "loss": 0.3831, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.9618456363677979, "rewards/margins": 1.466132402420044, "rewards/rejected": -3.4279778003692627, "step": 4370 }, { "epoch": 0.29, "learning_rate": 4.488455887146075e-06, "logits/chosen": -1.813454270362854, "logits/rejected": -2.147561550140381, "logps/chosen": -557.65478515625, "logps/rejected": -547.4513549804688, "loss": 0.7619, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.099945306777954, "rewards/margins": 0.7333266139030457, "rewards/rejected": -2.8332715034484863, "step": 4380 }, { "epoch": 0.29, "learning_rate": 4.484989881446654e-06, "logits/chosen": -2.0431084632873535, "logits/rejected": -2.0684776306152344, "logps/chosen": -519.9021606445312, "logps/rejected": -536.9307861328125, "loss": 0.4899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.86326003074646, "rewards/margins": 0.8370512127876282, "rewards/rejected": -2.7003111839294434, "step": 4390 }, { "epoch": 0.29, "learning_rate": 4.481513521067654e-06, "logits/chosen": -2.206216335296631, "logits/rejected": -1.772658109664917, "logps/chosen": -440.90008544921875, "logps/rejected": -565.4508056640625, "loss": 0.5633, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6224441528320312, "rewards/margins": 1.1807271242141724, "rewards/rejected": -2.8031716346740723, "step": 4400 }, { "epoch": 0.29, "learning_rate": 4.478026824143473e-06, "logits/chosen": -2.2154037952423096, "logits/rejected": -1.8642908334732056, "logps/chosen": -493.7405700683594, "logps/rejected": -593.7854614257812, "loss": 0.4516, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1170547008514404, "rewards/margins": 0.7973050475120544, "rewards/rejected": -2.9143595695495605, "step": 4410 }, { "epoch": 0.29, "learning_rate": 4.474529808862429e-06, "logits/chosen": -2.2030978202819824, "logits/rejected": -2.3663721084594727, "logps/chosen": -544.3342895507812, "logps/rejected": -605.0272216796875, "loss": 0.6167, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7068147659301758, "rewards/margins": 0.36660951375961304, "rewards/rejected": -2.0734241008758545, "step": 4420 }, { "epoch": 0.29, "learning_rate": 4.471022493466669e-06, "logits/chosen": -2.409914493560791, "logits/rejected": -1.8466516733169556, "logps/chosen": -686.7117919921875, "logps/rejected": -613.4614868164062, "loss": 0.5096, "rewards/accuracies": 0.75, "rewards/chosen": -1.2821083068847656, "rewards/margins": 1.3262908458709717, "rewards/rejected": -2.608398914337158, "step": 4430 }, { "epoch": 0.29, "learning_rate": 4.467504896252066e-06, "logits/chosen": -2.1752190589904785, "logits/rejected": -2.1447174549102783, "logps/chosen": -549.7022705078125, "logps/rejected": -543.849609375, "loss": 0.8446, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7671550512313843, "rewards/margins": 0.3856579661369324, "rewards/rejected": -2.152813196182251, "step": 4440 }, { "epoch": 0.29, "learning_rate": 4.463977035568132e-06, "logits/chosen": -2.402294158935547, "logits/rejected": -1.758493185043335, "logps/chosen": -483.510986328125, "logps/rejected": -524.33056640625, "loss": 0.5975, "rewards/accuracies": 0.75, "rewards/chosen": -1.6731014251708984, "rewards/margins": 0.6348594427108765, "rewards/rejected": -2.3079609870910645, "step": 4450 }, { "epoch": 0.29, "learning_rate": 4.460438929817914e-06, "logits/chosen": -1.9748367071151733, "logits/rejected": -1.612786054611206, "logps/chosen": -381.75091552734375, "logps/rejected": -470.46441650390625, "loss": 0.55, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6240602731704712, "rewards/margins": 0.9709591865539551, "rewards/rejected": -2.5950193405151367, "step": 4460 }, { "epoch": 0.29, "learning_rate": 4.456890597457907e-06, "logits/chosen": -2.1747958660125732, "logits/rejected": -1.5399408340454102, "logps/chosen": -488.53912353515625, "logps/rejected": -555.6651000976562, "loss": 0.5172, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7258663177490234, "rewards/margins": 1.2122005224227905, "rewards/rejected": -2.9380667209625244, "step": 4470 }, { "epoch": 0.29, "learning_rate": 4.453332056997951e-06, "logits/chosen": -1.8284416198730469, "logits/rejected": -2.1309056282043457, "logps/chosen": -527.9506225585938, "logps/rejected": -619.3512573242188, "loss": 0.5483, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.246567487716675, "rewards/margins": 0.6451946496963501, "rewards/rejected": -2.8917624950408936, "step": 4480 }, { "epoch": 0.29, "learning_rate": 4.449763327001134e-06, "logits/chosen": -1.84951651096344, "logits/rejected": -2.066161870956421, "logps/chosen": -416.08087158203125, "logps/rejected": -450.5859375, "loss": 0.6554, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9449373483657837, "rewards/margins": 0.45978718996047974, "rewards/rejected": -2.404724597930908, "step": 4490 }, { "epoch": 0.29, "learning_rate": 4.446184426083702e-06, "logits/chosen": -2.311021327972412, "logits/rejected": -2.4845499992370605, "logps/chosen": -478.23431396484375, "logps/rejected": -570.850341796875, "loss": 0.6725, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1061079502105713, "rewards/margins": 0.28127962350845337, "rewards/rejected": -2.38738751411438, "step": 4500 }, { "epoch": 0.3, "learning_rate": 4.442595372914954e-06, "logits/chosen": -1.8901832103729248, "logits/rejected": -1.8583076000213623, "logps/chosen": -542.8806762695312, "logps/rejected": -535.1598510742188, "loss": 0.4545, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.166436195373535, "rewards/margins": 0.6861886382102966, "rewards/rejected": -2.8526248931884766, "step": 4510 }, { "epoch": 0.3, "learning_rate": 4.43899618621715e-06, "logits/chosen": -2.3254332542419434, "logits/rejected": -2.0657501220703125, "logps/chosen": -656.4212646484375, "logps/rejected": -512.197021484375, "loss": 0.6208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3151257038116455, "rewards/margins": 0.511042594909668, "rewards/rejected": -2.8261685371398926, "step": 4520 }, { "epoch": 0.3, "learning_rate": 4.4353868847654105e-06, "logits/chosen": -1.948752760887146, "logits/rejected": -1.5994007587432861, "logps/chosen": -457.9671325683594, "logps/rejected": -459.787353515625, "loss": 0.4473, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8508408069610596, "rewards/margins": 0.8039600253105164, "rewards/rejected": -2.6548008918762207, "step": 4530 }, { "epoch": 0.3, "learning_rate": 4.43176748738762e-06, "logits/chosen": -2.456021785736084, "logits/rejected": -2.0521798133850098, "logps/chosen": -520.0567626953125, "logps/rejected": -550.7706909179688, "loss": 0.6049, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.211097478866577, "rewards/margins": 0.2645843029022217, "rewards/rejected": -2.475681781768799, "step": 4540 }, { "epoch": 0.3, "learning_rate": 4.4281380129643295e-06, "logits/chosen": -1.8761346340179443, "logits/rejected": -1.4433989524841309, "logps/chosen": -607.0074462890625, "logps/rejected": -571.9659423828125, "loss": 0.5778, "rewards/accuracies": 0.75, "rewards/chosen": -2.474813461303711, "rewards/margins": 0.7461470365524292, "rewards/rejected": -3.2209606170654297, "step": 4550 }, { "epoch": 0.3, "learning_rate": 4.424498480428654e-06, "logits/chosen": -1.8305082321166992, "logits/rejected": -1.5906250476837158, "logps/chosen": -509.3614196777344, "logps/rejected": -521.6648559570312, "loss": 0.5047, "rewards/accuracies": 0.75, "rewards/chosen": -2.1890828609466553, "rewards/margins": 0.8899819254875183, "rewards/rejected": -3.0790648460388184, "step": 4560 }, { "epoch": 0.3, "learning_rate": 4.420848908766178e-06, "logits/chosen": -1.8799827098846436, "logits/rejected": -1.8004913330078125, "logps/chosen": -416.0606994628906, "logps/rejected": -512.1343383789062, "loss": 0.4316, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8887369632720947, "rewards/margins": 1.1407749652862549, "rewards/rejected": -3.0295119285583496, "step": 4570 }, { "epoch": 0.3, "learning_rate": 4.417189317014855e-06, "logits/chosen": -2.281931161880493, "logits/rejected": -1.9250017404556274, "logps/chosen": -531.4035034179688, "logps/rejected": -626.2095947265625, "loss": 0.5609, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3789849281311035, "rewards/margins": 0.8340399861335754, "rewards/rejected": -3.213024616241455, "step": 4580 }, { "epoch": 0.3, "learning_rate": 4.41351972426491e-06, "logits/chosen": -2.017589569091797, "logits/rejected": -2.132452964782715, "logps/chosen": -540.7257690429688, "logps/rejected": -575.760498046875, "loss": 0.5085, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.080129623413086, "rewards/margins": 1.0058696269989014, "rewards/rejected": -3.085999011993408, "step": 4590 }, { "epoch": 0.3, "learning_rate": 4.409840149658735e-06, "logits/chosen": -1.8754075765609741, "logits/rejected": -1.7641195058822632, "logps/chosen": -501.75738525390625, "logps/rejected": -543.0303955078125, "loss": 0.6767, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5409250259399414, "rewards/margins": 0.22758810222148895, "rewards/rejected": -2.7685132026672363, "step": 4600 }, { "epoch": 0.3, "learning_rate": 4.4061506123907925e-06, "logits/chosen": -1.8758211135864258, "logits/rejected": -1.8451995849609375, "logps/chosen": -545.2836303710938, "logps/rejected": -475.25299072265625, "loss": 0.574, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.855198860168457, "rewards/margins": 0.3814522325992584, "rewards/rejected": -2.2366509437561035, "step": 4610 }, { "epoch": 0.3, "learning_rate": 4.402451131707519e-06, "logits/chosen": -2.3047826290130615, "logits/rejected": -1.9144645929336548, "logps/chosen": -508.73443603515625, "logps/rejected": -539.4705200195312, "loss": 0.553, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7654201984405518, "rewards/margins": 0.7647563219070435, "rewards/rejected": -2.5301764011383057, "step": 4620 }, { "epoch": 0.3, "learning_rate": 4.398741726907215e-06, "logits/chosen": -1.8007876873016357, "logits/rejected": -1.868740439414978, "logps/chosen": -496.21282958984375, "logps/rejected": -553.9698486328125, "loss": 0.8282, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.53663969039917, "rewards/margins": 0.03457468003034592, "rewards/rejected": -2.571214437484741, "step": 4630 }, { "epoch": 0.3, "learning_rate": 4.395022417339955e-06, "logits/chosen": -2.113487720489502, "logits/rejected": -1.8683029413223267, "logps/chosen": -530.5501708984375, "logps/rejected": -591.97021484375, "loss": 0.3195, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.364399790763855, "rewards/margins": 1.4552600383758545, "rewards/rejected": -2.819659948348999, "step": 4640 }, { "epoch": 0.3, "learning_rate": 4.391293222407479e-06, "logits/chosen": -2.048769474029541, "logits/rejected": -1.9766504764556885, "logps/chosen": -475.11651611328125, "logps/rejected": -549.103271484375, "loss": 0.5875, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.867302656173706, "rewards/margins": 0.6196926832199097, "rewards/rejected": -2.486995220184326, "step": 4650 }, { "epoch": 0.3, "learning_rate": 4.387554161563094e-06, "logits/chosen": -1.9550203084945679, "logits/rejected": -2.0374646186828613, "logps/chosen": -420.591796875, "logps/rejected": -440.51904296875, "loss": 0.4986, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6441049575805664, "rewards/margins": 0.697167158126831, "rewards/rejected": -2.3412718772888184, "step": 4660 }, { "epoch": 0.31, "learning_rate": 4.383805254311575e-06, "logits/chosen": -2.0355887413024902, "logits/rejected": -2.292731761932373, "logps/chosen": -399.4480285644531, "logps/rejected": -524.9561767578125, "loss": 0.4126, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9074411392211914, "rewards/margins": 0.7646468877792358, "rewards/rejected": -2.672088146209717, "step": 4670 }, { "epoch": 0.31, "learning_rate": 4.380046520209056e-06, "logits/chosen": -2.2156951427459717, "logits/rejected": -2.034742593765259, "logps/chosen": -579.1575317382812, "logps/rejected": -681.671142578125, "loss": 0.4618, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0864853858947754, "rewards/margins": 1.258885145187378, "rewards/rejected": -3.3453705310821533, "step": 4680 }, { "epoch": 0.31, "learning_rate": 4.376277978862936e-06, "logits/chosen": -1.830632209777832, "logits/rejected": -1.9834390878677368, "logps/chosen": -404.22003173828125, "logps/rejected": -464.7509765625, "loss": 0.4448, "rewards/accuracies": 0.75, "rewards/chosen": -1.7368991374969482, "rewards/margins": 0.8379810452461243, "rewards/rejected": -2.5748801231384277, "step": 4690 }, { "epoch": 0.31, "learning_rate": 4.372499649931774e-06, "logits/chosen": -2.358445644378662, "logits/rejected": -1.5742676258087158, "logps/chosen": -552.6972045898438, "logps/rejected": -541.5938720703125, "loss": 0.5748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6361271142959595, "rewards/margins": 0.6868298649787903, "rewards/rejected": -2.3229570388793945, "step": 4700 }, { "epoch": 0.31, "learning_rate": 4.368711553125185e-06, "logits/chosen": -2.168393850326538, "logits/rejected": -1.5660316944122314, "logps/chosen": -542.9609985351562, "logps/rejected": -575.06494140625, "loss": 0.5706, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.852471113204956, "rewards/margins": 0.8874910473823547, "rewards/rejected": -2.739962100982666, "step": 4710 }, { "epoch": 0.31, "learning_rate": 4.364913708203734e-06, "logits/chosen": -2.1243157386779785, "logits/rejected": -1.9141753911972046, "logps/chosen": -492.9778747558594, "logps/rejected": -508.63812255859375, "loss": 0.4768, "rewards/accuracies": 0.75, "rewards/chosen": -2.0230612754821777, "rewards/margins": 0.7433712482452393, "rewards/rejected": -2.766432523727417, "step": 4720 }, { "epoch": 0.31, "learning_rate": 4.361106134978844e-06, "logits/chosen": -2.172084331512451, "logits/rejected": -1.5947898626327515, "logps/chosen": -466.58770751953125, "logps/rejected": -405.91754150390625, "loss": 0.5439, "rewards/accuracies": 0.75, "rewards/chosen": -1.236283540725708, "rewards/margins": 0.8609572649002075, "rewards/rejected": -2.097240686416626, "step": 4730 }, { "epoch": 0.31, "learning_rate": 4.357288853312681e-06, "logits/chosen": -2.3615665435791016, "logits/rejected": -2.126474380493164, "logps/chosen": -442.7232360839844, "logps/rejected": -458.16937255859375, "loss": 0.453, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3965190649032593, "rewards/margins": 0.6557938456535339, "rewards/rejected": -2.0523128509521484, "step": 4740 }, { "epoch": 0.31, "learning_rate": 4.353461883118056e-06, "logits/chosen": -1.8026084899902344, "logits/rejected": -1.6555544137954712, "logps/chosen": -484.675537109375, "logps/rejected": -532.8782958984375, "loss": 0.5328, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5672917366027832, "rewards/margins": 1.2434580326080322, "rewards/rejected": -2.8107497692108154, "step": 4750 }, { "epoch": 0.31, "learning_rate": 4.34962524435832e-06, "logits/chosen": -2.1514358520507812, "logits/rejected": -2.190011501312256, "logps/chosen": -411.32818603515625, "logps/rejected": -457.2787170410156, "loss": 0.5686, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2103312015533447, "rewards/margins": 0.9705151319503784, "rewards/rejected": -2.1808464527130127, "step": 4760 }, { "epoch": 0.31, "learning_rate": 4.34577895704726e-06, "logits/chosen": -2.0095913410186768, "logits/rejected": -2.151557683944702, "logps/chosen": -450.92962646484375, "logps/rejected": -574.5406494140625, "loss": 0.4753, "rewards/accuracies": 0.75, "rewards/chosen": -1.8003053665161133, "rewards/margins": 1.186186671257019, "rewards/rejected": -2.9864919185638428, "step": 4770 }, { "epoch": 0.31, "learning_rate": 4.3419230412489954e-06, "logits/chosen": -2.141251802444458, "logits/rejected": -1.974914312362671, "logps/chosen": -521.9320678710938, "logps/rejected": -572.5324096679688, "loss": 0.4808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7105445861816406, "rewards/margins": 1.2596935033798218, "rewards/rejected": -2.970238208770752, "step": 4780 }, { "epoch": 0.31, "learning_rate": 4.338057517077872e-06, "logits/chosen": -2.1706271171569824, "logits/rejected": -2.2307467460632324, "logps/chosen": -473.8224182128906, "logps/rejected": -512.6556396484375, "loss": 0.6164, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0227160453796387, "rewards/margins": 0.5150956511497498, "rewards/rejected": -2.537811517715454, "step": 4790 }, { "epoch": 0.31, "learning_rate": 4.334182404698356e-06, "logits/chosen": -1.4364566802978516, "logits/rejected": -1.9702926874160767, "logps/chosen": -525.397216796875, "logps/rejected": -492.07470703125, "loss": 0.5419, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.342111587524414, "rewards/margins": 0.5041699409484863, "rewards/rejected": -2.8462817668914795, "step": 4800 }, { "epoch": 0.31, "learning_rate": 4.330297724324933e-06, "logits/chosen": -1.4881843328475952, "logits/rejected": -1.5391967296600342, "logps/chosen": -454.2434997558594, "logps/rejected": -580.0150146484375, "loss": 0.4813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.307107448577881, "rewards/margins": 1.1245578527450562, "rewards/rejected": -3.4316658973693848, "step": 4810 }, { "epoch": 0.32, "learning_rate": 4.326403496221999e-06, "logits/chosen": -1.6759843826293945, "logits/rejected": -1.577532410621643, "logps/chosen": -455.1409606933594, "logps/rejected": -482.29095458984375, "loss": 0.5314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9880273342132568, "rewards/margins": 0.8221282958984375, "rewards/rejected": -2.8101556301116943, "step": 4820 }, { "epoch": 0.32, "learning_rate": 4.322499740703755e-06, "logits/chosen": -2.2555432319641113, "logits/rejected": -1.9342491626739502, "logps/chosen": -507.57135009765625, "logps/rejected": -517.7844848632812, "loss": 0.5564, "rewards/accuracies": 0.75, "rewards/chosen": -2.0044960975646973, "rewards/margins": 1.0811337232589722, "rewards/rejected": -3.085629940032959, "step": 4830 }, { "epoch": 0.32, "learning_rate": 4.318586478134101e-06, "logits/chosen": -1.9722309112548828, "logits/rejected": -2.1587672233581543, "logps/chosen": -454.78125, "logps/rejected": -635.3939819335938, "loss": 0.4745, "rewards/accuracies": 0.75, "rewards/chosen": -2.132972240447998, "rewards/margins": 1.0663903951644897, "rewards/rejected": -3.1993625164031982, "step": 4840 }, { "epoch": 0.32, "learning_rate": 4.314663728926534e-06, "logits/chosen": -1.7362626791000366, "logits/rejected": -1.6253337860107422, "logps/chosen": -512.2847290039062, "logps/rejected": -569.8792724609375, "loss": 0.7054, "rewards/accuracies": 0.5, "rewards/chosen": -2.7742371559143066, "rewards/margins": 0.21049389243125916, "rewards/rejected": -2.9847309589385986, "step": 4850 }, { "epoch": 0.32, "learning_rate": 4.310731513544033e-06, "logits/chosen": -2.0022008419036865, "logits/rejected": -1.6744985580444336, "logps/chosen": -484.0415954589844, "logps/rejected": -606.7132568359375, "loss": 0.3881, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.31618070602417, "rewards/margins": 1.1469529867172241, "rewards/rejected": -3.4631335735321045, "step": 4860 }, { "epoch": 0.32, "learning_rate": 4.30678985249896e-06, "logits/chosen": -1.9810054302215576, "logits/rejected": -1.7759273052215576, "logps/chosen": -469.736083984375, "logps/rejected": -595.0543823242188, "loss": 0.4512, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.0385842323303223, "rewards/margins": 1.3698408603668213, "rewards/rejected": -3.4084250926971436, "step": 4870 }, { "epoch": 0.32, "learning_rate": 4.302838766352952e-06, "logits/chosen": -1.886491060256958, "logits/rejected": -2.1183762550354004, "logps/chosen": -503.8521423339844, "logps/rejected": -629.655029296875, "loss": 0.5701, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.8886480331420898, "rewards/margins": 0.9551739692687988, "rewards/rejected": -2.8438220024108887, "step": 4880 }, { "epoch": 0.32, "learning_rate": 4.298878275716806e-06, "logits/chosen": -1.7694628238677979, "logits/rejected": -1.6724827289581299, "logps/chosen": -444.172607421875, "logps/rejected": -549.7099609375, "loss": 0.4663, "rewards/accuracies": 0.75, "rewards/chosen": -1.8042892217636108, "rewards/margins": 1.326806902885437, "rewards/rejected": -3.1310958862304688, "step": 4890 }, { "epoch": 0.32, "learning_rate": 4.294908401250386e-06, "logits/chosen": -2.007857084274292, "logits/rejected": -1.7297179698944092, "logps/chosen": -404.09442138671875, "logps/rejected": -523.7421264648438, "loss": 0.5109, "rewards/accuracies": 0.75, "rewards/chosen": -1.8268076181411743, "rewards/margins": 1.179343819618225, "rewards/rejected": -3.0061516761779785, "step": 4900 }, { "epoch": 0.32, "learning_rate": 4.290929163662498e-06, "logits/chosen": -1.631799340248108, "logits/rejected": -1.5702697038650513, "logps/chosen": -556.2808837890625, "logps/rejected": -681.7789916992188, "loss": 0.2917, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.999092698097229, "rewards/margins": 1.6200498342514038, "rewards/rejected": -3.619143009185791, "step": 4910 }, { "epoch": 0.32, "learning_rate": 4.286940583710796e-06, "logits/chosen": -1.9041754007339478, "logits/rejected": -1.4066526889801025, "logps/chosen": -464.74822998046875, "logps/rejected": -575.4861450195312, "loss": 0.362, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8307641744613647, "rewards/margins": 1.4628773927688599, "rewards/rejected": -3.2936415672302246, "step": 4920 }, { "epoch": 0.32, "learning_rate": 4.282942682201667e-06, "logits/chosen": -1.7861030101776123, "logits/rejected": -1.9110653400421143, "logps/chosen": -495.28131103515625, "logps/rejected": -656.2120361328125, "loss": 0.4626, "rewards/accuracies": 0.75, "rewards/chosen": -2.301225423812866, "rewards/margins": 0.6522427797317505, "rewards/rejected": -2.9534683227539062, "step": 4930 }, { "epoch": 0.32, "learning_rate": 4.278935479990123e-06, "logits/chosen": -1.8299472332000732, "logits/rejected": -1.1779818534851074, "logps/chosen": -420.9767150878906, "logps/rejected": -632.309326171875, "loss": 0.4589, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9877490997314453, "rewards/margins": 1.4198617935180664, "rewards/rejected": -3.4076104164123535, "step": 4940 }, { "epoch": 0.32, "learning_rate": 4.274918997979695e-06, "logits/chosen": -2.1755924224853516, "logits/rejected": -2.0056111812591553, "logps/chosen": -547.8631591796875, "logps/rejected": -511.017578125, "loss": 0.6709, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.987717628479004, "rewards/margins": 0.31354743242263794, "rewards/rejected": -2.301265239715576, "step": 4950 }, { "epoch": 0.32, "learning_rate": 4.270893257122319e-06, "logits/chosen": -2.0175094604492188, "logits/rejected": -1.6015933752059937, "logps/chosen": -505.9883728027344, "logps/rejected": -537.2809448242188, "loss": 0.4714, "rewards/accuracies": 0.75, "rewards/chosen": -1.958593726158142, "rewards/margins": 0.8624840974807739, "rewards/rejected": -2.821077823638916, "step": 4960 }, { "epoch": 0.33, "learning_rate": 4.266858278418232e-06, "logits/chosen": -2.076406240463257, "logits/rejected": -1.926634430885315, "logps/chosen": -438.182373046875, "logps/rejected": -538.9945678710938, "loss": 0.6311, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9475584030151367, "rewards/margins": 0.55462646484375, "rewards/rejected": -2.502185344696045, "step": 4970 }, { "epoch": 0.33, "learning_rate": 4.26281408291586e-06, "logits/chosen": -2.040910243988037, "logits/rejected": -1.9170629978179932, "logps/chosen": -571.2846069335938, "logps/rejected": -575.861083984375, "loss": 0.4263, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9143867492675781, "rewards/margins": 0.9282675981521606, "rewards/rejected": -2.8426547050476074, "step": 4980 }, { "epoch": 0.33, "learning_rate": 4.258760691711706e-06, "logits/chosen": -2.102288246154785, "logits/rejected": -2.1348159313201904, "logps/chosen": -505.85321044921875, "logps/rejected": -508.9205017089844, "loss": 0.5157, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.7438154220581055, "rewards/margins": 0.6320822834968567, "rewards/rejected": -2.3758978843688965, "step": 4990 }, { "epoch": 0.33, "learning_rate": 4.254698125950247e-06, "logits/chosen": -2.086390733718872, "logits/rejected": -1.717895269393921, "logps/chosen": -456.79022216796875, "logps/rejected": -578.4363403320312, "loss": 0.4682, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.532288074493408, "rewards/margins": 1.1843751668930054, "rewards/rejected": -3.716663360595703, "step": 5000 }, { "epoch": 0.33, "learning_rate": 4.250626406823815e-06, "logits/chosen": -2.286832094192505, "logits/rejected": -1.8051612377166748, "logps/chosen": -522.6347045898438, "logps/rejected": -707.0551147460938, "loss": 0.5183, "rewards/accuracies": 0.75, "rewards/chosen": -2.0341882705688477, "rewards/margins": 1.119905710220337, "rewards/rejected": -3.1540939807891846, "step": 5010 }, { "epoch": 0.33, "learning_rate": 4.246545555572489e-06, "logits/chosen": -2.2195258140563965, "logits/rejected": -1.7351186275482178, "logps/chosen": -532.5775146484375, "logps/rejected": -485.69708251953125, "loss": 0.5602, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5605659484863281, "rewards/margins": 1.0322446823120117, "rewards/rejected": -2.592810869216919, "step": 5020 }, { "epoch": 0.33, "learning_rate": 4.242455593483992e-06, "logits/chosen": -2.168919563293457, "logits/rejected": -2.084602117538452, "logps/chosen": -479.1246032714844, "logps/rejected": -557.0482788085938, "loss": 0.5369, "rewards/accuracies": 0.75, "rewards/chosen": -1.8440663814544678, "rewards/margins": 1.1115206480026245, "rewards/rejected": -2.955587148666382, "step": 5030 }, { "epoch": 0.33, "learning_rate": 4.238356541893567e-06, "logits/chosen": -2.0111374855041504, "logits/rejected": -2.025683879852295, "logps/chosen": -524.3695068359375, "logps/rejected": -623.1271362304688, "loss": 0.7591, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2869372367858887, "rewards/margins": 0.8538182973861694, "rewards/rejected": -3.1407554149627686, "step": 5040 }, { "epoch": 0.33, "learning_rate": 4.234248422183876e-06, "logits/chosen": -2.258674144744873, "logits/rejected": -2.089789390563965, "logps/chosen": -533.87890625, "logps/rejected": -596.0156860351562, "loss": 0.7065, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -1.7382818460464478, "rewards/margins": 0.8634111285209656, "rewards/rejected": -2.6016926765441895, "step": 5050 }, { "epoch": 0.33, "learning_rate": 4.230131255784884e-06, "logits/chosen": -1.9967342615127563, "logits/rejected": -2.0427513122558594, "logps/chosen": -422.4784240722656, "logps/rejected": -509.82550048828125, "loss": 0.6431, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4072448015213013, "rewards/margins": 0.980424702167511, "rewards/rejected": -2.387669801712036, "step": 5060 }, { "epoch": 0.33, "learning_rate": 4.226005064173748e-06, "logits/chosen": -2.129643678665161, "logits/rejected": -1.6824462413787842, "logps/chosen": -478.75177001953125, "logps/rejected": -463.4546813964844, "loss": 0.5305, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1462364196777344, "rewards/margins": 1.4325624704360962, "rewards/rejected": -2.578798770904541, "step": 5070 }, { "epoch": 0.33, "learning_rate": 4.2218698688747035e-06, "logits/chosen": -2.551898241043091, "logits/rejected": -1.856146216392517, "logps/chosen": -423.08001708984375, "logps/rejected": -483.04766845703125, "loss": 0.4279, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3132469654083252, "rewards/margins": 1.371935248374939, "rewards/rejected": -2.6851823329925537, "step": 5080 }, { "epoch": 0.33, "learning_rate": 4.217725691458957e-06, "logits/chosen": -2.3234143257141113, "logits/rejected": -1.9465463161468506, "logps/chosen": -548.46630859375, "logps/rejected": -516.9241943359375, "loss": 0.5183, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7047113180160522, "rewards/margins": 0.6300544142723083, "rewards/rejected": -2.334765911102295, "step": 5090 }, { "epoch": 0.33, "learning_rate": 4.213572553544565e-06, "logits/chosen": -2.1165995597839355, "logits/rejected": -2.101731061935425, "logps/chosen": -521.7017822265625, "logps/rejected": -487.0848083496094, "loss": 0.6403, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.7880723476409912, "rewards/margins": 0.4881587028503418, "rewards/rejected": -2.276231288909912, "step": 5100 }, { "epoch": 0.33, "learning_rate": 4.209410476796331e-06, "logits/chosen": -2.2760448455810547, "logits/rejected": -2.162075996398926, "logps/chosen": -505.9744567871094, "logps/rejected": -572.6317138671875, "loss": 0.4641, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6629308462142944, "rewards/margins": 1.0742077827453613, "rewards/rejected": -2.7371392250061035, "step": 5110 }, { "epoch": 0.33, "learning_rate": 4.205239482925686e-06, "logits/chosen": -2.5016868114471436, "logits/rejected": -2.208329439163208, "logps/chosen": -486.69451904296875, "logps/rejected": -514.7063598632812, "loss": 0.5182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.9836909174919128, "rewards/margins": 0.8252573013305664, "rewards/rejected": -1.8089481592178345, "step": 5120 }, { "epoch": 0.34, "learning_rate": 4.201059593690577e-06, "logits/chosen": -2.203293800354004, "logits/rejected": -1.8268276453018188, "logps/chosen": -417.32916259765625, "logps/rejected": -432.7237854003906, "loss": 0.7042, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.301255464553833, "rewards/margins": 0.9073659181594849, "rewards/rejected": -2.2086215019226074, "step": 5130 }, { "epoch": 0.34, "learning_rate": 4.196870830895354e-06, "logits/chosen": -2.3410606384277344, "logits/rejected": -2.001013994216919, "logps/chosen": -346.8150329589844, "logps/rejected": -517.2017822265625, "loss": 0.4128, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8742402195930481, "rewards/margins": 1.6113131046295166, "rewards/rejected": -2.48555326461792, "step": 5140 }, { "epoch": 0.34, "learning_rate": 4.192673216390657e-06, "logits/chosen": -2.111858606338501, "logits/rejected": -2.1761603355407715, "logps/chosen": -501.1302185058594, "logps/rejected": -512.5011596679688, "loss": 0.7603, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.516308307647705, "rewards/margins": 0.6539145708084106, "rewards/rejected": -2.170222759246826, "step": 5150 }, { "epoch": 0.34, "learning_rate": 4.188466772073296e-06, "logits/chosen": -2.0586180686950684, "logits/rejected": -1.8965294361114502, "logps/chosen": -447.8399353027344, "logps/rejected": -547.6044921875, "loss": 0.5181, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.681647539138794, "rewards/margins": 1.0179592370986938, "rewards/rejected": -2.6996068954467773, "step": 5160 }, { "epoch": 0.34, "learning_rate": 4.184251519886148e-06, "logits/chosen": -1.794429063796997, "logits/rejected": -1.6684194803237915, "logps/chosen": -421.6888122558594, "logps/rejected": -462.4202575683594, "loss": 0.736, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.355869770050049, "rewards/margins": 0.24080920219421387, "rewards/rejected": -2.5966787338256836, "step": 5170 }, { "epoch": 0.34, "learning_rate": 4.180027481818033e-06, "logits/chosen": -2.1500463485717773, "logits/rejected": -1.862236738204956, "logps/chosen": -497.8409118652344, "logps/rejected": -479.428955078125, "loss": 0.5666, "rewards/accuracies": 0.75, "rewards/chosen": -1.6651195287704468, "rewards/margins": 1.0246086120605469, "rewards/rejected": -2.689728260040283, "step": 5180 }, { "epoch": 0.34, "learning_rate": 4.175794679903602e-06, "logits/chosen": -2.3446333408355713, "logits/rejected": -1.8376610279083252, "logps/chosen": -455.2227478027344, "logps/rejected": -458.30029296875, "loss": 0.6515, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.7315324544906616, "rewards/margins": 0.38231196999549866, "rewards/rejected": -2.113844394683838, "step": 5190 }, { "epoch": 0.34, "learning_rate": 4.171553136223222e-06, "logits/chosen": -2.3308043479919434, "logits/rejected": -2.1053833961486816, "logps/chosen": -521.3209838867188, "logps/rejected": -570.7605590820312, "loss": 0.5516, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2747447490692139, "rewards/margins": 0.7890059351921082, "rewards/rejected": -2.0637505054473877, "step": 5200 }, { "epoch": 0.34, "learning_rate": 4.167302872902865e-06, "logits/chosen": -2.3960559368133545, "logits/rejected": -1.8900690078735352, "logps/chosen": -487.9169006347656, "logps/rejected": -510.0519104003906, "loss": 0.7002, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5574736595153809, "rewards/margins": 1.0342365503311157, "rewards/rejected": -2.591710329055786, "step": 5210 }, { "epoch": 0.34, "learning_rate": 4.163043912113985e-06, "logits/chosen": -2.1093697547912598, "logits/rejected": -1.9578189849853516, "logps/chosen": -491.0966796875, "logps/rejected": -520.5272216796875, "loss": 0.5832, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.5820024013519287, "rewards/margins": 0.46106070280075073, "rewards/rejected": -2.043062925338745, "step": 5220 }, { "epoch": 0.34, "learning_rate": 4.15877627607341e-06, "logits/chosen": -1.9862468242645264, "logits/rejected": -1.8697761297225952, "logps/chosen": -365.73193359375, "logps/rejected": -427.60260009765625, "loss": 0.5577, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5219247341156006, "rewards/margins": 0.6337286233901978, "rewards/rejected": -2.155653238296509, "step": 5230 }, { "epoch": 0.34, "learning_rate": 4.154499987043217e-06, "logits/chosen": -1.718347191810608, "logits/rejected": -1.9001245498657227, "logps/chosen": -416.593505859375, "logps/rejected": -536.9202270507812, "loss": 0.4793, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6195068359375, "rewards/margins": 1.0918627977371216, "rewards/rejected": -2.7113699913024902, "step": 5240 }, { "epoch": 0.34, "learning_rate": 4.150215067330625e-06, "logits/chosen": -2.2182650566101074, "logits/rejected": -2.0222058296203613, "logps/chosen": -499.1563415527344, "logps/rejected": -585.13232421875, "loss": 0.5189, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7680704593658447, "rewards/margins": 1.1057997941970825, "rewards/rejected": -2.873870372772217, "step": 5250 }, { "epoch": 0.34, "learning_rate": 4.145921539287876e-06, "logits/chosen": -2.067573070526123, "logits/rejected": -1.926025390625, "logps/chosen": -557.6201171875, "logps/rejected": -531.8350830078125, "loss": 0.5725, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5217723846435547, "rewards/margins": 0.673224925994873, "rewards/rejected": -3.1949973106384277, "step": 5260 }, { "epoch": 0.34, "learning_rate": 4.141619425312115e-06, "logits/chosen": -2.2126173973083496, "logits/rejected": -2.1073038578033447, "logps/chosen": -496.4862365722656, "logps/rejected": -522.2747192382812, "loss": 0.5975, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.240407943725586, "rewards/margins": 0.9295075535774231, "rewards/rejected": -3.1699154376983643, "step": 5270 }, { "epoch": 0.35, "learning_rate": 4.1373087478452735e-06, "logits/chosen": -1.852992057800293, "logits/rejected": -1.8106857538223267, "logps/chosen": -436.77740478515625, "logps/rejected": -474.5143127441406, "loss": 0.7164, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -2.3405117988586426, "rewards/margins": 0.06529967486858368, "rewards/rejected": -2.405811309814453, "step": 5280 }, { "epoch": 0.35, "learning_rate": 4.132989529373959e-06, "logits/chosen": -1.9827455282211304, "logits/rejected": -2.061856746673584, "logps/chosen": -505.684326171875, "logps/rejected": -570.9915161132812, "loss": 0.4378, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6012649536132812, "rewards/margins": 1.2496049404144287, "rewards/rejected": -2.850870370864868, "step": 5290 }, { "epoch": 0.35, "learning_rate": 4.128661792429331e-06, "logits/chosen": -1.7086913585662842, "logits/rejected": -2.030909776687622, "logps/chosen": -413.0350646972656, "logps/rejected": -516.2833251953125, "loss": 0.6779, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1970055103302, "rewards/margins": 0.5528850555419922, "rewards/rejected": -2.7498905658721924, "step": 5300 }, { "epoch": 0.35, "learning_rate": 4.124325559586985e-06, "logits/chosen": -2.0355865955352783, "logits/rejected": -1.7397657632827759, "logps/chosen": -442.1444396972656, "logps/rejected": -477.7100524902344, "loss": 0.6172, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.8046258687973022, "rewards/margins": 0.736786425113678, "rewards/rejected": -2.541412353515625, "step": 5310 }, { "epoch": 0.35, "learning_rate": 4.119980853466835e-06, "logits/chosen": -1.9371654987335205, "logits/rejected": -1.4551010131835938, "logps/chosen": -443.48883056640625, "logps/rejected": -509.99652099609375, "loss": 0.5016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0050268173217773, "rewards/margins": 0.9008941650390625, "rewards/rejected": -2.905921220779419, "step": 5320 }, { "epoch": 0.35, "learning_rate": 4.115627696732997e-06, "logits/chosen": -2.2487733364105225, "logits/rejected": -1.9912960529327393, "logps/chosen": -587.8577270507812, "logps/rejected": -580.4589233398438, "loss": 0.5762, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.7368290424346924, "rewards/margins": 0.7809491753578186, "rewards/rejected": -2.517777919769287, "step": 5330 }, { "epoch": 0.35, "learning_rate": 4.111266112093668e-06, "logits/chosen": -2.338475465774536, "logits/rejected": -1.7393487691879272, "logps/chosen": -565.7013549804688, "logps/rejected": -561.8267822265625, "loss": 0.5838, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7987060546875, "rewards/margins": 0.9617929458618164, "rewards/rejected": -2.7604992389678955, "step": 5340 }, { "epoch": 0.35, "learning_rate": 4.1068961223010115e-06, "logits/chosen": -2.0487475395202637, "logits/rejected": -1.9890035390853882, "logps/chosen": -494.0810546875, "logps/rejected": -526.8984985351562, "loss": 0.7369, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0883586406707764, "rewards/margins": 0.25026410818099976, "rewards/rejected": -2.338622808456421, "step": 5350 }, { "epoch": 0.35, "learning_rate": 4.102517750151034e-06, "logits/chosen": -2.4758353233337402, "logits/rejected": -2.0741126537323, "logps/chosen": -516.5465087890625, "logps/rejected": -488.43035888671875, "loss": 0.5367, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.434444785118103, "rewards/margins": 0.4245724678039551, "rewards/rejected": -1.8590171337127686, "step": 5360 }, { "epoch": 0.35, "learning_rate": 4.09813101848347e-06, "logits/chosen": -2.068437337875366, "logits/rejected": -1.921613097190857, "logps/chosen": -448.24346923828125, "logps/rejected": -530.2891845703125, "loss": 0.6322, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6722848415374756, "rewards/margins": 0.7226563096046448, "rewards/rejected": -2.3949408531188965, "step": 5370 }, { "epoch": 0.35, "learning_rate": 4.093735950181659e-06, "logits/chosen": -2.3279154300689697, "logits/rejected": -2.0696029663085938, "logps/chosen": -443.210205078125, "logps/rejected": -503.69805908203125, "loss": 0.5236, "rewards/accuracies": 0.75, "rewards/chosen": -1.8402855396270752, "rewards/margins": 0.7039087414741516, "rewards/rejected": -2.544194459915161, "step": 5380 }, { "epoch": 0.35, "learning_rate": 4.0893325681724326e-06, "logits/chosen": -2.0327677726745605, "logits/rejected": -2.0879409313201904, "logps/chosen": -473.17022705078125, "logps/rejected": -611.0216064453125, "loss": 0.5203, "rewards/accuracies": 0.75, "rewards/chosen": -1.875012993812561, "rewards/margins": 1.0310866832733154, "rewards/rejected": -2.906099557876587, "step": 5390 }, { "epoch": 0.35, "learning_rate": 4.084920895425988e-06, "logits/chosen": -1.8736546039581299, "logits/rejected": -1.8165258169174194, "logps/chosen": -429.8600158691406, "logps/rejected": -548.5433349609375, "loss": 0.5732, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9263439178466797, "rewards/margins": 0.8851688504219055, "rewards/rejected": -2.8115124702453613, "step": 5400 }, { "epoch": 0.35, "learning_rate": 4.080500954955769e-06, "logits/chosen": -1.9591480493545532, "logits/rejected": -1.8119045495986938, "logps/chosen": -464.69207763671875, "logps/rejected": -554.7318115234375, "loss": 0.6704, "rewards/accuracies": 0.75, "rewards/chosen": -2.0796895027160645, "rewards/margins": 0.8748849034309387, "rewards/rejected": -2.9545741081237793, "step": 5410 }, { "epoch": 0.35, "learning_rate": 4.076072769818354e-06, "logits/chosen": -1.8003151416778564, "logits/rejected": -1.8232829570770264, "logps/chosen": -430.34686279296875, "logps/rejected": -528.0757446289062, "loss": 0.5268, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.004014730453491, "rewards/margins": 1.071571946144104, "rewards/rejected": -3.0755867958068848, "step": 5420 }, { "epoch": 0.36, "learning_rate": 4.071636363113323e-06, "logits/chosen": -2.4541430473327637, "logits/rejected": -2.071518659591675, "logps/chosen": -541.6749877929688, "logps/rejected": -580.9114990234375, "loss": 0.6572, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4755618572235107, "rewards/margins": 0.49576932191848755, "rewards/rejected": -2.9713308811187744, "step": 5430 }, { "epoch": 0.36, "learning_rate": 4.067191757983146e-06, "logits/chosen": -1.6669687032699585, "logits/rejected": -1.8759205341339111, "logps/chosen": -481.29742431640625, "logps/rejected": -646.45751953125, "loss": 0.585, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7207741737365723, "rewards/margins": 0.6411170959472656, "rewards/rejected": -3.361891269683838, "step": 5440 }, { "epoch": 0.36, "learning_rate": 4.062738977613063e-06, "logits/chosen": -2.1314239501953125, "logits/rejected": -1.5466810464859009, "logps/chosen": -536.1124877929688, "logps/rejected": -521.1047973632812, "loss": 0.5347, "rewards/accuracies": 0.75, "rewards/chosen": -2.06589674949646, "rewards/margins": 0.4216742515563965, "rewards/rejected": -2.4875710010528564, "step": 5450 }, { "epoch": 0.36, "learning_rate": 4.058278045230957e-06, "logits/chosen": -2.354736328125, "logits/rejected": -1.8038610219955444, "logps/chosen": -589.7005615234375, "logps/rejected": -649.4627685546875, "loss": 0.4977, "rewards/accuracies": 0.75, "rewards/chosen": -1.8075393438339233, "rewards/margins": 0.8336325883865356, "rewards/rejected": -2.641171932220459, "step": 5460 }, { "epoch": 0.36, "learning_rate": 4.053808984107235e-06, "logits/chosen": -2.18099308013916, "logits/rejected": -2.067991018295288, "logps/chosen": -644.2923583984375, "logps/rejected": -661.2281494140625, "loss": 0.5483, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1242473125457764, "rewards/margins": 0.7089834809303284, "rewards/rejected": -2.833230495452881, "step": 5470 }, { "epoch": 0.36, "learning_rate": 4.04933181755471e-06, "logits/chosen": -2.0857701301574707, "logits/rejected": -1.6287143230438232, "logps/chosen": -522.9374389648438, "logps/rejected": -495.65899658203125, "loss": 0.6808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.090423107147217, "rewards/margins": 0.5246143937110901, "rewards/rejected": -2.615037441253662, "step": 5480 }, { "epoch": 0.36, "learning_rate": 4.044846568928477e-06, "logits/chosen": -1.865134596824646, "logits/rejected": -2.0705184936523438, "logps/chosen": -457.414794921875, "logps/rejected": -597.051025390625, "loss": 0.5042, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.685142159461975, "rewards/margins": 0.8473315238952637, "rewards/rejected": -2.5324740409851074, "step": 5490 }, { "epoch": 0.36, "learning_rate": 4.040353261625788e-06, "logits/chosen": -2.257359266281128, "logits/rejected": -2.08892822265625, "logps/chosen": -455.34820556640625, "logps/rejected": -545.6905517578125, "loss": 0.5441, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7408716678619385, "rewards/margins": 1.2403234243392944, "rewards/rejected": -2.9811949729919434, "step": 5500 }, { "epoch": 0.36, "learning_rate": 4.035851919085936e-06, "logits/chosen": -2.324406862258911, "logits/rejected": -2.0984809398651123, "logps/chosen": -519.6268310546875, "logps/rejected": -553.7537841796875, "loss": 0.4771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2758551836013794, "rewards/margins": 1.4612915515899658, "rewards/rejected": -2.7371463775634766, "step": 5510 }, { "epoch": 0.36, "learning_rate": 4.031342564790128e-06, "logits/chosen": -2.2692208290100098, "logits/rejected": -1.6718521118164062, "logps/chosen": -525.452880859375, "logps/rejected": -548.0057983398438, "loss": 0.4416, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.848789930343628, "rewards/margins": 0.8104416728019714, "rewards/rejected": -2.659231662750244, "step": 5520 }, { "epoch": 0.36, "learning_rate": 4.026825222261367e-06, "logits/chosen": -2.027358055114746, "logits/rejected": -2.0448803901672363, "logps/chosen": -634.1797485351562, "logps/rejected": -619.1446533203125, "loss": 0.3874, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5155118703842163, "rewards/margins": 1.2813142538070679, "rewards/rejected": -2.796826124191284, "step": 5530 }, { "epoch": 0.36, "learning_rate": 4.022299915064321e-06, "logits/chosen": -2.2447431087493896, "logits/rejected": -1.7948484420776367, "logps/chosen": -429.32586669921875, "logps/rejected": -488.72100830078125, "loss": 0.4734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7056586742401123, "rewards/margins": 0.9686349630355835, "rewards/rejected": -2.6742939949035645, "step": 5540 }, { "epoch": 0.36, "learning_rate": 4.017766666805213e-06, "logits/chosen": -1.7969173192977905, "logits/rejected": -2.1284358501434326, "logps/chosen": -505.5121154785156, "logps/rejected": -535.5445556640625, "loss": 0.5333, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8811172246932983, "rewards/margins": 0.8386653065681458, "rewards/rejected": -2.719782590866089, "step": 5550 }, { "epoch": 0.36, "learning_rate": 4.013225501131684e-06, "logits/chosen": -2.1914796829223633, "logits/rejected": -2.0097999572753906, "logps/chosen": -542.9992065429688, "logps/rejected": -625.959716796875, "loss": 0.5042, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.498547077178955, "rewards/margins": 1.0935722589492798, "rewards/rejected": -2.5921194553375244, "step": 5560 }, { "epoch": 0.36, "learning_rate": 4.008676441732679e-06, "logits/chosen": -1.9770981073379517, "logits/rejected": -1.7834548950195312, "logps/chosen": -538.4899291992188, "logps/rejected": -591.5714721679688, "loss": 0.5164, "rewards/accuracies": 0.75, "rewards/chosen": -2.0004758834838867, "rewards/margins": 1.0672811269760132, "rewards/rejected": -3.0677573680877686, "step": 5570 }, { "epoch": 0.37, "learning_rate": 4.00411951233832e-06, "logits/chosen": -2.2867188453674316, "logits/rejected": -1.8971859216690063, "logps/chosen": -578.2861938476562, "logps/rejected": -632.0267944335938, "loss": 0.6347, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9446357488632202, "rewards/margins": 0.6547573208808899, "rewards/rejected": -2.599392890930176, "step": 5580 }, { "epoch": 0.37, "learning_rate": 3.999554736719785e-06, "logits/chosen": -2.297450304031372, "logits/rejected": -2.1157913208007812, "logps/chosen": -591.5215454101562, "logps/rejected": -636.9697875976562, "loss": 0.5849, "rewards/accuracies": 0.5, "rewards/chosen": -1.753023386001587, "rewards/margins": 0.7581643462181091, "rewards/rejected": -2.5111875534057617, "step": 5590 }, { "epoch": 0.37, "learning_rate": 3.994982138689177e-06, "logits/chosen": -1.5000120401382446, "logits/rejected": -1.7612262964248657, "logps/chosen": -399.55999755859375, "logps/rejected": -479.05859375, "loss": 0.607, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7114953994750977, "rewards/margins": 0.7481001615524292, "rewards/rejected": -2.4595954418182373, "step": 5600 }, { "epoch": 0.37, "learning_rate": 3.990401742099408e-06, "logits/chosen": -1.8648064136505127, "logits/rejected": -2.075862407684326, "logps/chosen": -418.94281005859375, "logps/rejected": -506.51904296875, "loss": 0.4838, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.2457597255706787, "rewards/margins": 1.0919276475906372, "rewards/rejected": -2.3376874923706055, "step": 5610 }, { "epoch": 0.37, "learning_rate": 3.985813570844072e-06, "logits/chosen": -2.1678080558776855, "logits/rejected": -2.161743402481079, "logps/chosen": -423.78521728515625, "logps/rejected": -466.4239196777344, "loss": 0.6135, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.386838674545288, "rewards/margins": 0.6362934112548828, "rewards/rejected": -2.023132085800171, "step": 5620 }, { "epoch": 0.37, "learning_rate": 3.981217648857316e-06, "logits/chosen": -2.2606282234191895, "logits/rejected": -2.1236536502838135, "logps/chosen": -523.9324951171875, "logps/rejected": -568.092041015625, "loss": 0.6394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.5362694263458252, "rewards/margins": 0.4496995508670807, "rewards/rejected": -1.9859689474105835, "step": 5630 }, { "epoch": 0.37, "learning_rate": 3.97661400011372e-06, "logits/chosen": -1.7658350467681885, "logits/rejected": -1.849532127380371, "logps/chosen": -428.254638671875, "logps/rejected": -622.2107543945312, "loss": 0.5494, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.562973141670227, "rewards/margins": 1.3392118215560913, "rewards/rejected": -2.9021852016448975, "step": 5640 }, { "epoch": 0.37, "learning_rate": 3.972002648628174e-06, "logits/chosen": -2.3656766414642334, "logits/rejected": -2.090914487838745, "logps/chosen": -527.8897094726562, "logps/rejected": -521.8011474609375, "loss": 0.4767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.9467498064041138, "rewards/margins": 1.3752272129058838, "rewards/rejected": -2.321976661682129, "step": 5650 }, { "epoch": 0.37, "learning_rate": 3.967383618455743e-06, "logits/chosen": -2.2997450828552246, "logits/rejected": -2.1734695434570312, "logps/chosen": -412.80889892578125, "logps/rejected": -531.1956176757812, "loss": 0.4566, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4110808372497559, "rewards/margins": 0.9126925468444824, "rewards/rejected": -2.323773145675659, "step": 5660 }, { "epoch": 0.37, "learning_rate": 3.9627569336915515e-06, "logits/chosen": -2.169245719909668, "logits/rejected": -2.1522982120513916, "logps/chosen": -438.11492919921875, "logps/rejected": -493.45867919921875, "loss": 0.6598, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.1814568042755127, "rewards/margins": 0.8335569500923157, "rewards/rejected": -2.0150136947631836, "step": 5670 }, { "epoch": 0.37, "learning_rate": 3.9581226184706555e-06, "logits/chosen": -2.0356647968292236, "logits/rejected": -1.761395812034607, "logps/chosen": -548.0200805664062, "logps/rejected": -517.4801635742188, "loss": 0.5303, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.3839877843856812, "rewards/margins": 1.20305597782135, "rewards/rejected": -2.5870437622070312, "step": 5680 }, { "epoch": 0.37, "learning_rate": 3.953480696967912e-06, "logits/chosen": -1.7281534671783447, "logits/rejected": -1.9886353015899658, "logps/chosen": -360.5095520019531, "logps/rejected": -469.1702575683594, "loss": 0.554, "rewards/accuracies": 0.75, "rewards/chosen": -1.5830284357070923, "rewards/margins": 0.7689424753189087, "rewards/rejected": -2.35197114944458, "step": 5690 }, { "epoch": 0.37, "learning_rate": 3.948831193397857e-06, "logits/chosen": -2.055854320526123, "logits/rejected": -1.6672054529190063, "logps/chosen": -474.595458984375, "logps/rejected": -489.5585021972656, "loss": 0.638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.260961890220642, "rewards/margins": 0.9778332710266113, "rewards/rejected": -2.238795042037964, "step": 5700 }, { "epoch": 0.37, "learning_rate": 3.94417413201458e-06, "logits/chosen": -2.0651679039001465, "logits/rejected": -1.9424585103988647, "logps/chosen": -449.7088317871094, "logps/rejected": -470.5743713378906, "loss": 0.5406, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.487428903579712, "rewards/margins": 0.7987778782844543, "rewards/rejected": -2.2862064838409424, "step": 5710 }, { "epoch": 0.37, "learning_rate": 3.9395095371115935e-06, "logits/chosen": -2.0351507663726807, "logits/rejected": -1.9006555080413818, "logps/chosen": -459.5558166503906, "logps/rejected": -525.0375366210938, "loss": 0.5175, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2365690469741821, "rewards/margins": 1.2108285427093506, "rewards/rejected": -2.447397470474243, "step": 5720 }, { "epoch": 0.37, "learning_rate": 3.93483743302171e-06, "logits/chosen": -2.2190499305725098, "logits/rejected": -1.791672945022583, "logps/chosen": -516.0533447265625, "logps/rejected": -502.7911071777344, "loss": 0.5993, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2167534828186035, "rewards/margins": 0.7097581624984741, "rewards/rejected": -2.926511526107788, "step": 5730 }, { "epoch": 0.38, "learning_rate": 3.930157844116913e-06, "logits/chosen": -2.240591526031494, "logits/rejected": -2.143420457839966, "logps/chosen": -458.56195068359375, "logps/rejected": -549.3421630859375, "loss": 0.5023, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8407554626464844, "rewards/margins": 0.6896196603775024, "rewards/rejected": -2.5303750038146973, "step": 5740 }, { "epoch": 0.38, "learning_rate": 3.925470794808229e-06, "logits/chosen": -2.3181164264678955, "logits/rejected": -1.8494704961776733, "logps/chosen": -503.0951232910156, "logps/rejected": -676.624267578125, "loss": 0.6081, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0876903533935547, "rewards/margins": 0.9999464750289917, "rewards/rejected": -3.087637186050415, "step": 5750 }, { "epoch": 0.38, "learning_rate": 3.920776309545606e-06, "logits/chosen": -1.8568137884140015, "logits/rejected": -2.0069284439086914, "logps/chosen": -518.7303466796875, "logps/rejected": -570.5198974609375, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -2.189582347869873, "rewards/margins": 0.9730221033096313, "rewards/rejected": -3.162604331970215, "step": 5760 }, { "epoch": 0.38, "learning_rate": 3.916074412817778e-06, "logits/chosen": -2.047844409942627, "logits/rejected": -1.6396989822387695, "logps/chosen": -427.2057189941406, "logps/rejected": -556.9207763671875, "loss": 0.6314, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.956089735031128, "rewards/margins": 1.3886876106262207, "rewards/rejected": -3.3447773456573486, "step": 5770 }, { "epoch": 0.38, "learning_rate": 3.911365129152139e-06, "logits/chosen": -1.8778527975082397, "logits/rejected": -1.5888967514038086, "logps/chosen": -491.4697265625, "logps/rejected": -579.642822265625, "loss": 0.6426, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2698824405670166, "rewards/margins": 1.137494683265686, "rewards/rejected": -3.407376766204834, "step": 5780 }, { "epoch": 0.38, "learning_rate": 3.906648483114623e-06, "logits/chosen": -1.5776411294937134, "logits/rejected": -2.142578601837158, "logps/chosen": -383.7577819824219, "logps/rejected": -552.5573120117188, "loss": 0.427, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.3256168365478516, "rewards/margins": 1.7148630619049072, "rewards/rejected": -4.04047966003418, "step": 5790 }, { "epoch": 0.38, "learning_rate": 3.901924499309564e-06, "logits/chosen": -2.079991340637207, "logits/rejected": -1.6393836736679077, "logps/chosen": -446.80401611328125, "logps/rejected": -541.393798828125, "loss": 0.5418, "rewards/accuracies": 0.75, "rewards/chosen": -1.5220731496810913, "rewards/margins": 1.0446290969848633, "rewards/rejected": -2.5667026042938232, "step": 5800 }, { "epoch": 0.38, "learning_rate": 3.897193202379575e-06, "logits/chosen": -2.1350531578063965, "logits/rejected": -1.84587824344635, "logps/chosen": -537.3280029296875, "logps/rejected": -529.4195556640625, "loss": 0.5709, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.909463882446289, "rewards/margins": 0.6809160113334656, "rewards/rejected": -2.5903801918029785, "step": 5810 }, { "epoch": 0.38, "learning_rate": 3.8924546170054215e-06, "logits/chosen": -2.3045449256896973, "logits/rejected": -2.2156808376312256, "logps/chosen": -501.5043029785156, "logps/rejected": -581.8260498046875, "loss": 0.4218, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.793035864830017, "rewards/margins": 1.2349274158477783, "rewards/rejected": -3.0279629230499268, "step": 5820 }, { "epoch": 0.38, "learning_rate": 3.887708767905883e-06, "logits/chosen": -2.1339306831359863, "logits/rejected": -1.840649962425232, "logps/chosen": -524.33935546875, "logps/rejected": -588.361328125, "loss": 0.3532, "rewards/accuracies": 0.75, "rewards/chosen": -1.9939978122711182, "rewards/margins": 1.0440418720245361, "rewards/rejected": -3.0380396842956543, "step": 5830 }, { "epoch": 0.38, "learning_rate": 3.882955679837636e-06, "logits/chosen": -1.8100063800811768, "logits/rejected": -2.144019842147827, "logps/chosen": -473.1988830566406, "logps/rejected": -469.0291442871094, "loss": 0.7634, "rewards/accuracies": 0.5, "rewards/chosen": -1.7491573095321655, "rewards/margins": 0.5698834657669067, "rewards/rejected": -2.3190410137176514, "step": 5840 }, { "epoch": 0.38, "learning_rate": 3.878195377595113e-06, "logits/chosen": -1.8882341384887695, "logits/rejected": -1.9309072494506836, "logps/chosen": -544.5458374023438, "logps/rejected": -674.8884887695312, "loss": 0.6168, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.0545291900634766, "rewards/margins": 0.7788420915603638, "rewards/rejected": -2.833371162414551, "step": 5850 }, { "epoch": 0.38, "learning_rate": 3.873427886010384e-06, "logits/chosen": -2.1351327896118164, "logits/rejected": -2.0625691413879395, "logps/chosen": -417.35888671875, "logps/rejected": -596.0733032226562, "loss": 0.3956, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5938721895217896, "rewards/margins": 1.5050804615020752, "rewards/rejected": -3.0989527702331543, "step": 5860 }, { "epoch": 0.38, "learning_rate": 3.868653229953021e-06, "logits/chosen": -1.8998457193374634, "logits/rejected": -2.005687713623047, "logps/chosen": -397.5736389160156, "logps/rejected": -438.1299743652344, "loss": 0.5318, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.550104022026062, "rewards/margins": 0.7522678971290588, "rewards/rejected": -2.3023719787597656, "step": 5870 }, { "epoch": 0.38, "learning_rate": 3.8638714343299675e-06, "logits/chosen": -2.50346040725708, "logits/rejected": -1.835614800453186, "logps/chosen": -494.24493408203125, "logps/rejected": -441.7505798339844, "loss": 0.4546, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.589115858078003, "rewards/margins": 1.0198700428009033, "rewards/rejected": -2.608985424041748, "step": 5880 }, { "epoch": 0.39, "learning_rate": 3.859082524085414e-06, "logits/chosen": -2.236497402191162, "logits/rejected": -2.1372292041778564, "logps/chosen": -491.81549072265625, "logps/rejected": -572.0244750976562, "loss": 0.4813, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4131276607513428, "rewards/margins": 1.2808161973953247, "rewards/rejected": -2.693943738937378, "step": 5890 }, { "epoch": 0.39, "learning_rate": 3.854286524200659e-06, "logits/chosen": -2.0243048667907715, "logits/rejected": -1.983472466468811, "logps/chosen": -519.2561645507812, "logps/rejected": -610.3675537109375, "loss": 0.3632, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.828203797340393, "rewards/margins": 1.4534698724746704, "rewards/rejected": -3.2816739082336426, "step": 5900 }, { "epoch": 0.39, "learning_rate": 3.849483459693991e-06, "logits/chosen": -2.2442984580993652, "logits/rejected": -1.936551809310913, "logps/chosen": -518.1563720703125, "logps/rejected": -664.1353759765625, "loss": 0.4925, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9774038791656494, "rewards/margins": 1.205444097518921, "rewards/rejected": -3.182847499847412, "step": 5910 }, { "epoch": 0.39, "learning_rate": 3.844673355620544e-06, "logits/chosen": -1.9714431762695312, "logits/rejected": -1.6506202220916748, "logps/chosen": -553.7269287109375, "logps/rejected": -513.1190185546875, "loss": 0.593, "rewards/accuracies": 0.5, "rewards/chosen": -2.1814029216766357, "rewards/margins": 0.47208651900291443, "rewards/rejected": -2.653489828109741, "step": 5920 }, { "epoch": 0.39, "learning_rate": 3.839856237072178e-06, "logits/chosen": -2.135085105895996, "logits/rejected": -1.9646351337432861, "logps/chosen": -429.2771911621094, "logps/rejected": -498.9122619628906, "loss": 0.5099, "rewards/accuracies": 0.75, "rewards/chosen": -2.285665988922119, "rewards/margins": 0.9124363660812378, "rewards/rejected": -3.1981022357940674, "step": 5930 }, { "epoch": 0.39, "learning_rate": 3.8350321291773455e-06, "logits/chosen": -2.0477957725524902, "logits/rejected": -1.9287916421890259, "logps/chosen": -520.1895751953125, "logps/rejected": -633.580322265625, "loss": 0.6912, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5589051246643066, "rewards/margins": 0.9964600801467896, "rewards/rejected": -3.5553650856018066, "step": 5940 }, { "epoch": 0.39, "learning_rate": 3.830201057100953e-06, "logits/chosen": -1.998542070388794, "logits/rejected": -1.565536618232727, "logps/chosen": -591.3970947265625, "logps/rejected": -650.9102783203125, "loss": 0.5073, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.550248622894287, "rewards/margins": 1.3746483325958252, "rewards/rejected": -3.924896717071533, "step": 5950 }, { "epoch": 0.39, "learning_rate": 3.82536304604424e-06, "logits/chosen": -1.5837963819503784, "logits/rejected": -1.9000377655029297, "logps/chosen": -591.5118408203125, "logps/rejected": -706.6221923828125, "loss": 0.5133, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.512164831161499, "rewards/margins": 0.8565373420715332, "rewards/rejected": -3.368701934814453, "step": 5960 }, { "epoch": 0.39, "learning_rate": 3.8205181212446435e-06, "logits/chosen": -1.9696906805038452, "logits/rejected": -1.6013498306274414, "logps/chosen": -593.3405151367188, "logps/rejected": -571.490234375, "loss": 0.7762, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.121450901031494, "rewards/margins": 0.010794973000884056, "rewards/rejected": -3.1322457790374756, "step": 5970 }, { "epoch": 0.39, "learning_rate": 3.815666307975664e-06, "logits/chosen": -1.9981441497802734, "logits/rejected": -1.674731969833374, "logps/chosen": -493.15435791015625, "logps/rejected": -575.2023315429688, "loss": 0.4685, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.250596523284912, "rewards/margins": 1.1089481115341187, "rewards/rejected": -3.3595452308654785, "step": 5980 }, { "epoch": 0.39, "learning_rate": 3.8108076315467346e-06, "logits/chosen": -2.3160128593444824, "logits/rejected": -1.9766929149627686, "logps/chosen": -489.34112548828125, "logps/rejected": -657.270263671875, "loss": 0.4317, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1392629146575928, "rewards/margins": 1.506763219833374, "rewards/rejected": -3.646026134490967, "step": 5990 }, { "epoch": 0.39, "learning_rate": 3.805942117303093e-06, "logits/chosen": -2.19549560546875, "logits/rejected": -1.8671777248382568, "logps/chosen": -482.0986328125, "logps/rejected": -604.8792114257812, "loss": 0.4501, "rewards/accuracies": 0.75, "rewards/chosen": -1.711912751197815, "rewards/margins": 1.493793249130249, "rewards/rejected": -3.2057061195373535, "step": 6000 }, { "epoch": 0.39, "eval_logits/chosen": -2.0209784507751465, "eval_logits/rejected": -1.8483818769454956, "eval_logps/chosen": -507.349853515625, "eval_logps/rejected": -587.6927490234375, "eval_loss": 0.5436508655548096, "eval_rewards/accuracies": 0.7419999837875366, "eval_rewards/chosen": -2.1190431118011475, "eval_rewards/margins": 1.0038256645202637, "eval_rewards/rejected": -3.1228690147399902, "eval_runtime": 464.8352, "eval_samples_per_second": 4.303, "eval_steps_per_second": 2.151, "step": 6000 }, { "epoch": 0.39, "learning_rate": 3.8010697906256446e-06, "logits/chosen": -1.5868901014328003, "logits/rejected": -2.0350403785705566, "logps/chosen": -521.3289184570312, "logps/rejected": -640.9805908203125, "loss": 0.5745, "rewards/accuracies": 0.75, "rewards/chosen": -2.1810312271118164, "rewards/margins": 0.8306337594985962, "rewards/rejected": -3.011664628982544, "step": 6010 }, { "epoch": 0.39, "learning_rate": 3.7961906769308323e-06, "logits/chosen": -2.1721839904785156, "logits/rejected": -2.1664059162139893, "logps/chosen": -428.31365966796875, "logps/rejected": -519.9769897460938, "loss": 0.4193, "rewards/accuracies": 0.75, "rewards/chosen": -1.6097137928009033, "rewards/margins": 1.039236068725586, "rewards/rejected": -2.6489500999450684, "step": 6020 }, { "epoch": 0.39, "learning_rate": 3.7913048016705028e-06, "logits/chosen": -1.8027420043945312, "logits/rejected": -1.119469404220581, "logps/chosen": -405.3560791015625, "logps/rejected": -543.0703125, "loss": 0.428, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.032978057861328, "rewards/margins": 1.3919148445129395, "rewards/rejected": -3.4248929023742676, "step": 6030 }, { "epoch": 0.4, "learning_rate": 3.786412190331775e-06, "logits/chosen": -2.008702039718628, "logits/rejected": -1.9136741161346436, "logps/chosen": -495.7113342285156, "logps/rejected": -723.3904418945312, "loss": 0.3888, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.875732183456421, "rewards/margins": 1.702206015586853, "rewards/rejected": -3.577937602996826, "step": 6040 }, { "epoch": 0.4, "learning_rate": 3.781512868436906e-06, "logits/chosen": -2.2078769207000732, "logits/rejected": -2.072467565536499, "logps/chosen": -521.7391357421875, "logps/rejected": -561.3995971679688, "loss": 0.6048, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6842775344848633, "rewards/margins": 1.0569112300872803, "rewards/rejected": -2.7411885261535645, "step": 6050 }, { "epoch": 0.4, "learning_rate": 3.7766068615431605e-06, "logits/chosen": -2.3585803508758545, "logits/rejected": -2.168217658996582, "logps/chosen": -642.48681640625, "logps/rejected": -691.4666137695312, "loss": 0.5816, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.5773718357086182, "rewards/margins": 1.2659053802490234, "rewards/rejected": -2.8432772159576416, "step": 6060 }, { "epoch": 0.4, "learning_rate": 3.771694195242671e-06, "logits/chosen": -2.1833529472351074, "logits/rejected": -1.8416792154312134, "logps/chosen": -552.9678344726562, "logps/rejected": -465.968994140625, "loss": 0.5965, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2463607788085938, "rewards/margins": 0.32134681940078735, "rewards/rejected": -2.5677075386047363, "step": 6070 }, { "epoch": 0.4, "learning_rate": 3.766774895162314e-06, "logits/chosen": -1.4838708639144897, "logits/rejected": -1.8937629461288452, "logps/chosen": -481.39178466796875, "logps/rejected": -649.5086059570312, "loss": 0.5998, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.128632068634033, "rewards/margins": 0.9498635530471802, "rewards/rejected": -3.078495740890503, "step": 6080 }, { "epoch": 0.4, "learning_rate": 3.7618489869635666e-06, "logits/chosen": -2.0555758476257324, "logits/rejected": -1.9236648082733154, "logps/chosen": -573.3447265625, "logps/rejected": -530.6716918945312, "loss": 0.725, "rewards/accuracies": 0.5, "rewards/chosen": -2.4555697441101074, "rewards/margins": 0.5236064791679382, "rewards/rejected": -2.9791760444641113, "step": 6090 }, { "epoch": 0.4, "learning_rate": 3.756916496342379e-06, "logits/chosen": -1.6241865158081055, "logits/rejected": -1.6583588123321533, "logps/chosen": -433.23333740234375, "logps/rejected": -461.29180908203125, "loss": 0.6667, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3971171379089355, "rewards/margins": 0.457655668258667, "rewards/rejected": -2.8547730445861816, "step": 6100 }, { "epoch": 0.4, "learning_rate": 3.751977449029039e-06, "logits/chosen": -1.9116780757904053, "logits/rejected": -1.8591690063476562, "logps/chosen": -459.74652099609375, "logps/rejected": -484.01953125, "loss": 0.5498, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8744319677352905, "rewards/margins": 0.821243166923523, "rewards/rejected": -2.6956753730773926, "step": 6110 }, { "epoch": 0.4, "learning_rate": 3.747031870788037e-06, "logits/chosen": -2.1998138427734375, "logits/rejected": -1.8871634006500244, "logps/chosen": -454.06365966796875, "logps/rejected": -557.6979370117188, "loss": 0.472, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.895861029624939, "rewards/margins": 1.0318374633789062, "rewards/rejected": -2.9276986122131348, "step": 6120 }, { "epoch": 0.4, "learning_rate": 3.7420797874179326e-06, "logits/chosen": -1.7379772663116455, "logits/rejected": -1.4775676727294922, "logps/chosen": -511.3853454589844, "logps/rejected": -584.1634521484375, "loss": 0.4523, "rewards/accuracies": 0.75, "rewards/chosen": -2.1693828105926514, "rewards/margins": 1.03212571144104, "rewards/rejected": -3.2015087604522705, "step": 6130 }, { "epoch": 0.4, "learning_rate": 3.7371212247512167e-06, "logits/chosen": -1.8649704456329346, "logits/rejected": -2.244213581085205, "logps/chosen": -548.1846923828125, "logps/rejected": -826.314453125, "loss": 0.5413, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.369753122329712, "rewards/margins": 1.118550181388855, "rewards/rejected": -3.4883029460906982, "step": 6140 }, { "epoch": 0.4, "learning_rate": 3.7321562086541817e-06, "logits/chosen": -2.4249210357666016, "logits/rejected": -1.8241479396820068, "logps/chosen": -610.2730102539062, "logps/rejected": -584.8367309570312, "loss": 0.4082, "rewards/accuracies": 0.75, "rewards/chosen": -2.1096255779266357, "rewards/margins": 0.8866428136825562, "rewards/rejected": -2.9962685108184814, "step": 6150 }, { "epoch": 0.4, "learning_rate": 3.7271847650267834e-06, "logits/chosen": -1.5324722528457642, "logits/rejected": -1.7584350109100342, "logps/chosen": -479.8152770996094, "logps/rejected": -582.040771484375, "loss": 0.5012, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.697323799133301, "rewards/margins": 1.2286405563354492, "rewards/rejected": -3.92596435546875, "step": 6160 }, { "epoch": 0.4, "learning_rate": 3.7222069198025086e-06, "logits/chosen": -1.997467279434204, "logits/rejected": -1.7623908519744873, "logps/chosen": -577.2261352539062, "logps/rejected": -661.6519775390625, "loss": 0.5088, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.898735523223877, "rewards/margins": 1.1691944599151611, "rewards/rejected": -4.067929744720459, "step": 6170 }, { "epoch": 0.4, "learning_rate": 3.7172226989482353e-06, "logits/chosen": -2.120117664337158, "logits/rejected": -1.8180913925170898, "logps/chosen": -630.7166748046875, "logps/rejected": -645.8465576171875, "loss": 0.5931, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3682315349578857, "rewards/margins": 1.0373667478561401, "rewards/rejected": -3.4055984020233154, "step": 6180 }, { "epoch": 0.4, "learning_rate": 3.7122321284641007e-06, "logits/chosen": -1.8285562992095947, "logits/rejected": -1.9086427688598633, "logps/chosen": -559.5812377929688, "logps/rejected": -611.1234130859375, "loss": 0.5888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.270254135131836, "rewards/margins": 0.9550930261611938, "rewards/rejected": -3.2253470420837402, "step": 6190 }, { "epoch": 0.41, "learning_rate": 3.707235234383365e-06, "logits/chosen": -1.5866405963897705, "logits/rejected": -1.1846307516098022, "logps/chosen": -497.96295166015625, "logps/rejected": -517.0799560546875, "loss": 0.5348, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7328848838806152, "rewards/margins": 0.9069123268127441, "rewards/rejected": -3.639796733856201, "step": 6200 }, { "epoch": 0.41, "learning_rate": 3.702232042772277e-06, "logits/chosen": -1.9289581775665283, "logits/rejected": -2.16865873336792, "logps/chosen": -585.1903076171875, "logps/rejected": -650.5701293945312, "loss": 0.5983, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3935234546661377, "rewards/margins": 1.1612800359725952, "rewards/rejected": -3.5548033714294434, "step": 6210 }, { "epoch": 0.41, "learning_rate": 3.6972225797299325e-06, "logits/chosen": -1.6929620504379272, "logits/rejected": -1.8669235706329346, "logps/chosen": -590.72412109375, "logps/rejected": -693.8157958984375, "loss": 0.7619, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.2169342041015625, "rewards/margins": 0.41517114639282227, "rewards/rejected": -3.6321048736572266, "step": 6220 }, { "epoch": 0.41, "learning_rate": 3.692206871388147e-06, "logits/chosen": -2.4398531913757324, "logits/rejected": -1.8036763668060303, "logps/chosen": -618.9306640625, "logps/rejected": -543.3020629882812, "loss": 0.4643, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.38877010345459, "rewards/margins": 1.128936529159546, "rewards/rejected": -3.517707109451294, "step": 6230 }, { "epoch": 0.41, "learning_rate": 3.6871849439113115e-06, "logits/chosen": -1.8826773166656494, "logits/rejected": -1.5871411561965942, "logps/chosen": -531.6732788085938, "logps/rejected": -634.6497192382812, "loss": 0.3874, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.203376531600952, "rewards/margins": 1.1963980197906494, "rewards/rejected": -3.3997745513916016, "step": 6240 }, { "epoch": 0.41, "learning_rate": 3.682156823496259e-06, "logits/chosen": -1.8589789867401123, "logits/rejected": -1.4900842905044556, "logps/chosen": -540.5941772460938, "logps/rejected": -580.9816284179688, "loss": 0.5012, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5480470657348633, "rewards/margins": 1.0418312549591064, "rewards/rejected": -3.589878559112549, "step": 6250 }, { "epoch": 0.41, "learning_rate": 3.67712253637213e-06, "logits/chosen": -1.6938155889511108, "logits/rejected": -2.005298137664795, "logps/chosen": -497.44744873046875, "logps/rejected": -566.0697021484375, "loss": 0.6546, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0947582721710205, "rewards/margins": 0.5265017747879028, "rewards/rejected": -2.621260166168213, "step": 6260 }, { "epoch": 0.41, "learning_rate": 3.672082108800231e-06, "logits/chosen": -1.952540636062622, "logits/rejected": -1.4625334739685059, "logps/chosen": -527.0181884765625, "logps/rejected": -715.9548950195312, "loss": 0.4024, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1873886585235596, "rewards/margins": 1.4106473922729492, "rewards/rejected": -3.598036289215088, "step": 6270 }, { "epoch": 0.41, "learning_rate": 3.6670355670739012e-06, "logits/chosen": -1.6990658044815063, "logits/rejected": -1.9302780628204346, "logps/chosen": -547.1773681640625, "logps/rejected": -577.26708984375, "loss": 0.5239, "rewards/accuracies": 0.75, "rewards/chosen": -2.47194242477417, "rewards/margins": 0.6156947612762451, "rewards/rejected": -3.087637186050415, "step": 6280 }, { "epoch": 0.41, "learning_rate": 3.6619829375183745e-06, "logits/chosen": -2.0098588466644287, "logits/rejected": -1.8811092376708984, "logps/chosen": -555.290283203125, "logps/rejected": -684.6082763671875, "loss": 0.4423, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1225078105926514, "rewards/margins": 1.1310758590698242, "rewards/rejected": -3.2535834312438965, "step": 6290 }, { "epoch": 0.41, "learning_rate": 3.6569242464906427e-06, "logits/chosen": -1.8348238468170166, "logits/rejected": -1.5659939050674438, "logps/chosen": -561.0123291015625, "logps/rejected": -603.9790649414062, "loss": 0.6064, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.416660785675049, "rewards/margins": 0.7037839889526367, "rewards/rejected": -3.1204445362091064, "step": 6300 }, { "epoch": 0.41, "learning_rate": 3.6518595203793156e-06, "logits/chosen": -2.1522655487060547, "logits/rejected": -1.8314883708953857, "logps/chosen": -597.142578125, "logps/rejected": -642.1500854492188, "loss": 0.4829, "rewards/accuracies": 0.75, "rewards/chosen": -2.6272170543670654, "rewards/margins": 1.0413819551467896, "rewards/rejected": -3.6685993671417236, "step": 6310 }, { "epoch": 0.41, "learning_rate": 3.646788785604485e-06, "logits/chosen": -2.0718045234680176, "logits/rejected": -2.0843162536621094, "logps/chosen": -567.9349365234375, "logps/rejected": -625.3218994140625, "loss": 0.3936, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2843122482299805, "rewards/margins": 1.1147112846374512, "rewards/rejected": -3.3990235328674316, "step": 6320 }, { "epoch": 0.41, "learning_rate": 3.641712068617588e-06, "logits/chosen": -2.1838338375091553, "logits/rejected": -1.826072335243225, "logps/chosen": -531.1757202148438, "logps/rejected": -648.35546875, "loss": 0.5066, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4559993743896484, "rewards/margins": 1.0470889806747437, "rewards/rejected": -3.5030884742736816, "step": 6330 }, { "epoch": 0.41, "learning_rate": 3.6366293959012673e-06, "logits/chosen": -2.360943555831909, "logits/rejected": -1.382158875465393, "logps/chosen": -503.5301818847656, "logps/rejected": -572.0340576171875, "loss": 0.4061, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6652551889419556, "rewards/margins": 1.7123034000396729, "rewards/rejected": -3.3775582313537598, "step": 6340 }, { "epoch": 0.42, "learning_rate": 3.631540793969233e-06, "logits/chosen": -1.8118938207626343, "logits/rejected": -2.3862550258636475, "logps/chosen": -548.354736328125, "logps/rejected": -568.3483276367188, "loss": 0.5976, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.2418887615203857, "rewards/margins": 0.4210108816623688, "rewards/rejected": -2.6628997325897217, "step": 6350 }, { "epoch": 0.42, "learning_rate": 3.626446289366127e-06, "logits/chosen": -2.170741558074951, "logits/rejected": -1.74264395236969, "logps/chosen": -483.4600524902344, "logps/rejected": -540.6439208984375, "loss": 0.7371, "rewards/accuracies": 0.75, "rewards/chosen": -1.9748328924179077, "rewards/margins": 1.2782257795333862, "rewards/rejected": -3.253058671951294, "step": 6360 }, { "epoch": 0.42, "learning_rate": 3.6213459086673786e-06, "logits/chosen": -2.1250672340393066, "logits/rejected": -1.9678720235824585, "logps/chosen": -490.01263427734375, "logps/rejected": -500.30718994140625, "loss": 0.5438, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7553844451904297, "rewards/margins": 0.5862716436386108, "rewards/rejected": -2.341655969619751, "step": 6370 }, { "epoch": 0.42, "learning_rate": 3.6162396784790737e-06, "logits/chosen": -2.0813148021698, "logits/rejected": -1.9161512851715088, "logps/chosen": -536.7302856445312, "logps/rejected": -584.066650390625, "loss": 0.4712, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7031223773956299, "rewards/margins": 1.6962379217147827, "rewards/rejected": -3.399359941482544, "step": 6380 }, { "epoch": 0.42, "learning_rate": 3.6111276254378095e-06, "logits/chosen": -1.94598388671875, "logits/rejected": -1.7917381525039673, "logps/chosen": -453.65728759765625, "logps/rejected": -560.082275390625, "loss": 0.5562, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.231935977935791, "rewards/margins": 0.9571143388748169, "rewards/rejected": -3.1890501976013184, "step": 6390 }, { "epoch": 0.42, "learning_rate": 3.606009776210559e-06, "logits/chosen": -2.254138469696045, "logits/rejected": -2.0140843391418457, "logps/chosen": -528.5057373046875, "logps/rejected": -668.043212890625, "loss": 0.5675, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0886292457580566, "rewards/margins": 0.7729565501213074, "rewards/rejected": -2.861585855484009, "step": 6400 }, { "epoch": 0.42, "learning_rate": 3.600886157494531e-06, "logits/chosen": -1.9126689434051514, "logits/rejected": -1.7674872875213623, "logps/chosen": -491.20098876953125, "logps/rejected": -499.23876953125, "loss": 0.7688, "rewards/accuracies": 0.75, "rewards/chosen": -2.448800563812256, "rewards/margins": 0.6232201457023621, "rewards/rejected": -3.072021007537842, "step": 6410 }, { "epoch": 0.42, "learning_rate": 3.5957567960170304e-06, "logits/chosen": -1.9752228260040283, "logits/rejected": -1.8452541828155518, "logps/chosen": -527.0646362304688, "logps/rejected": -687.8794555664062, "loss": 0.6649, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -2.8853278160095215, "rewards/margins": 0.25008624792099, "rewards/rejected": -3.1354143619537354, "step": 6420 }, { "epoch": 0.42, "learning_rate": 3.590621718535319e-06, "logits/chosen": -2.364518404006958, "logits/rejected": -1.6936737298965454, "logps/chosen": -595.7276611328125, "logps/rejected": -576.9322509765625, "loss": 0.5857, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4467835426330566, "rewards/margins": 1.041294813156128, "rewards/rejected": -3.4880783557891846, "step": 6430 }, { "epoch": 0.42, "learning_rate": 3.5854809518364775e-06, "logits/chosen": -1.9299328327178955, "logits/rejected": -1.869680643081665, "logps/chosen": -533.153564453125, "logps/rejected": -628.454345703125, "loss": 0.4269, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0279541015625, "rewards/margins": 1.7927402257919312, "rewards/rejected": -3.8206939697265625, "step": 6440 }, { "epoch": 0.42, "learning_rate": 3.580334522737262e-06, "logits/chosen": -1.9989423751831055, "logits/rejected": -2.1659204959869385, "logps/chosen": -509.93365478515625, "logps/rejected": -597.7767333984375, "loss": 0.5775, "rewards/accuracies": 0.75, "rewards/chosen": -2.1766810417175293, "rewards/margins": 0.9996244311332703, "rewards/rejected": -3.1763057708740234, "step": 6450 }, { "epoch": 0.42, "learning_rate": 3.575182458083968e-06, "logits/chosen": -1.8486299514770508, "logits/rejected": -1.3089923858642578, "logps/chosen": -543.155517578125, "logps/rejected": -649.8765258789062, "loss": 0.5096, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2399661540985107, "rewards/margins": 1.5620733499526978, "rewards/rejected": -3.802039384841919, "step": 6460 }, { "epoch": 0.42, "learning_rate": 3.5700247847522883e-06, "logits/chosen": -1.926372766494751, "logits/rejected": -2.0225892066955566, "logps/chosen": -548.9107666015625, "logps/rejected": -559.2340087890625, "loss": 0.5155, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.181037664413452, "rewards/margins": 0.8243209719657898, "rewards/rejected": -3.0053586959838867, "step": 6470 }, { "epoch": 0.42, "learning_rate": 3.5648615296471743e-06, "logits/chosen": -2.098559856414795, "logits/rejected": -1.8494961261749268, "logps/chosen": -483.23919677734375, "logps/rejected": -678.0482177734375, "loss": 0.5595, "rewards/accuracies": 0.75, "rewards/chosen": -2.0355286598205566, "rewards/margins": 1.228212594985962, "rewards/rejected": -3.2637412548065186, "step": 6480 }, { "epoch": 0.42, "learning_rate": 3.559692719702693e-06, "logits/chosen": -1.9919559955596924, "logits/rejected": -2.085280656814575, "logps/chosen": -498.7488708496094, "logps/rejected": -582.7216796875, "loss": 0.6164, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.046030044555664, "rewards/margins": 0.35755324363708496, "rewards/rejected": -2.40358304977417, "step": 6490 }, { "epoch": 0.43, "learning_rate": 3.55451838188189e-06, "logits/chosen": -2.0272135734558105, "logits/rejected": -1.7676805257797241, "logps/chosen": -533.1506958007812, "logps/rejected": -594.387451171875, "loss": 0.5087, "rewards/accuracies": 0.75, "rewards/chosen": -2.218221426010132, "rewards/margins": 0.922869861125946, "rewards/rejected": -3.1410908699035645, "step": 6500 }, { "epoch": 0.43, "learning_rate": 3.549338543176645e-06, "logits/chosen": -1.6263704299926758, "logits/rejected": -1.7644325494766235, "logps/chosen": -408.6803283691406, "logps/rejected": -664.818359375, "loss": 0.567, "rewards/accuracies": 0.75, "rewards/chosen": -2.146083354949951, "rewards/margins": 1.5915114879608154, "rewards/rejected": -3.737595319747925, "step": 6510 }, { "epoch": 0.43, "learning_rate": 3.5441532306075342e-06, "logits/chosen": -2.2625370025634766, "logits/rejected": -2.0242671966552734, "logps/chosen": -556.9017333984375, "logps/rejected": -609.79931640625, "loss": 0.5176, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.1746726036071777, "rewards/margins": 1.2311522960662842, "rewards/rejected": -3.405825138092041, "step": 6520 }, { "epoch": 0.43, "learning_rate": 3.5389624712236894e-06, "logits/chosen": -2.2226243019104004, "logits/rejected": -2.257262706756592, "logps/chosen": -509.9283142089844, "logps/rejected": -627.7484130859375, "loss": 0.5179, "rewards/accuracies": 0.75, "rewards/chosen": -2.0390431880950928, "rewards/margins": 1.254080891609192, "rewards/rejected": -3.293123960494995, "step": 6530 }, { "epoch": 0.43, "learning_rate": 3.533766292102653e-06, "logits/chosen": -1.7512744665145874, "logits/rejected": -2.1317524909973145, "logps/chosen": -437.34356689453125, "logps/rejected": -606.0906982421875, "loss": 0.5225, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.106980562210083, "rewards/margins": 0.8636156320571899, "rewards/rejected": -2.9705960750579834, "step": 6540 }, { "epoch": 0.43, "learning_rate": 3.5285647203502404e-06, "logits/chosen": -2.1404430866241455, "logits/rejected": -1.6980934143066406, "logps/chosen": -517.9746704101562, "logps/rejected": -558.1184692382812, "loss": 0.4704, "rewards/accuracies": 0.75, "rewards/chosen": -2.068937063217163, "rewards/margins": 1.0138415098190308, "rewards/rejected": -3.0827784538269043, "step": 6550 }, { "epoch": 0.43, "learning_rate": 3.5233577831003983e-06, "logits/chosen": -1.9494342803955078, "logits/rejected": -1.800309419631958, "logps/chosen": -551.1097412109375, "logps/rejected": -552.0692138671875, "loss": 0.5604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.122185707092285, "rewards/margins": 0.7436209917068481, "rewards/rejected": -2.865807056427002, "step": 6560 }, { "epoch": 0.43, "learning_rate": 3.5181455075150628e-06, "logits/chosen": -2.3633298873901367, "logits/rejected": -1.604595422744751, "logps/chosen": -500.5062561035156, "logps/rejected": -592.0992431640625, "loss": 0.5148, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.738813042640686, "rewards/margins": 1.5005505084991455, "rewards/rejected": -3.239363431930542, "step": 6570 }, { "epoch": 0.43, "learning_rate": 3.512927920784016e-06, "logits/chosen": -1.9952714443206787, "logits/rejected": -1.5963308811187744, "logps/chosen": -401.2374572753906, "logps/rejected": -499.2310485839844, "loss": 0.4971, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0079140663146973, "rewards/margins": 1.016329050064087, "rewards/rejected": -3.0242433547973633, "step": 6580 }, { "epoch": 0.43, "learning_rate": 3.5077050501247457e-06, "logits/chosen": -2.3940505981445312, "logits/rejected": -2.106627941131592, "logps/chosen": -472.62774658203125, "logps/rejected": -550.3732299804688, "loss": 0.4836, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.9436047077178955, "rewards/margins": 0.7553819417953491, "rewards/rejected": -2.698986530303955, "step": 6590 }, { "epoch": 0.43, "learning_rate": 3.5024769227823042e-06, "logits/chosen": -1.9121567010879517, "logits/rejected": -1.7712024450302124, "logps/chosen": -485.36895751953125, "logps/rejected": -611.3514404296875, "loss": 0.5061, "rewards/accuracies": 0.75, "rewards/chosen": -2.3973145484924316, "rewards/margins": 1.1803823709487915, "rewards/rejected": -3.5776965618133545, "step": 6600 }, { "epoch": 0.43, "learning_rate": 3.4972435660291646e-06, "logits/chosen": -1.877173662185669, "logits/rejected": -1.741201639175415, "logps/chosen": -593.0254516601562, "logps/rejected": -672.1753540039062, "loss": 0.592, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.6820404529571533, "rewards/margins": 1.1979200839996338, "rewards/rejected": -2.879960536956787, "step": 6610 }, { "epoch": 0.43, "learning_rate": 3.492005007165079e-06, "logits/chosen": -1.6904380321502686, "logits/rejected": -2.0509963035583496, "logps/chosen": -576.3147583007812, "logps/rejected": -585.3676147460938, "loss": 0.666, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.614394426345825, "rewards/margins": 0.4233713746070862, "rewards/rejected": -3.0377657413482666, "step": 6620 }, { "epoch": 0.43, "learning_rate": 3.4867612735169377e-06, "logits/chosen": -1.859418511390686, "logits/rejected": -1.797196626663208, "logps/chosen": -466.6185607910156, "logps/rejected": -501.27105712890625, "loss": 0.5138, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.128791570663452, "rewards/margins": 0.7405956983566284, "rewards/rejected": -2.869387149810791, "step": 6630 }, { "epoch": 0.43, "learning_rate": 3.4815123924386226e-06, "logits/chosen": -2.1479945182800293, "logits/rejected": -1.7917789220809937, "logps/chosen": -501.1796875, "logps/rejected": -537.141845703125, "loss": 0.6099, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.8953443765640259, "rewards/margins": 1.2032743692398071, "rewards/rejected": -3.098618745803833, "step": 6640 }, { "epoch": 0.44, "learning_rate": 3.4762583913108696e-06, "logits/chosen": -1.9697473049163818, "logits/rejected": -1.8926702737808228, "logps/chosen": -629.2015380859375, "logps/rejected": -737.5687255859375, "loss": 0.512, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.999524712562561, "rewards/margins": 0.8353649377822876, "rewards/rejected": -2.8348896503448486, "step": 6650 }, { "epoch": 0.44, "learning_rate": 3.4709992975411217e-06, "logits/chosen": -2.1073319911956787, "logits/rejected": -1.863032341003418, "logps/chosen": -356.60394287109375, "logps/rejected": -415.05078125, "loss": 0.6121, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9784562587738037, "rewards/margins": 0.3868860602378845, "rewards/rejected": -2.365342617034912, "step": 6660 }, { "epoch": 0.44, "learning_rate": 3.4657351385633886e-06, "logits/chosen": -1.802716612815857, "logits/rejected": -2.103769063949585, "logps/chosen": -497.4820251464844, "logps/rejected": -602.2470703125, "loss": 0.4817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9606220722198486, "rewards/margins": 0.9812425374984741, "rewards/rejected": -2.941864490509033, "step": 6670 }, { "epoch": 0.44, "learning_rate": 3.4604659418381024e-06, "logits/chosen": -1.8399873971939087, "logits/rejected": -1.899836540222168, "logps/chosen": -421.61163330078125, "logps/rejected": -521.968994140625, "loss": 0.6611, "rewards/accuracies": 0.5, "rewards/chosen": -2.403350830078125, "rewards/margins": 0.4829896092414856, "rewards/rejected": -2.886340618133545, "step": 6680 }, { "epoch": 0.44, "learning_rate": 3.4551917348519744e-06, "logits/chosen": -2.3457393646240234, "logits/rejected": -2.0672945976257324, "logps/chosen": -618.3902587890625, "logps/rejected": -615.4583740234375, "loss": 0.6227, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1271519660949707, "rewards/margins": 1.0083070993423462, "rewards/rejected": -3.1354591846466064, "step": 6690 }, { "epoch": 0.44, "learning_rate": 3.4499125451178505e-06, "logits/chosen": -2.169529438018799, "logits/rejected": -2.244375467300415, "logps/chosen": -566.7544555664062, "logps/rejected": -646.224365234375, "loss": 0.604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3752753734588623, "rewards/margins": 0.8324453234672546, "rewards/rejected": -3.207720994949341, "step": 6700 }, { "epoch": 0.44, "learning_rate": 3.4446284001745723e-06, "logits/chosen": -2.130103349685669, "logits/rejected": -1.7955682277679443, "logps/chosen": -468.88348388671875, "logps/rejected": -552.3078002929688, "loss": 0.4814, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.294940233230591, "rewards/margins": 1.5448663234710693, "rewards/rejected": -3.8398067951202393, "step": 6710 }, { "epoch": 0.44, "learning_rate": 3.439339327586827e-06, "logits/chosen": -2.0202527046203613, "logits/rejected": -1.760382056236267, "logps/chosen": -525.0974731445312, "logps/rejected": -689.3110961914062, "loss": 0.5979, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.733820915222168, "rewards/margins": 0.8580193519592285, "rewards/rejected": -3.591839551925659, "step": 6720 }, { "epoch": 0.44, "learning_rate": 3.434045354945008e-06, "logits/chosen": -2.259902238845825, "logits/rejected": -2.089804172515869, "logps/chosen": -519.9620361328125, "logps/rejected": -715.9612426757812, "loss": 0.48, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.358236312866211, "rewards/margins": 1.4368010759353638, "rewards/rejected": -3.7950377464294434, "step": 6730 }, { "epoch": 0.44, "learning_rate": 3.4287465098650713e-06, "logits/chosen": -1.8449312448501587, "logits/rejected": -1.768151044845581, "logps/chosen": -508.92120361328125, "logps/rejected": -558.8959350585938, "loss": 0.495, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8265440464019775, "rewards/margins": 1.2664082050323486, "rewards/rejected": -3.092952251434326, "step": 6740 }, { "epoch": 0.44, "learning_rate": 3.423442819988387e-06, "logits/chosen": -2.011176347732544, "logits/rejected": -1.8518304824829102, "logps/chosen": -456.6439514160156, "logps/rejected": -559.4019165039062, "loss": 0.6557, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4466426372528076, "rewards/margins": 0.8426218032836914, "rewards/rejected": -3.28926420211792, "step": 6750 }, { "epoch": 0.44, "learning_rate": 3.4181343129816e-06, "logits/chosen": -2.3097541332244873, "logits/rejected": -2.193639039993286, "logps/chosen": -653.538330078125, "logps/rejected": -680.1073608398438, "loss": 0.5421, "rewards/accuracies": 0.75, "rewards/chosen": -2.3540773391723633, "rewards/margins": 0.6987882852554321, "rewards/rejected": -3.052865505218506, "step": 6760 }, { "epoch": 0.44, "learning_rate": 3.4128210165364837e-06, "logits/chosen": -2.04072904586792, "logits/rejected": -1.845476746559143, "logps/chosen": -520.7491455078125, "logps/rejected": -505.431640625, "loss": 0.5424, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9616371393203735, "rewards/margins": 0.9794682264328003, "rewards/rejected": -2.941105365753174, "step": 6770 }, { "epoch": 0.44, "learning_rate": 3.407502958369795e-06, "logits/chosen": -1.7737308740615845, "logits/rejected": -1.9110854864120483, "logps/chosen": -474.15850830078125, "logps/rejected": -509.0408630371094, "loss": 0.557, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0218558311462402, "rewards/margins": 0.6208639144897461, "rewards/rejected": -2.6427202224731445, "step": 6780 }, { "epoch": 0.44, "learning_rate": 3.4021801662231297e-06, "logits/chosen": -2.1233630180358887, "logits/rejected": -2.216717481613159, "logps/chosen": -588.3614501953125, "logps/rejected": -673.5755004882812, "loss": 0.4333, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7243669033050537, "rewards/margins": 1.1570664644241333, "rewards/rejected": -2.8814332485198975, "step": 6790 }, { "epoch": 0.44, "learning_rate": 3.3968526678627793e-06, "logits/chosen": -2.006848096847534, "logits/rejected": -1.90310800075531, "logps/chosen": -535.9368286132812, "logps/rejected": -562.5875244140625, "loss": 0.7555, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.227323532104492, "rewards/margins": 0.5417407751083374, "rewards/rejected": -2.769064426422119, "step": 6800 }, { "epoch": 0.45, "learning_rate": 3.391520491079586e-06, "logits/chosen": -2.16282320022583, "logits/rejected": -2.040290594100952, "logps/chosen": -467.48394775390625, "logps/rejected": -588.5347900390625, "loss": 0.4816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9816973209381104, "rewards/margins": 1.336430311203003, "rewards/rejected": -3.318127393722534, "step": 6810 }, { "epoch": 0.45, "learning_rate": 3.3861836636887936e-06, "logits/chosen": -1.7972116470336914, "logits/rejected": -1.7362552881240845, "logps/chosen": -619.1534423828125, "logps/rejected": -605.8143310546875, "loss": 0.5679, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6134886741638184, "rewards/margins": 0.9449461102485657, "rewards/rejected": -3.5584347248077393, "step": 6820 }, { "epoch": 0.45, "learning_rate": 3.3808422135299106e-06, "logits/chosen": -2.1757609844207764, "logits/rejected": -1.9568872451782227, "logps/chosen": -496.1884765625, "logps/rejected": -594.45654296875, "loss": 0.6461, "rewards/accuracies": 0.75, "rewards/chosen": -2.3930745124816895, "rewards/margins": 0.6214153170585632, "rewards/rejected": -3.0144896507263184, "step": 6830 }, { "epoch": 0.45, "learning_rate": 3.375496168466556e-06, "logits/chosen": -1.769409418106079, "logits/rejected": -1.8746048212051392, "logps/chosen": -503.7557678222656, "logps/rejected": -551.3370361328125, "loss": 0.6333, "rewards/accuracies": 0.75, "rewards/chosen": -2.382068157196045, "rewards/margins": 0.6661649346351624, "rewards/rejected": -3.0482332706451416, "step": 6840 }, { "epoch": 0.45, "learning_rate": 3.3701455563863205e-06, "logits/chosen": -2.1566174030303955, "logits/rejected": -2.090261936187744, "logps/chosen": -505.6607971191406, "logps/rejected": -505.560546875, "loss": 0.6342, "rewards/accuracies": 0.75, "rewards/chosen": -2.3432400226593018, "rewards/margins": 0.5142241716384888, "rewards/rejected": -2.857464075088501, "step": 6850 }, { "epoch": 0.45, "learning_rate": 3.3647904052006174e-06, "logits/chosen": -2.1792402267456055, "logits/rejected": -1.721308708190918, "logps/chosen": -511.5228576660156, "logps/rejected": -615.1655883789062, "loss": 0.3936, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4667131900787354, "rewards/margins": 1.5716593265533447, "rewards/rejected": -3.03837251663208, "step": 6860 }, { "epoch": 0.45, "learning_rate": 3.3594307428445383e-06, "logits/chosen": -2.051579236984253, "logits/rejected": -2.012957811355591, "logps/chosen": -560.08984375, "logps/rejected": -628.5303344726562, "loss": 0.5154, "rewards/accuracies": 0.75, "rewards/chosen": -2.2268595695495605, "rewards/margins": 0.9397394061088562, "rewards/rejected": -3.1665987968444824, "step": 6870 }, { "epoch": 0.45, "learning_rate": 3.354066597276707e-06, "logits/chosen": -1.7750266790390015, "logits/rejected": -1.9297701120376587, "logps/chosen": -437.557861328125, "logps/rejected": -588.8997192382812, "loss": 0.588, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2441320419311523, "rewards/margins": 1.2166695594787598, "rewards/rejected": -3.460801601409912, "step": 6880 }, { "epoch": 0.45, "learning_rate": 3.348697996479136e-06, "logits/chosen": -2.2891221046447754, "logits/rejected": -2.0664844512939453, "logps/chosen": -551.2601318359375, "logps/rejected": -479.00439453125, "loss": 0.465, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.6627784967422485, "rewards/margins": 0.9804137945175171, "rewards/rejected": -2.643192768096924, "step": 6890 }, { "epoch": 0.45, "learning_rate": 3.3433249684570757e-06, "logits/chosen": -1.4506876468658447, "logits/rejected": -1.8762842416763306, "logps/chosen": -418.89984130859375, "logps/rejected": -550.0848388671875, "loss": 0.5604, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.209401845932007, "rewards/margins": 0.9306854009628296, "rewards/rejected": -3.140087366104126, "step": 6900 }, { "epoch": 0.45, "learning_rate": 3.3379475412388724e-06, "logits/chosen": -2.157930850982666, "logits/rejected": -1.8227431774139404, "logps/chosen": -508.7159118652344, "logps/rejected": -634.552978515625, "loss": 0.529, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.124192953109741, "rewards/margins": 1.8052854537963867, "rewards/rejected": -3.929478168487549, "step": 6910 }, { "epoch": 0.45, "learning_rate": 3.3325657428758207e-06, "logits/chosen": -1.7919973134994507, "logits/rejected": -1.8608999252319336, "logps/chosen": -436.8871154785156, "logps/rejected": -589.9657592773438, "loss": 0.4744, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.220111131668091, "rewards/margins": 1.0796172618865967, "rewards/rejected": -3.2997279167175293, "step": 6920 }, { "epoch": 0.45, "learning_rate": 3.3271796014420175e-06, "logits/chosen": -2.2344298362731934, "logits/rejected": -1.459727168083191, "logps/chosen": -545.1464233398438, "logps/rejected": -551.0026245117188, "loss": 0.6576, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.1628594398498535, "rewards/margins": 0.9946087598800659, "rewards/rejected": -3.15746808052063, "step": 6930 }, { "epoch": 0.45, "learning_rate": 3.3217891450342142e-06, "logits/chosen": -2.228728771209717, "logits/rejected": -1.9757254123687744, "logps/chosen": -463.68450927734375, "logps/rejected": -558.191650390625, "loss": 0.5128, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.7646548748016357, "rewards/margins": 1.299391508102417, "rewards/rejected": -3.0640463829040527, "step": 6940 }, { "epoch": 0.45, "learning_rate": 3.3163944017716733e-06, "logits/chosen": -2.4919800758361816, "logits/rejected": -2.2972683906555176, "logps/chosen": -539.271240234375, "logps/rejected": -655.6963500976562, "loss": 0.4676, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7573680877685547, "rewards/margins": 1.1947455406188965, "rewards/rejected": -2.952113389968872, "step": 6950 }, { "epoch": 0.46, "learning_rate": 3.310995399796017e-06, "logits/chosen": -2.123279094696045, "logits/rejected": -1.9764589071273804, "logps/chosen": -563.791259765625, "logps/rejected": -586.9517822265625, "loss": 0.6602, "rewards/accuracies": 0.75, "rewards/chosen": -2.5124528408050537, "rewards/margins": 0.9271472692489624, "rewards/rejected": -3.4396004676818848, "step": 6960 }, { "epoch": 0.46, "learning_rate": 3.305592167271085e-06, "logits/chosen": -2.0485548973083496, "logits/rejected": -2.015782356262207, "logps/chosen": -599.0858154296875, "logps/rejected": -659.1200561523438, "loss": 0.4515, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.135643243789673, "rewards/margins": 1.304933786392212, "rewards/rejected": -3.4405770301818848, "step": 6970 }, { "epoch": 0.46, "learning_rate": 3.3001847323827846e-06, "logits/chosen": -2.3702428340911865, "logits/rejected": -2.0240466594696045, "logps/chosen": -573.703857421875, "logps/rejected": -617.8404541015625, "loss": 0.505, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.744354724884033, "rewards/margins": 1.0058718919754028, "rewards/rejected": -3.7502264976501465, "step": 6980 }, { "epoch": 0.46, "learning_rate": 3.2947731233389447e-06, "logits/chosen": -2.1859326362609863, "logits/rejected": -1.7747504711151123, "logps/chosen": -592.033203125, "logps/rejected": -566.8076171875, "loss": 0.51, "rewards/accuracies": 0.75, "rewards/chosen": -2.7474365234375, "rewards/margins": 0.7229793667793274, "rewards/rejected": -3.470416307449341, "step": 6990 }, { "epoch": 0.46, "learning_rate": 3.2893573683691706e-06, "logits/chosen": -1.8576014041900635, "logits/rejected": -1.935616135597229, "logps/chosen": -571.7442626953125, "logps/rejected": -666.6461181640625, "loss": 0.4546, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9634231328964233, "rewards/margins": 1.1846826076507568, "rewards/rejected": -3.1481058597564697, "step": 7000 }, { "epoch": 0.46, "learning_rate": 3.2839374957246915e-06, "logits/chosen": -1.8236987590789795, "logits/rejected": -2.0967044830322266, "logps/chosen": -503.366455078125, "logps/rejected": -510.83563232421875, "loss": 0.5869, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.859544277191162, "rewards/margins": 0.6328840255737305, "rewards/rejected": -3.4924283027648926, "step": 7010 }, { "epoch": 0.46, "learning_rate": 3.2785135336782187e-06, "logits/chosen": -1.9590002298355103, "logits/rejected": -1.7429759502410889, "logps/chosen": -504.9451599121094, "logps/rejected": -582.9620361328125, "loss": 0.6693, "rewards/accuracies": 0.75, "rewards/chosen": -1.8443717956542969, "rewards/margins": 1.3743021488189697, "rewards/rejected": -3.2186741828918457, "step": 7020 }, { "epoch": 0.46, "learning_rate": 3.2730855105237952e-06, "logits/chosen": -1.7093353271484375, "logits/rejected": -1.8194334506988525, "logps/chosen": -498.09857177734375, "logps/rejected": -631.9508666992188, "loss": 0.4945, "rewards/accuracies": 0.75, "rewards/chosen": -2.2490179538726807, "rewards/margins": 0.7945035696029663, "rewards/rejected": -3.0435216426849365, "step": 7030 }, { "epoch": 0.46, "learning_rate": 3.2676534545766486e-06, "logits/chosen": -1.8025137186050415, "logits/rejected": -1.8641912937164307, "logps/chosen": -565.27490234375, "logps/rejected": -656.0820922851562, "loss": 0.3891, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3689165115356445, "rewards/margins": 1.647971510887146, "rewards/rejected": -4.016888618469238, "step": 7040 }, { "epoch": 0.46, "learning_rate": 3.262217394173043e-06, "logits/chosen": -2.179482936859131, "logits/rejected": -1.9934285879135132, "logps/chosen": -565.6411743164062, "logps/rejected": -619.4987182617188, "loss": 0.4887, "rewards/accuracies": 0.75, "rewards/chosen": -2.432353973388672, "rewards/margins": 0.817031741142273, "rewards/rejected": -3.2493858337402344, "step": 7050 }, { "epoch": 0.46, "learning_rate": 3.2567773576701333e-06, "logits/chosen": -1.961511254310608, "logits/rejected": -2.085686206817627, "logps/chosen": -555.8421630859375, "logps/rejected": -600.7129516601562, "loss": 0.6391, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5808510780334473, "rewards/margins": 0.753670871257782, "rewards/rejected": -3.334522247314453, "step": 7060 }, { "epoch": 0.46, "learning_rate": 3.2513333734458154e-06, "logits/chosen": -1.9186073541641235, "logits/rejected": -1.8230812549591064, "logps/chosen": -550.2675170898438, "logps/rejected": -745.5703125, "loss": 0.5088, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3937816619873047, "rewards/margins": 1.4927335977554321, "rewards/rejected": -3.8865153789520264, "step": 7070 }, { "epoch": 0.46, "learning_rate": 3.245885469898576e-06, "logits/chosen": -2.157291889190674, "logits/rejected": -1.8229951858520508, "logps/chosen": -584.3994140625, "logps/rejected": -601.2501831054688, "loss": 0.6809, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.986717700958252, "rewards/margins": 0.6209832429885864, "rewards/rejected": -3.607701063156128, "step": 7080 }, { "epoch": 0.46, "learning_rate": 3.2404336754473497e-06, "logits/chosen": -1.654690146446228, "logits/rejected": -1.6594860553741455, "logps/chosen": -572.0872802734375, "logps/rejected": -654.669189453125, "loss": 0.5341, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.753058433532715, "rewards/margins": 0.994116485118866, "rewards/rejected": -3.7471747398376465, "step": 7090 }, { "epoch": 0.46, "learning_rate": 3.234978018531367e-06, "logits/chosen": -1.7258952856063843, "logits/rejected": -1.9482316970825195, "logps/chosen": -637.0924682617188, "logps/rejected": -730.4885864257812, "loss": 0.5761, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0775818824768066, "rewards/margins": 0.5728412866592407, "rewards/rejected": -3.6504225730895996, "step": 7100 }, { "epoch": 0.47, "learning_rate": 3.229518527610006e-06, "logits/chosen": -1.4942814111709595, "logits/rejected": -1.8945693969726562, "logps/chosen": -445.71954345703125, "logps/rejected": -536.7567138671875, "loss": 0.6735, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.618427038192749, "rewards/margins": 0.8297905921936035, "rewards/rejected": -3.4482178688049316, "step": 7110 }, { "epoch": 0.47, "learning_rate": 3.2240552311626465e-06, "logits/chosen": -1.9111835956573486, "logits/rejected": -1.954232931137085, "logps/chosen": -617.4752197265625, "logps/rejected": -665.8094482421875, "loss": 0.6307, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.846435070037842, "rewards/margins": 0.4398753046989441, "rewards/rejected": -3.2863106727600098, "step": 7120 }, { "epoch": 0.47, "learning_rate": 3.2185881576885193e-06, "logits/chosen": -2.338397979736328, "logits/rejected": -1.7893339395523071, "logps/chosen": -525.5226440429688, "logps/rejected": -591.5637817382812, "loss": 0.4398, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.201669454574585, "rewards/margins": 1.5123226642608643, "rewards/rejected": -3.71399188041687, "step": 7130 }, { "epoch": 0.47, "learning_rate": 3.213117335706557e-06, "logits/chosen": -1.9989426136016846, "logits/rejected": -1.545607089996338, "logps/chosen": -485.354736328125, "logps/rejected": -522.4522094726562, "loss": 0.4836, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.328815221786499, "rewards/margins": 0.7823472023010254, "rewards/rejected": -3.1111626625061035, "step": 7140 }, { "epoch": 0.47, "learning_rate": 3.2076427937552473e-06, "logits/chosen": -1.7991571426391602, "logits/rejected": -2.210568904876709, "logps/chosen": -554.9445190429688, "logps/rejected": -665.0664672851562, "loss": 0.6054, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2572712898254395, "rewards/margins": 1.0221856832504272, "rewards/rejected": -3.2794570922851562, "step": 7150 }, { "epoch": 0.47, "learning_rate": 3.2021645603924827e-06, "logits/chosen": -2.112433910369873, "logits/rejected": -2.141270160675049, "logps/chosen": -458.9959411621094, "logps/rejected": -521.6207275390625, "loss": 0.5506, "rewards/accuracies": 0.5, "rewards/chosen": -2.4281132221221924, "rewards/margins": 0.4478052258491516, "rewards/rejected": -2.875918388366699, "step": 7160 }, { "epoch": 0.47, "learning_rate": 3.196682664195412e-06, "logits/chosen": -2.0608747005462646, "logits/rejected": -2.0447030067443848, "logps/chosen": -528.4253540039062, "logps/rejected": -657.8396606445312, "loss": 0.4791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.070521116256714, "rewards/margins": 1.1912587881088257, "rewards/rejected": -3.261780261993408, "step": 7170 }, { "epoch": 0.47, "learning_rate": 3.191197133760291e-06, "logits/chosen": -1.872523546218872, "logits/rejected": -1.9694303274154663, "logps/chosen": -565.5282592773438, "logps/rejected": -575.5656127929688, "loss": 0.5671, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.6287894248962402, "rewards/margins": 0.6487969160079956, "rewards/rejected": -3.2775864601135254, "step": 7180 }, { "epoch": 0.47, "learning_rate": 3.185707997702334e-06, "logits/chosen": -2.3510260581970215, "logits/rejected": -1.7899143695831299, "logps/chosen": -515.1236572265625, "logps/rejected": -526.2489624023438, "loss": 0.6378, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5627970695495605, "rewards/margins": 0.5820015072822571, "rewards/rejected": -3.144798755645752, "step": 7190 }, { "epoch": 0.47, "learning_rate": 3.1802152846555624e-06, "logits/chosen": -2.259308338165283, "logits/rejected": -1.6995160579681396, "logps/chosen": -555.3382568359375, "logps/rejected": -688.6828002929688, "loss": 0.4379, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4361205101013184, "rewards/margins": 1.8397639989852905, "rewards/rejected": -4.27588415145874, "step": 7200 }, { "epoch": 0.47, "learning_rate": 3.174719023272659e-06, "logits/chosen": -2.1534180641174316, "logits/rejected": -2.1583364009857178, "logps/chosen": -621.2399291992188, "logps/rejected": -691.5772705078125, "loss": 0.5478, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7099742889404297, "rewards/margins": 0.8596396446228027, "rewards/rejected": -3.5696136951446533, "step": 7210 }, { "epoch": 0.47, "learning_rate": 3.169219242224816e-06, "logits/chosen": -2.2311336994171143, "logits/rejected": -1.8312718868255615, "logps/chosen": -637.9315185546875, "logps/rejected": -664.3123779296875, "loss": 0.5815, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2736587524414062, "rewards/margins": 1.4779915809631348, "rewards/rejected": -3.75165057182312, "step": 7220 }, { "epoch": 0.47, "learning_rate": 3.1637159702015837e-06, "logits/chosen": -2.2375268936157227, "logits/rejected": -2.0711469650268555, "logps/chosen": -615.912109375, "logps/rejected": -613.326416015625, "loss": 0.5244, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.7703040838241577, "rewards/margins": 0.9314772486686707, "rewards/rejected": -2.7017810344696045, "step": 7230 }, { "epoch": 0.47, "learning_rate": 3.1582092359107263e-06, "logits/chosen": -2.2372851371765137, "logits/rejected": -1.9184343814849854, "logps/chosen": -551.5802001953125, "logps/rejected": -714.7056884765625, "loss": 0.3888, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.209808349609375, "rewards/margins": 1.9521551132202148, "rewards/rejected": -4.16196346282959, "step": 7240 }, { "epoch": 0.47, "learning_rate": 3.152699068078067e-06, "logits/chosen": -1.6327937841415405, "logits/rejected": -1.676995873451233, "logps/chosen": -417.7352600097656, "logps/rejected": -610.2640380859375, "loss": 0.4914, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6484999656677246, "rewards/margins": 0.7113903760910034, "rewards/rejected": -3.3598904609680176, "step": 7250 }, { "epoch": 0.48, "learning_rate": 3.1471854954473415e-06, "logits/chosen": -2.270559787750244, "logits/rejected": -2.0879476070404053, "logps/chosen": -601.9791870117188, "logps/rejected": -638.6385498046875, "loss": 0.5273, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.566265821456909, "rewards/margins": 0.6942251920700073, "rewards/rejected": -3.260490894317627, "step": 7260 }, { "epoch": 0.48, "learning_rate": 3.1416685467800436e-06, "logits/chosen": -2.1702325344085693, "logits/rejected": -2.05281138420105, "logps/chosen": -677.0984497070312, "logps/rejected": -686.2244262695312, "loss": 0.4073, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.608895778656006, "rewards/margins": 1.11374831199646, "rewards/rejected": -3.7226436138153076, "step": 7270 }, { "epoch": 0.48, "learning_rate": 3.1361482508552803e-06, "logits/chosen": -2.2912166118621826, "logits/rejected": -2.0227737426757812, "logps/chosen": -630.8660888671875, "logps/rejected": -597.8320922851562, "loss": 0.6156, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.68625545501709, "rewards/margins": 0.6883805394172668, "rewards/rejected": -3.374636173248291, "step": 7280 }, { "epoch": 0.48, "learning_rate": 3.1306246364696198e-06, "logits/chosen": -2.2401461601257324, "logits/rejected": -2.0388596057891846, "logps/chosen": -536.4967041015625, "logps/rejected": -630.9244995117188, "loss": 0.5952, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8389899730682373, "rewards/margins": 0.7212721109390259, "rewards/rejected": -3.5602622032165527, "step": 7290 }, { "epoch": 0.48, "learning_rate": 3.1250977324369413e-06, "logits/chosen": -1.5504920482635498, "logits/rejected": -1.8194414377212524, "logps/chosen": -581.7468872070312, "logps/rejected": -643.1931762695312, "loss": 0.5003, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.213700532913208, "rewards/margins": 0.786910891532898, "rewards/rejected": -4.000611305236816, "step": 7300 }, { "epoch": 0.48, "learning_rate": 3.1195675675882825e-06, "logits/chosen": -1.7138620615005493, "logits/rejected": -1.9886878728866577, "logps/chosen": -532.3826904296875, "logps/rejected": -676.5599975585938, "loss": 0.5468, "rewards/accuracies": 0.75, "rewards/chosen": -3.1613645553588867, "rewards/margins": 1.1969727277755737, "rewards/rejected": -4.358336448669434, "step": 7310 }, { "epoch": 0.48, "learning_rate": 3.1140341707716926e-06, "logits/chosen": -2.1033859252929688, "logits/rejected": -1.4388631582260132, "logps/chosen": -497.56658935546875, "logps/rejected": -699.9725341796875, "loss": 0.6106, "rewards/accuracies": 0.75, "rewards/chosen": -2.9511947631835938, "rewards/margins": 1.3797348737716675, "rewards/rejected": -4.330929279327393, "step": 7320 }, { "epoch": 0.48, "learning_rate": 3.1084975708520803e-06, "logits/chosen": -2.092902421951294, "logits/rejected": -1.963867425918579, "logps/chosen": -516.0930786132812, "logps/rejected": -649.5531616210938, "loss": 0.5842, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7223544120788574, "rewards/margins": 1.2275700569152832, "rewards/rejected": -3.9499244689941406, "step": 7330 }, { "epoch": 0.48, "learning_rate": 3.1029577967110625e-06, "logits/chosen": -2.3549771308898926, "logits/rejected": -1.8174254894256592, "logps/chosen": -562.1286010742188, "logps/rejected": -582.4174194335938, "loss": 0.6719, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.910454511642456, "rewards/margins": 0.6111830472946167, "rewards/rejected": -3.521637439727783, "step": 7340 }, { "epoch": 0.48, "learning_rate": 3.097414877246814e-06, "logits/chosen": -2.3740878105163574, "logits/rejected": -1.9598724842071533, "logps/chosen": -605.2304077148438, "logps/rejected": -613.1309814453125, "loss": 0.5582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4840683937072754, "rewards/margins": 0.8388042449951172, "rewards/rejected": -3.3228728771209717, "step": 7350 }, { "epoch": 0.48, "learning_rate": 3.0918688413739197e-06, "logits/chosen": -2.122352123260498, "logits/rejected": -1.6851507425308228, "logps/chosen": -598.9288330078125, "logps/rejected": -622.5264892578125, "loss": 0.4821, "rewards/accuracies": 0.75, "rewards/chosen": -2.8729500770568848, "rewards/margins": 0.9844695925712585, "rewards/rejected": -3.85741925239563, "step": 7360 }, { "epoch": 0.48, "learning_rate": 3.0863197180232178e-06, "logits/chosen": -2.2433207035064697, "logits/rejected": -2.0860939025878906, "logps/chosen": -630.0677490234375, "logps/rejected": -593.6553955078125, "loss": 0.5065, "rewards/accuracies": 0.75, "rewards/chosen": -2.967116117477417, "rewards/margins": 0.7031041979789734, "rewards/rejected": -3.670220136642456, "step": 7370 }, { "epoch": 0.48, "learning_rate": 3.0807675361416554e-06, "logits/chosen": -2.351539134979248, "logits/rejected": -1.8410431146621704, "logps/chosen": -582.3271484375, "logps/rejected": -596.9386596679688, "loss": 0.5604, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.467467784881592, "rewards/margins": 0.7326360940933228, "rewards/rejected": -3.200104236602783, "step": 7380 }, { "epoch": 0.48, "learning_rate": 3.0752123246921327e-06, "logits/chosen": -1.9665231704711914, "logits/rejected": -2.0008537769317627, "logps/chosen": -475.9529724121094, "logps/rejected": -584.5914916992188, "loss": 0.5119, "rewards/accuracies": 0.75, "rewards/chosen": -2.1385912895202637, "rewards/margins": 1.027195692062378, "rewards/rejected": -3.1657867431640625, "step": 7390 }, { "epoch": 0.48, "learning_rate": 3.069654112653353e-06, "logits/chosen": -1.7564090490341187, "logits/rejected": -1.7700964212417603, "logps/chosen": -577.4471435546875, "logps/rejected": -689.7198486328125, "loss": 0.459, "rewards/accuracies": 0.75, "rewards/chosen": -2.926849126815796, "rewards/margins": 1.3047624826431274, "rewards/rejected": -4.231611728668213, "step": 7400 }, { "epoch": 0.48, "learning_rate": 3.064092929019673e-06, "logits/chosen": -1.8226516246795654, "logits/rejected": -1.5395643711090088, "logps/chosen": -557.78955078125, "logps/rejected": -618.7718505859375, "loss": 0.5415, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.797609329223633, "rewards/margins": 0.6625191569328308, "rewards/rejected": -3.4601283073425293, "step": 7410 }, { "epoch": 0.49, "learning_rate": 3.058528802800952e-06, "logits/chosen": -2.0301146507263184, "logits/rejected": -2.247178316116333, "logps/chosen": -646.4931640625, "logps/rejected": -829.6624755859375, "loss": 0.5092, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2833645343780518, "rewards/margins": 1.4274927377700806, "rewards/rejected": -3.710857391357422, "step": 7420 }, { "epoch": 0.49, "learning_rate": 3.052961763022397e-06, "logits/chosen": -1.8858773708343506, "logits/rejected": -2.2624406814575195, "logps/chosen": -570.9578857421875, "logps/rejected": -716.4319458007812, "loss": 0.4434, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4454357624053955, "rewards/margins": 1.0548083782196045, "rewards/rejected": -3.500244140625, "step": 7430 }, { "epoch": 0.49, "learning_rate": 3.047391838724415e-06, "logits/chosen": -2.4916012287139893, "logits/rejected": -1.750939965248108, "logps/chosen": -706.2770385742188, "logps/rejected": -721.5274047851562, "loss": 0.4539, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9347124099731445, "rewards/margins": 1.29623281955719, "rewards/rejected": -4.230945587158203, "step": 7440 }, { "epoch": 0.49, "learning_rate": 3.0418190589624587e-06, "logits/chosen": -2.0243852138519287, "logits/rejected": -2.1274046897888184, "logps/chosen": -535.9006958007812, "logps/rejected": -807.1841430664062, "loss": 0.5301, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7507853507995605, "rewards/margins": 0.9818651080131531, "rewards/rejected": -3.7326502799987793, "step": 7450 }, { "epoch": 0.49, "learning_rate": 3.0362434528068784e-06, "logits/chosen": -1.406724214553833, "logits/rejected": -2.0237207412719727, "logps/chosen": -536.3141479492188, "logps/rejected": -646.9801025390625, "loss": 0.5888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.90315842628479, "rewards/margins": 0.8274177312850952, "rewards/rejected": -3.7305760383605957, "step": 7460 }, { "epoch": 0.49, "learning_rate": 3.0306650493427657e-06, "logits/chosen": -2.1693460941314697, "logits/rejected": -2.2521302700042725, "logps/chosen": -550.40234375, "logps/rejected": -742.0943603515625, "loss": 0.3351, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.001873731613159, "rewards/margins": 1.350730299949646, "rewards/rejected": -4.352604389190674, "step": 7470 }, { "epoch": 0.49, "learning_rate": 3.0250838776698077e-06, "logits/chosen": -2.1576735973358154, "logits/rejected": -2.0279083251953125, "logps/chosen": -535.0850830078125, "logps/rejected": -600.74072265625, "loss": 0.4392, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.38101863861084, "rewards/margins": 0.6856867074966431, "rewards/rejected": -3.0667052268981934, "step": 7480 }, { "epoch": 0.49, "learning_rate": 3.0194999669021275e-06, "logits/chosen": -2.003931760787964, "logits/rejected": -1.7968238592147827, "logps/chosen": -559.0775756835938, "logps/rejected": -549.0262451171875, "loss": 0.5302, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.790287733078003, "rewards/margins": 0.9173834919929504, "rewards/rejected": -3.7076709270477295, "step": 7490 }, { "epoch": 0.49, "learning_rate": 3.0139133461681403e-06, "logits/chosen": -2.3907244205474854, "logits/rejected": -2.0973424911499023, "logps/chosen": -606.6914672851562, "logps/rejected": -637.0055541992188, "loss": 0.4609, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.213467597961426, "rewards/margins": 1.0296380519866943, "rewards/rejected": -3.243105411529541, "step": 7500 }, { "epoch": 0.49, "learning_rate": 3.0083240446103965e-06, "logits/chosen": -1.9050096273422241, "logits/rejected": -1.8977136611938477, "logps/chosen": -590.552734375, "logps/rejected": -677.3780517578125, "loss": 0.5305, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0147809982299805, "rewards/margins": 1.719355583190918, "rewards/rejected": -4.734137058258057, "step": 7510 }, { "epoch": 0.49, "learning_rate": 3.0027320913854306e-06, "logits/chosen": -2.0884788036346436, "logits/rejected": -1.149019718170166, "logps/chosen": -538.6810913085938, "logps/rejected": -519.0482788085938, "loss": 0.5387, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.799140453338623, "rewards/margins": 0.6474732160568237, "rewards/rejected": -3.4466137886047363, "step": 7520 }, { "epoch": 0.49, "learning_rate": 2.997137515663609e-06, "logits/chosen": -1.813439130783081, "logits/rejected": -2.1003925800323486, "logps/chosen": -560.1712036132812, "logps/rejected": -700.1915283203125, "loss": 0.5146, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.738173484802246, "rewards/margins": 1.099260926246643, "rewards/rejected": -3.8374342918395996, "step": 7530 }, { "epoch": 0.49, "learning_rate": 2.991540346628981e-06, "logits/chosen": -1.7849540710449219, "logits/rejected": -1.9049389362335205, "logps/chosen": -625.4427490234375, "logps/rejected": -718.796875, "loss": 0.679, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.618159532546997, "rewards/margins": 0.27094143629074097, "rewards/rejected": -3.8891005516052246, "step": 7540 }, { "epoch": 0.49, "learning_rate": 2.985940613479121e-06, "logits/chosen": -1.992310881614685, "logits/rejected": -1.9023126363754272, "logps/chosen": -556.0280151367188, "logps/rejected": -602.31787109375, "loss": 0.8377, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.62559175491333, "rewards/margins": 0.700847327709198, "rewards/rejected": -3.3264389038085938, "step": 7550 }, { "epoch": 0.49, "learning_rate": 2.980338345424981e-06, "logits/chosen": -2.198732852935791, "logits/rejected": -2.181652069091797, "logps/chosen": -590.9139404296875, "logps/rejected": -680.5958251953125, "loss": 0.4671, "rewards/accuracies": 0.75, "rewards/chosen": -3.351149082183838, "rewards/margins": 1.126341462135315, "rewards/rejected": -4.477490425109863, "step": 7560 }, { "epoch": 0.5, "learning_rate": 2.974733571690735e-06, "logits/chosen": -2.1074137687683105, "logits/rejected": -1.6007953882217407, "logps/chosen": -622.3907470703125, "logps/rejected": -614.3546142578125, "loss": 0.6541, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.3112881183624268, "rewards/margins": 0.5598162412643433, "rewards/rejected": -3.8711044788360596, "step": 7570 }, { "epoch": 0.5, "learning_rate": 2.9691263215136274e-06, "logits/chosen": -1.917372465133667, "logits/rejected": -2.2247812747955322, "logps/chosen": -559.89599609375, "logps/rejected": -755.5455322265625, "loss": 0.5172, "rewards/accuracies": 0.75, "rewards/chosen": -3.157663106918335, "rewards/margins": 1.2236320972442627, "rewards/rejected": -4.381295204162598, "step": 7580 }, { "epoch": 0.5, "learning_rate": 2.963516624143823e-06, "logits/chosen": -1.9110147953033447, "logits/rejected": -1.9160484075546265, "logps/chosen": -566.1820678710938, "logps/rejected": -635.0407104492188, "loss": 0.5732, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1472010612487793, "rewards/margins": 0.6961196660995483, "rewards/rejected": -3.843320846557617, "step": 7590 }, { "epoch": 0.5, "learning_rate": 2.9579045088442504e-06, "logits/chosen": -1.8146030902862549, "logits/rejected": -2.293555736541748, "logps/chosen": -662.2504272460938, "logps/rejected": -803.9173583984375, "loss": 0.6416, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.340151309967041, "rewards/margins": 0.6970237493515015, "rewards/rejected": -4.037175178527832, "step": 7600 }, { "epoch": 0.5, "learning_rate": 2.9522900048904534e-06, "logits/chosen": -2.024477481842041, "logits/rejected": -2.045595645904541, "logps/chosen": -546.9307250976562, "logps/rejected": -599.1251220703125, "loss": 0.616, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.589736223220825, "rewards/margins": 0.8832021951675415, "rewards/rejected": -3.472938060760498, "step": 7610 }, { "epoch": 0.5, "learning_rate": 2.9466731415704343e-06, "logits/chosen": -1.8404598236083984, "logits/rejected": -1.3947179317474365, "logps/chosen": -658.2296142578125, "logps/rejected": -744.2030029296875, "loss": 0.5928, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.3021206855773926, "rewards/margins": 0.7320025563240051, "rewards/rejected": -4.034123420715332, "step": 7620 }, { "epoch": 0.5, "learning_rate": 2.941053948184503e-06, "logits/chosen": -2.174973487854004, "logits/rejected": -2.1286215782165527, "logps/chosen": -636.4111328125, "logps/rejected": -579.2327270507812, "loss": 0.5121, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6131653785705566, "rewards/margins": 0.3719381093978882, "rewards/rejected": -2.985103130340576, "step": 7630 }, { "epoch": 0.5, "learning_rate": 2.935432454045125e-06, "logits/chosen": -1.993748664855957, "logits/rejected": -1.7756856679916382, "logps/chosen": -507.26373291015625, "logps/rejected": -602.416015625, "loss": 0.5149, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.748135566711426, "rewards/margins": 0.6938328742980957, "rewards/rejected": -3.4419684410095215, "step": 7640 }, { "epoch": 0.5, "learning_rate": 2.929808688476768e-06, "logits/chosen": -1.8288981914520264, "logits/rejected": -1.465491533279419, "logps/chosen": -577.890380859375, "logps/rejected": -576.1401977539062, "loss": 0.5258, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.685096502304077, "rewards/margins": 0.8233284950256348, "rewards/rejected": -3.508425235748291, "step": 7650 }, { "epoch": 0.5, "learning_rate": 2.924182680815748e-06, "logits/chosen": -1.9489812850952148, "logits/rejected": -1.8596827983856201, "logps/chosen": -533.8180541992188, "logps/rejected": -677.320556640625, "loss": 0.5126, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3371520042419434, "rewards/margins": 1.4479613304138184, "rewards/rejected": -3.7851130962371826, "step": 7660 }, { "epoch": 0.5, "learning_rate": 2.9185544604100765e-06, "logits/chosen": -2.1623177528381348, "logits/rejected": -1.8493192195892334, "logps/chosen": -606.8828125, "logps/rejected": -761.3399658203125, "loss": 0.6452, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8764357566833496, "rewards/margins": 0.8948076367378235, "rewards/rejected": -3.7712433338165283, "step": 7670 }, { "epoch": 0.5, "learning_rate": 2.9129240566193083e-06, "logits/chosen": -2.0953116416931152, "logits/rejected": -1.8859145641326904, "logps/chosen": -606.9465942382812, "logps/rejected": -633.6063842773438, "loss": 0.5791, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3007349967956543, "rewards/margins": 1.156020164489746, "rewards/rejected": -3.4567553997039795, "step": 7680 }, { "epoch": 0.5, "learning_rate": 2.9072914988143874e-06, "logits/chosen": -1.8157374858856201, "logits/rejected": -1.3965591192245483, "logps/chosen": -524.5069580078125, "logps/rejected": -614.9376220703125, "loss": 0.6726, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.7255921363830566, "rewards/margins": 0.8382970094680786, "rewards/rejected": -3.5638890266418457, "step": 7690 }, { "epoch": 0.5, "learning_rate": 2.9016568163774956e-06, "logits/chosen": -1.7731380462646484, "logits/rejected": -1.441528081893921, "logps/chosen": -490.97412109375, "logps/rejected": -556.3486938476562, "loss": 0.6411, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.0641536712646484, "rewards/margins": 0.7212511301040649, "rewards/rejected": -3.785404920578003, "step": 7700 }, { "epoch": 0.5, "learning_rate": 2.8960200387018942e-06, "logits/chosen": -1.8296773433685303, "logits/rejected": -1.2881231307983398, "logps/chosen": -549.3494873046875, "logps/rejected": -603.0115966796875, "loss": 0.6182, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.878037214279175, "rewards/margins": 0.7086192965507507, "rewards/rejected": -3.5866565704345703, "step": 7710 }, { "epoch": 0.51, "learning_rate": 2.8903811951917792e-06, "logits/chosen": -2.2229435443878174, "logits/rejected": -1.958284616470337, "logps/chosen": -506.4098205566406, "logps/rejected": -623.1008911132812, "loss": 0.3227, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.339682102203369, "rewards/margins": 1.6844733953475952, "rewards/rejected": -4.024155616760254, "step": 7720 }, { "epoch": 0.51, "learning_rate": 2.88474031526212e-06, "logits/chosen": -1.440571904182434, "logits/rejected": -1.1719087362289429, "logps/chosen": -575.4572143554688, "logps/rejected": -750.7650146484375, "loss": 0.5327, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.3039677143096924, "rewards/margins": 1.0600093603134155, "rewards/rejected": -4.363976955413818, "step": 7730 }, { "epoch": 0.51, "learning_rate": 2.879097428338509e-06, "logits/chosen": -1.9522345066070557, "logits/rejected": -1.9992725849151611, "logps/chosen": -740.0489501953125, "logps/rejected": -771.926513671875, "loss": 0.6117, "rewards/accuracies": 0.75, "rewards/chosen": -2.375081777572632, "rewards/margins": 0.974968433380127, "rewards/rejected": -3.350050449371338, "step": 7740 }, { "epoch": 0.51, "learning_rate": 2.8734525638570094e-06, "logits/chosen": -1.8566176891326904, "logits/rejected": -1.8508907556533813, "logps/chosen": -519.3516235351562, "logps/rejected": -715.54736328125, "loss": 0.4383, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.836459159851074, "rewards/margins": 1.5149306058883667, "rewards/rejected": -4.3513898849487305, "step": 7750 }, { "epoch": 0.51, "learning_rate": 2.8678057512639982e-06, "logits/chosen": -2.220822334289551, "logits/rejected": -2.013430118560791, "logps/chosen": -572.8555908203125, "logps/rejected": -565.0475463867188, "loss": 0.5743, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.433563709259033, "rewards/margins": 0.9110967516899109, "rewards/rejected": -3.3446602821350098, "step": 7760 }, { "epoch": 0.51, "learning_rate": 2.8621570200160172e-06, "logits/chosen": -2.3124799728393555, "logits/rejected": -1.682875394821167, "logps/chosen": -638.2242431640625, "logps/rejected": -681.7181396484375, "loss": 0.4721, "rewards/accuracies": 0.75, "rewards/chosen": -2.2771756649017334, "rewards/margins": 1.1830321550369263, "rewards/rejected": -3.460207462310791, "step": 7770 }, { "epoch": 0.51, "learning_rate": 2.856506399579615e-06, "logits/chosen": -1.984126329421997, "logits/rejected": -1.962230920791626, "logps/chosen": -581.7562866210938, "logps/rejected": -631.6907348632812, "loss": 0.4095, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4491162300109863, "rewards/margins": 1.3098396062850952, "rewards/rejected": -3.758955717086792, "step": 7780 }, { "epoch": 0.51, "learning_rate": 2.8508539194311964e-06, "logits/chosen": -1.8445428609848022, "logits/rejected": -1.3157740831375122, "logps/chosen": -543.8956298828125, "logps/rejected": -637.4561157226562, "loss": 0.3878, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.0309014320373535, "rewards/margins": 0.7112507820129395, "rewards/rejected": -3.7421517372131348, "step": 7790 }, { "epoch": 0.51, "learning_rate": 2.8451996090568656e-06, "logits/chosen": -2.2228527069091797, "logits/rejected": -2.2127482891082764, "logps/chosen": -584.2711181640625, "logps/rejected": -585.3831787109375, "loss": 0.6862, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.6453375816345215, "rewards/margins": 0.6250047087669373, "rewards/rejected": -3.2703425884246826, "step": 7800 }, { "epoch": 0.51, "learning_rate": 2.839543497952276e-06, "logits/chosen": -1.9297635555267334, "logits/rejected": -1.6234534978866577, "logps/chosen": -576.9271240234375, "logps/rejected": -643.7190551757812, "loss": 0.7176, "rewards/accuracies": 0.75, "rewards/chosen": -2.2241415977478027, "rewards/margins": 1.2629339694976807, "rewards/rejected": -3.4870758056640625, "step": 7810 }, { "epoch": 0.51, "learning_rate": 2.833885615622474e-06, "logits/chosen": -2.187995195388794, "logits/rejected": -2.138394594192505, "logps/chosen": -568.1746215820312, "logps/rejected": -569.004150390625, "loss": 0.5921, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4697365760803223, "rewards/margins": 0.4508172869682312, "rewards/rejected": -2.9205539226531982, "step": 7820 }, { "epoch": 0.51, "learning_rate": 2.8282259915817454e-06, "logits/chosen": -2.0024540424346924, "logits/rejected": -2.0259571075439453, "logps/chosen": -491.30303955078125, "logps/rejected": -553.1123657226562, "loss": 0.5645, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.398832082748413, "rewards/margins": 0.8270804286003113, "rewards/rejected": -3.225912570953369, "step": 7830 }, { "epoch": 0.51, "learning_rate": 2.8225646553534614e-06, "logits/chosen": -2.020742416381836, "logits/rejected": -1.9773505926132202, "logps/chosen": -591.7495727539062, "logps/rejected": -621.8164672851562, "loss": 0.5181, "rewards/accuracies": 0.75, "rewards/chosen": -2.5809714794158936, "rewards/margins": 1.1599743366241455, "rewards/rejected": -3.740945816040039, "step": 7840 }, { "epoch": 0.51, "learning_rate": 2.8169016364699255e-06, "logits/chosen": -2.116873264312744, "logits/rejected": -1.9739990234375, "logps/chosen": -645.8020629882812, "logps/rejected": -601.5529174804688, "loss": 0.4981, "rewards/accuracies": 0.75, "rewards/chosen": -2.3944008350372314, "rewards/margins": 1.0491383075714111, "rewards/rejected": -3.4435393810272217, "step": 7850 }, { "epoch": 0.51, "learning_rate": 2.811236964472217e-06, "logits/chosen": -1.8916501998901367, "logits/rejected": -2.0408172607421875, "logps/chosen": -615.2294311523438, "logps/rejected": -675.1611328125, "loss": 0.5551, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5767409801483154, "rewards/margins": 0.8729038238525391, "rewards/rejected": -3.4496452808380127, "step": 7860 }, { "epoch": 0.51, "learning_rate": 2.805570668910041e-06, "logits/chosen": -2.265167474746704, "logits/rejected": -1.8190959692001343, "logps/chosen": -616.2484130859375, "logps/rejected": -596.4984130859375, "loss": 0.482, "rewards/accuracies": 0.75, "rewards/chosen": -2.5450892448425293, "rewards/margins": 1.3092725276947021, "rewards/rejected": -3.8543620109558105, "step": 7870 }, { "epoch": 0.52, "learning_rate": 2.7999027793415695e-06, "logits/chosen": -1.995221495628357, "logits/rejected": -1.8924598693847656, "logps/chosen": -523.5404052734375, "logps/rejected": -646.7638549804688, "loss": 0.5813, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0813703536987305, "rewards/margins": 0.8624595403671265, "rewards/rejected": -2.9438297748565674, "step": 7880 }, { "epoch": 0.52, "learning_rate": 2.794233325333293e-06, "logits/chosen": -2.136390209197998, "logits/rejected": -1.8115367889404297, "logps/chosen": -530.805908203125, "logps/rejected": -636.94970703125, "loss": 0.5549, "rewards/accuracies": 0.75, "rewards/chosen": -2.795574188232422, "rewards/margins": 1.2153223752975464, "rewards/rejected": -4.010896682739258, "step": 7890 }, { "epoch": 0.52, "learning_rate": 2.7885623364598597e-06, "logits/chosen": -1.9392417669296265, "logits/rejected": -1.6217542886734009, "logps/chosen": -475.58160400390625, "logps/rejected": -630.3709716796875, "loss": 0.3976, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.8851187229156494, "rewards/margins": 1.734307050704956, "rewards/rejected": -3.6194255352020264, "step": 7900 }, { "epoch": 0.52, "learning_rate": 2.782889842303926e-06, "logits/chosen": -2.280442714691162, "logits/rejected": -1.943890929222107, "logps/chosen": -546.3623657226562, "logps/rejected": -614.0206298828125, "loss": 0.5306, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.287599563598633, "rewards/margins": 1.1138108968734741, "rewards/rejected": -3.4014103412628174, "step": 7910 }, { "epoch": 0.52, "learning_rate": 2.7772158724559987e-06, "logits/chosen": -2.0244617462158203, "logits/rejected": -1.449589490890503, "logps/chosen": -476.2408752441406, "logps/rejected": -687.8169555664062, "loss": 0.4509, "rewards/accuracies": 0.75, "rewards/chosen": -2.570810317993164, "rewards/margins": 1.615671157836914, "rewards/rejected": -4.186481475830078, "step": 7920 }, { "epoch": 0.52, "learning_rate": 2.7715404565142856e-06, "logits/chosen": -2.1096596717834473, "logits/rejected": -1.701581597328186, "logps/chosen": -510.6554260253906, "logps/rejected": -584.50732421875, "loss": 0.6696, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2401087284088135, "rewards/margins": 0.8200968503952026, "rewards/rejected": -3.0602056980133057, "step": 7930 }, { "epoch": 0.52, "learning_rate": 2.7658636240845354e-06, "logits/chosen": -1.7610938549041748, "logits/rejected": -1.8943217992782593, "logps/chosen": -433.93353271484375, "logps/rejected": -493.35736083984375, "loss": 0.5261, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.544769287109375, "rewards/margins": 0.5446535348892212, "rewards/rejected": -3.0894227027893066, "step": 7940 }, { "epoch": 0.52, "learning_rate": 2.7601854047798872e-06, "logits/chosen": -2.537883758544922, "logits/rejected": -2.118722915649414, "logps/chosen": -635.4991455078125, "logps/rejected": -613.1102905273438, "loss": 0.5925, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.841888427734375, "rewards/margins": 0.7391589879989624, "rewards/rejected": -2.581047296524048, "step": 7950 }, { "epoch": 0.52, "learning_rate": 2.7545058282207148e-06, "logits/chosen": -2.077910900115967, "logits/rejected": -1.944738745689392, "logps/chosen": -593.0620727539062, "logps/rejected": -686.479736328125, "loss": 0.4804, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.03245210647583, "rewards/margins": 1.290585994720459, "rewards/rejected": -3.323037624359131, "step": 7960 }, { "epoch": 0.52, "learning_rate": 2.748824924034471e-06, "logits/chosen": -2.2640910148620605, "logits/rejected": -2.149371862411499, "logps/chosen": -556.6024169921875, "logps/rejected": -612.380859375, "loss": 0.4687, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.009018659591675, "rewards/margins": 1.4819543361663818, "rewards/rejected": -3.4909729957580566, "step": 7970 }, { "epoch": 0.52, "learning_rate": 2.743142721855536e-06, "logits/chosen": -2.1707024574279785, "logits/rejected": -1.8723373413085938, "logps/chosen": -507.60791015625, "logps/rejected": -539.167724609375, "loss": 0.7497, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.035862445831299, "rewards/margins": 0.5101307034492493, "rewards/rejected": -2.5459933280944824, "step": 7980 }, { "epoch": 0.52, "learning_rate": 2.737459251325058e-06, "logits/chosen": -2.2062249183654785, "logits/rejected": -2.106947422027588, "logps/chosen": -587.2820434570312, "logps/rejected": -550.6422119140625, "loss": 0.5874, "rewards/accuracies": 0.75, "rewards/chosen": -1.542938470840454, "rewards/margins": 0.6787039041519165, "rewards/rejected": -2.221642255783081, "step": 7990 }, { "epoch": 0.52, "learning_rate": 2.731774542090804e-06, "logits/chosen": -2.335017681121826, "logits/rejected": -1.8789472579956055, "logps/chosen": -478.0525817871094, "logps/rejected": -576.288330078125, "loss": 0.5073, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8263978958129883, "rewards/margins": 1.1661754846572876, "rewards/rejected": -2.9925730228424072, "step": 8000 }, { "epoch": 0.52, "learning_rate": 2.7260886238070034e-06, "logits/chosen": -1.8527981042861938, "logits/rejected": -1.994049072265625, "logps/chosen": -495.73193359375, "logps/rejected": -610.97900390625, "loss": 0.5424, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4737749099731445, "rewards/margins": 0.9065383672714233, "rewards/rejected": -3.3803133964538574, "step": 8010 }, { "epoch": 0.52, "learning_rate": 2.72040152613419e-06, "logits/chosen": -1.8517286777496338, "logits/rejected": -2.2476627826690674, "logps/chosen": -539.6078491210938, "logps/rejected": -745.31982421875, "loss": 0.4956, "rewards/accuracies": 0.75, "rewards/chosen": -2.0612945556640625, "rewards/margins": 1.223171591758728, "rewards/rejected": -3.28446626663208, "step": 8020 }, { "epoch": 0.53, "learning_rate": 2.7147132787390516e-06, "logits/chosen": -1.8884389400482178, "logits/rejected": -1.7699356079101562, "logps/chosen": -534.407470703125, "logps/rejected": -597.0699462890625, "loss": 0.4777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3583502769470215, "rewards/margins": 0.9624267816543579, "rewards/rejected": -3.3207767009735107, "step": 8030 }, { "epoch": 0.53, "learning_rate": 2.709023911294273e-06, "logits/chosen": -2.1596829891204834, "logits/rejected": -1.7965008020401, "logps/chosen": -496.8246154785156, "logps/rejected": -651.1993408203125, "loss": 0.5823, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.7730891704559326, "rewards/margins": 1.466064691543579, "rewards/rejected": -3.239154100418091, "step": 8040 }, { "epoch": 0.53, "learning_rate": 2.7033334534783806e-06, "logits/chosen": -2.329775333404541, "logits/rejected": -1.6206992864608765, "logps/chosen": -534.77392578125, "logps/rejected": -617.8519897460938, "loss": 0.5103, "rewards/accuracies": 0.75, "rewards/chosen": -2.2892603874206543, "rewards/margins": 1.2485960721969604, "rewards/rejected": -3.5378565788269043, "step": 8050 }, { "epoch": 0.53, "learning_rate": 2.697641934975592e-06, "logits/chosen": -1.8848540782928467, "logits/rejected": -1.6786342859268188, "logps/chosen": -503.2972106933594, "logps/rejected": -617.09521484375, "loss": 0.3394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5366880893707275, "rewards/margins": 1.0685951709747314, "rewards/rejected": -3.60528302192688, "step": 8060 }, { "epoch": 0.53, "learning_rate": 2.691949385475654e-06, "logits/chosen": -2.042297601699829, "logits/rejected": -1.7582738399505615, "logps/chosen": -531.7859497070312, "logps/rejected": -550.2825927734375, "loss": 0.5953, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7436671257019043, "rewards/margins": 1.0303003787994385, "rewards/rejected": -3.7739672660827637, "step": 8070 }, { "epoch": 0.53, "learning_rate": 2.6862558346736937e-06, "logits/chosen": -2.389453411102295, "logits/rejected": -2.338557481765747, "logps/chosen": -642.6590576171875, "logps/rejected": -727.0181274414062, "loss": 0.4292, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8770205974578857, "rewards/margins": 1.48763906955719, "rewards/rejected": -3.3646597862243652, "step": 8080 }, { "epoch": 0.53, "learning_rate": 2.6805613122700617e-06, "logits/chosen": -1.9517875909805298, "logits/rejected": -2.075026035308838, "logps/chosen": -415.518798828125, "logps/rejected": -472.64398193359375, "loss": 0.4211, "rewards/accuracies": 0.75, "rewards/chosen": -1.528329849243164, "rewards/margins": 0.8481928706169128, "rewards/rejected": -2.3765225410461426, "step": 8090 }, { "epoch": 0.53, "learning_rate": 2.674865847970176e-06, "logits/chosen": -1.7727165222167969, "logits/rejected": -2.060763120651245, "logps/chosen": -469.1021423339844, "logps/rejected": -458.26763916015625, "loss": 0.4274, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1519832611083984, "rewards/margins": 0.7703964114189148, "rewards/rejected": -2.922379732131958, "step": 8100 }, { "epoch": 0.53, "learning_rate": 2.669169471484368e-06, "logits/chosen": -1.7492620944976807, "logits/rejected": -1.66244637966156, "logps/chosen": -560.0657958984375, "logps/rejected": -617.1555786132812, "loss": 0.5842, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8839614391326904, "rewards/margins": 0.47338324785232544, "rewards/rejected": -3.35734486579895, "step": 8110 }, { "epoch": 0.53, "learning_rate": 2.6634722125277278e-06, "logits/chosen": -2.0980145931243896, "logits/rejected": -1.6550731658935547, "logps/chosen": -477.326416015625, "logps/rejected": -490.2355041503906, "loss": 0.4816, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6155064105987549, "rewards/margins": 0.9734585881233215, "rewards/rejected": -2.5889651775360107, "step": 8120 }, { "epoch": 0.53, "learning_rate": 2.6577741008199498e-06, "logits/chosen": -1.685837984085083, "logits/rejected": -1.5082666873931885, "logps/chosen": -562.9486083984375, "logps/rejected": -691.534912109375, "loss": 0.5931, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.028662919998169, "rewards/margins": 0.8841172456741333, "rewards/rejected": -3.912780284881592, "step": 8130 }, { "epoch": 0.53, "learning_rate": 2.652075166085175e-06, "logits/chosen": -1.9452396631240845, "logits/rejected": -2.100419521331787, "logps/chosen": -493.06060791015625, "logps/rejected": -645.3451538085938, "loss": 0.4167, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.8556363582611084, "rewards/margins": 1.4208673238754272, "rewards/rejected": -3.276503801345825, "step": 8140 }, { "epoch": 0.53, "learning_rate": 2.6463754380518395e-06, "logits/chosen": -2.252941131591797, "logits/rejected": -1.7829294204711914, "logps/chosen": -621.8170776367188, "logps/rejected": -660.5582275390625, "loss": 0.6381, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8197734355926514, "rewards/margins": 0.8733735084533691, "rewards/rejected": -3.6931469440460205, "step": 8150 }, { "epoch": 0.53, "learning_rate": 2.6406749464525167e-06, "logits/chosen": -2.0800015926361084, "logits/rejected": -2.3741583824157715, "logps/chosen": -509.7748107910156, "logps/rejected": -587.732421875, "loss": 0.6648, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1928679943084717, "rewards/margins": 0.18233470618724823, "rewards/rejected": -2.3752026557922363, "step": 8160 }, { "epoch": 0.53, "learning_rate": 2.634973721023762e-06, "logits/chosen": -1.9787933826446533, "logits/rejected": -2.109104871749878, "logps/chosen": -439.8349609375, "logps/rejected": -533.9478759765625, "loss": 0.547, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5185022354125977, "rewards/margins": 0.8148691058158875, "rewards/rejected": -3.33337140083313, "step": 8170 }, { "epoch": 0.54, "learning_rate": 2.6292717915059605e-06, "logits/chosen": -2.510175943374634, "logits/rejected": -2.1551032066345215, "logps/chosen": -553.9089965820312, "logps/rejected": -695.7274169921875, "loss": 0.4459, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9888378381729126, "rewards/margins": 1.4000943899154663, "rewards/rejected": -3.388932466506958, "step": 8180 }, { "epoch": 0.54, "learning_rate": 2.6235691876431706e-06, "logits/chosen": -2.161252498626709, "logits/rejected": -1.870642900466919, "logps/chosen": -588.6565551757812, "logps/rejected": -673.9668579101562, "loss": 0.5259, "rewards/accuracies": 0.75, "rewards/chosen": -2.203993082046509, "rewards/margins": 1.1176879405975342, "rewards/rejected": -3.321680784225464, "step": 8190 }, { "epoch": 0.54, "learning_rate": 2.6178659391829673e-06, "logits/chosen": -1.940239667892456, "logits/rejected": -1.860875129699707, "logps/chosen": -469.7591247558594, "logps/rejected": -469.07171630859375, "loss": 0.5019, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2931342124938965, "rewards/margins": 0.6397870182991028, "rewards/rejected": -2.9329214096069336, "step": 8200 }, { "epoch": 0.54, "learning_rate": 2.6121620758762877e-06, "logits/chosen": -1.9328632354736328, "logits/rejected": -2.13527774810791, "logps/chosen": -391.53863525390625, "logps/rejected": -537.9548950195312, "loss": 0.3995, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.0132150650024414, "rewards/margins": 0.770500898361206, "rewards/rejected": -2.7837162017822266, "step": 8210 }, { "epoch": 0.54, "learning_rate": 2.606457627477277e-06, "logits/chosen": -2.294654369354248, "logits/rejected": -1.6887871026992798, "logps/chosen": -506.1493225097656, "logps/rejected": -584.48388671875, "loss": 0.4721, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9555402994155884, "rewards/margins": 1.706686019897461, "rewards/rejected": -3.6622262001037598, "step": 8220 }, { "epoch": 0.54, "learning_rate": 2.6007526237431324e-06, "logits/chosen": -2.1083502769470215, "logits/rejected": -1.6280349493026733, "logps/chosen": -593.1681518554688, "logps/rejected": -582.7951049804688, "loss": 0.7511, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.666224956512451, "rewards/margins": 0.6657021641731262, "rewards/rejected": -3.3319270610809326, "step": 8230 }, { "epoch": 0.54, "learning_rate": 2.5950470944339478e-06, "logits/chosen": -2.0212769508361816, "logits/rejected": -2.3718209266662598, "logps/chosen": -608.8660278320312, "logps/rejected": -666.3863525390625, "loss": 0.4838, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9420130252838135, "rewards/margins": 1.0956814289093018, "rewards/rejected": -3.0376946926116943, "step": 8240 }, { "epoch": 0.54, "learning_rate": 2.58934106931256e-06, "logits/chosen": -1.927168607711792, "logits/rejected": -1.600671410560608, "logps/chosen": -461.33953857421875, "logps/rejected": -605.0335083007812, "loss": 0.3508, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.1052207946777344, "rewards/margins": 1.9401206970214844, "rewards/rejected": -4.045341491699219, "step": 8250 }, { "epoch": 0.54, "learning_rate": 2.58363457814439e-06, "logits/chosen": -2.3823981285095215, "logits/rejected": -1.7535566091537476, "logps/chosen": -635.8217163085938, "logps/rejected": -611.2908325195312, "loss": 0.6405, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.046607494354248, "rewards/margins": 0.7194627523422241, "rewards/rejected": -3.7660701274871826, "step": 8260 }, { "epoch": 0.54, "learning_rate": 2.5779276506972924e-06, "logits/chosen": -2.0197932720184326, "logits/rejected": -1.9778947830200195, "logps/chosen": -543.31884765625, "logps/rejected": -660.4400634765625, "loss": 0.6526, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.641650438308716, "rewards/margins": 0.852077305316925, "rewards/rejected": -3.493727445602417, "step": 8270 }, { "epoch": 0.54, "learning_rate": 2.5722203167413945e-06, "logits/chosen": -1.8766615390777588, "logits/rejected": -1.3877742290496826, "logps/chosen": -449.7989196777344, "logps/rejected": -610.9358520507812, "loss": 0.4744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.8375191688537598, "rewards/margins": 1.3484939336776733, "rewards/rejected": -4.186013221740723, "step": 8280 }, { "epoch": 0.54, "learning_rate": 2.5665126060489476e-06, "logits/chosen": -1.892877221107483, "logits/rejected": -1.736212134361267, "logps/chosen": -544.884521484375, "logps/rejected": -616.0360717773438, "loss": 0.523, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8708701133728027, "rewards/margins": 0.8997025489807129, "rewards/rejected": -3.7705726623535156, "step": 8290 }, { "epoch": 0.54, "learning_rate": 2.560804548394165e-06, "logits/chosen": -1.9360411167144775, "logits/rejected": -2.0777390003204346, "logps/chosen": -484.18707275390625, "logps/rejected": -608.6365966796875, "loss": 0.6754, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4959094524383545, "rewards/margins": 0.8614379167556763, "rewards/rejected": -3.3573474884033203, "step": 8300 }, { "epoch": 0.54, "learning_rate": 2.5550961735530734e-06, "logits/chosen": -2.0347352027893066, "logits/rejected": -1.6178643703460693, "logps/chosen": -506.50091552734375, "logps/rejected": -632.1541748046875, "loss": 0.4265, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2434918880462646, "rewards/margins": 1.5187815427780151, "rewards/rejected": -3.7622733116149902, "step": 8310 }, { "epoch": 0.54, "learning_rate": 2.549387511303351e-06, "logits/chosen": -2.024339199066162, "logits/rejected": -1.7003933191299438, "logps/chosen": -438.71697998046875, "logps/rejected": -716.5164794921875, "loss": 0.2969, "rewards/accuracies": 0.75, "rewards/chosen": -1.8050426244735718, "rewards/margins": 1.772749900817871, "rewards/rejected": -3.5777924060821533, "step": 8320 }, { "epoch": 0.55, "learning_rate": 2.5436785914241774e-06, "logits/chosen": -2.4603960514068604, "logits/rejected": -1.6958461999893188, "logps/chosen": -467.468017578125, "logps/rejected": -594.4733276367188, "loss": 0.4276, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.301406979560852, "rewards/margins": 1.6142040491104126, "rewards/rejected": -2.9156107902526855, "step": 8330 }, { "epoch": 0.55, "learning_rate": 2.5379694436960746e-06, "logits/chosen": -2.165832281112671, "logits/rejected": -1.8188012838363647, "logps/chosen": -667.1691284179688, "logps/rejected": -676.0819702148438, "loss": 0.489, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6188740730285645, "rewards/margins": 0.7326040267944336, "rewards/rejected": -3.351478099822998, "step": 8340 }, { "epoch": 0.55, "learning_rate": 2.5322600979007533e-06, "logits/chosen": -1.8008044958114624, "logits/rejected": -2.0625736713409424, "logps/chosen": -565.9276123046875, "logps/rejected": -608.2940673828125, "loss": 0.4996, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9943790435791016, "rewards/margins": 0.9581402540206909, "rewards/rejected": -2.952519416809082, "step": 8350 }, { "epoch": 0.55, "learning_rate": 2.5265505838209592e-06, "logits/chosen": -1.8943090438842773, "logits/rejected": -1.882758378982544, "logps/chosen": -507.8409729003906, "logps/rejected": -650.7178955078125, "loss": 0.5263, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.271254301071167, "rewards/margins": 1.2633321285247803, "rewards/rejected": -3.5345866680145264, "step": 8360 }, { "epoch": 0.55, "learning_rate": 2.520840931240314e-06, "logits/chosen": -2.142739772796631, "logits/rejected": -1.6363484859466553, "logps/chosen": -534.8933715820312, "logps/rejected": -612.6922607421875, "loss": 0.5113, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1984381675720215, "rewards/margins": 0.9206544756889343, "rewards/rejected": -3.1190924644470215, "step": 8370 }, { "epoch": 0.55, "learning_rate": 2.515131169943162e-06, "logits/chosen": -1.9574629068374634, "logits/rejected": -1.7319316864013672, "logps/chosen": -486.0719299316406, "logps/rejected": -586.4020385742188, "loss": 0.5304, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3126673698425293, "rewards/margins": 1.0584437847137451, "rewards/rejected": -3.3711113929748535, "step": 8380 }, { "epoch": 0.55, "learning_rate": 2.509421329714416e-06, "logits/chosen": -2.012256145477295, "logits/rejected": -1.9683516025543213, "logps/chosen": -503.61798095703125, "logps/rejected": -625.0061645507812, "loss": 0.53, "rewards/accuracies": 0.75, "rewards/chosen": -2.458313465118408, "rewards/margins": 1.1787118911743164, "rewards/rejected": -3.637025833129883, "step": 8390 }, { "epoch": 0.55, "learning_rate": 2.5037114403393987e-06, "logits/chosen": -2.151923656463623, "logits/rejected": -1.5198156833648682, "logps/chosen": -541.1068115234375, "logps/rejected": -530.3890991210938, "loss": 0.6136, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6220719814300537, "rewards/margins": 1.383866548538208, "rewards/rejected": -4.005938529968262, "step": 8400 }, { "epoch": 0.55, "learning_rate": 2.4980015316036908e-06, "logits/chosen": -2.0013201236724854, "logits/rejected": -1.9275219440460205, "logps/chosen": -482.7667541503906, "logps/rejected": -672.0367431640625, "loss": 0.5668, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.384315252304077, "rewards/margins": 0.8206236958503723, "rewards/rejected": -3.2049388885498047, "step": 8410 }, { "epoch": 0.55, "learning_rate": 2.4922916332929725e-06, "logits/chosen": -1.9645591974258423, "logits/rejected": -1.5715625286102295, "logps/chosen": -548.0907592773438, "logps/rejected": -720.3143310546875, "loss": 0.5238, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.744553804397583, "rewards/margins": 1.1086987257003784, "rewards/rejected": -3.853252410888672, "step": 8420 }, { "epoch": 0.55, "learning_rate": 2.4865817751928716e-06, "logits/chosen": -2.183432102203369, "logits/rejected": -1.9140920639038086, "logps/chosen": -598.3126220703125, "logps/rejected": -632.6630859375, "loss": 0.6305, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.114100456237793, "rewards/margins": 0.8765825033187866, "rewards/rejected": -3.9906837940216064, "step": 8430 }, { "epoch": 0.55, "learning_rate": 2.4808719870888037e-06, "logits/chosen": -2.294104814529419, "logits/rejected": -1.6438252925872803, "logps/chosen": -486.91192626953125, "logps/rejected": -639.6134033203125, "loss": 0.478, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.013448476791382, "rewards/margins": 1.6693031787872314, "rewards/rejected": -3.682751417160034, "step": 8440 }, { "epoch": 0.55, "learning_rate": 2.4751622987658206e-06, "logits/chosen": -1.4400765895843506, "logits/rejected": -1.8438045978546143, "logps/chosen": -475.89605712890625, "logps/rejected": -678.0491943359375, "loss": 0.6017, "rewards/accuracies": 0.75, "rewards/chosen": -2.999626636505127, "rewards/margins": 1.1372374296188354, "rewards/rejected": -4.136864185333252, "step": 8450 }, { "epoch": 0.55, "learning_rate": 2.4694527400084546e-06, "logits/chosen": -1.9232698678970337, "logits/rejected": -1.4870688915252686, "logps/chosen": -568.2553100585938, "logps/rejected": -500.90887451171875, "loss": 0.5996, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.601705312728882, "rewards/margins": 0.5879272222518921, "rewards/rejected": -3.1896328926086426, "step": 8460 }, { "epoch": 0.55, "learning_rate": 2.4637433406005607e-06, "logits/chosen": -1.7582013607025146, "logits/rejected": -1.7778558731079102, "logps/chosen": -554.71484375, "logps/rejected": -532.88720703125, "loss": 0.4793, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.553034782409668, "rewards/margins": 0.6445740461349487, "rewards/rejected": -3.1976089477539062, "step": 8470 }, { "epoch": 0.55, "learning_rate": 2.4580341303251628e-06, "logits/chosen": -2.312466621398926, "logits/rejected": -1.7932695150375366, "logps/chosen": -528.412353515625, "logps/rejected": -575.6771240234375, "loss": 0.4946, "rewards/accuracies": 0.75, "rewards/chosen": -2.548846960067749, "rewards/margins": 0.6522601246833801, "rewards/rejected": -3.2011075019836426, "step": 8480 }, { "epoch": 0.56, "learning_rate": 2.4523251389642984e-06, "logits/chosen": -2.352581024169922, "logits/rejected": -1.9734636545181274, "logps/chosen": -500.60797119140625, "logps/rejected": -564.6924438476562, "loss": 0.5307, "rewards/accuracies": 0.75, "rewards/chosen": -2.1165223121643066, "rewards/margins": 0.9649814367294312, "rewards/rejected": -3.0815041065216064, "step": 8490 }, { "epoch": 0.56, "learning_rate": 2.4466163962988626e-06, "logits/chosen": -2.381840705871582, "logits/rejected": -2.000971555709839, "logps/chosen": -584.6832885742188, "logps/rejected": -670.2131958007812, "loss": 0.6757, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4059548377990723, "rewards/margins": 0.6139851212501526, "rewards/rejected": -3.01993989944458, "step": 8500 }, { "epoch": 0.56, "learning_rate": 2.4409079321084543e-06, "logits/chosen": -2.1239302158355713, "logits/rejected": -2.165752649307251, "logps/chosen": -523.6356811523438, "logps/rejected": -714.048828125, "loss": 0.77, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8900184631347656, "rewards/margins": 0.8145949244499207, "rewards/rejected": -3.704613208770752, "step": 8510 }, { "epoch": 0.56, "learning_rate": 2.4351997761712184e-06, "logits/chosen": -1.9498107433319092, "logits/rejected": -1.6921589374542236, "logps/chosen": -650.4454956054688, "logps/rejected": -674.9125366210938, "loss": 0.4921, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.603874683380127, "rewards/margins": 1.356870174407959, "rewards/rejected": -3.9607443809509277, "step": 8520 }, { "epoch": 0.56, "learning_rate": 2.4294919582636933e-06, "logits/chosen": -1.516585350036621, "logits/rejected": -1.6425130367279053, "logps/chosen": -594.2330322265625, "logps/rejected": -834.4051513671875, "loss": 0.3158, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.875373363494873, "rewards/margins": 1.6923511028289795, "rewards/rejected": -4.567724227905273, "step": 8530 }, { "epoch": 0.56, "learning_rate": 2.423784508160652e-06, "logits/chosen": -1.718045949935913, "logits/rejected": -1.8462913036346436, "logps/chosen": -637.2362670898438, "logps/rejected": -767.963134765625, "loss": 0.6127, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.217985153198242, "rewards/margins": 0.6419950127601624, "rewards/rejected": -3.8599801063537598, "step": 8540 }, { "epoch": 0.56, "learning_rate": 2.418077455634951e-06, "logits/chosen": -1.7958965301513672, "logits/rejected": -2.0573506355285645, "logps/chosen": -538.6891479492188, "logps/rejected": -604.0670776367188, "loss": 0.6724, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.5720856189727783, "rewards/margins": 0.8947946429252625, "rewards/rejected": -3.4668803215026855, "step": 8550 }, { "epoch": 0.56, "learning_rate": 2.4123708304573714e-06, "logits/chosen": -1.7513185739517212, "logits/rejected": -1.7228978872299194, "logps/chosen": -569.3314208984375, "logps/rejected": -668.5128173828125, "loss": 0.3983, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.264718532562256, "rewards/margins": 1.522810697555542, "rewards/rejected": -4.787528991699219, "step": 8560 }, { "epoch": 0.56, "learning_rate": 2.406664662396465e-06, "logits/chosen": -1.6329927444458008, "logits/rejected": -1.8342996835708618, "logps/chosen": -630.3096923828125, "logps/rejected": -726.5323486328125, "loss": 0.5556, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3960113525390625, "rewards/margins": 0.6501597166061401, "rewards/rejected": -3.046171188354492, "step": 8570 }, { "epoch": 0.56, "learning_rate": 2.4009589812184012e-06, "logits/chosen": -1.9765291213989258, "logits/rejected": -1.6217161417007446, "logps/chosen": -490.7498474121094, "logps/rejected": -631.5427856445312, "loss": 0.4344, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6923301219940186, "rewards/margins": 1.2100067138671875, "rewards/rejected": -3.902336835861206, "step": 8580 }, { "epoch": 0.56, "learning_rate": 2.3952538166868073e-06, "logits/chosen": -2.1259334087371826, "logits/rejected": -1.6824983358383179, "logps/chosen": -632.2239990234375, "logps/rejected": -669.2704467773438, "loss": 0.491, "rewards/accuracies": 0.75, "rewards/chosen": -2.824948310852051, "rewards/margins": 1.0109959840774536, "rewards/rejected": -3.835944414138794, "step": 8590 }, { "epoch": 0.56, "learning_rate": 2.389549198562616e-06, "logits/chosen": -1.9586637020111084, "logits/rejected": -1.9438788890838623, "logps/chosen": -549.78662109375, "logps/rejected": -682.9403076171875, "loss": 0.4568, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.497349262237549, "rewards/margins": 1.207899808883667, "rewards/rejected": -3.705249071121216, "step": 8600 }, { "epoch": 0.56, "learning_rate": 2.3838451566039098e-06, "logits/chosen": -1.9294124841690063, "logits/rejected": -2.0406670570373535, "logps/chosen": -567.9529418945312, "logps/rejected": -750.5849609375, "loss": 0.5347, "rewards/accuracies": 0.75, "rewards/chosen": -2.2828116416931152, "rewards/margins": 1.2478787899017334, "rewards/rejected": -3.5306904315948486, "step": 8610 }, { "epoch": 0.56, "learning_rate": 2.3781417205657662e-06, "logits/chosen": -2.027916669845581, "logits/rejected": -1.8153997659683228, "logps/chosen": -517.7577514648438, "logps/rejected": -642.97705078125, "loss": 0.5145, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.032965660095215, "rewards/margins": 1.262226939201355, "rewards/rejected": -3.295192241668701, "step": 8620 }, { "epoch": 0.56, "learning_rate": 2.3724389202001006e-06, "logits/chosen": -1.9544061422348022, "logits/rejected": -1.5058300495147705, "logps/chosen": -559.7622680664062, "logps/rejected": -668.08251953125, "loss": 0.432, "rewards/accuracies": 0.75, "rewards/chosen": -2.5795223712921143, "rewards/margins": 1.103283166885376, "rewards/rejected": -3.682805299758911, "step": 8630 }, { "epoch": 0.57, "learning_rate": 2.366736785255514e-06, "logits/chosen": -2.1814701557159424, "logits/rejected": -1.883183240890503, "logps/chosen": -529.947265625, "logps/rejected": -600.7251586914062, "loss": 0.5234, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2991366386413574, "rewards/margins": 1.3504598140716553, "rewards/rejected": -3.6495964527130127, "step": 8640 }, { "epoch": 0.57, "learning_rate": 2.3610353454771355e-06, "logits/chosen": -2.1669187545776367, "logits/rejected": -2.116823196411133, "logps/chosen": -559.6239013671875, "logps/rejected": -643.1777954101562, "loss": 0.4503, "rewards/accuracies": 0.75, "rewards/chosen": -2.8764381408691406, "rewards/margins": 0.7580646276473999, "rewards/rejected": -3.63450288772583, "step": 8650 }, { "epoch": 0.57, "learning_rate": 2.355334630606467e-06, "logits/chosen": -2.248654842376709, "logits/rejected": -1.7833881378173828, "logps/chosen": -544.1170654296875, "logps/rejected": -719.9376220703125, "loss": 0.5325, "rewards/accuracies": 0.75, "rewards/chosen": -2.4501070976257324, "rewards/margins": 1.2582969665527344, "rewards/rejected": -3.708404064178467, "step": 8660 }, { "epoch": 0.57, "learning_rate": 2.349634670381231e-06, "logits/chosen": -1.9623810052871704, "logits/rejected": -1.966509222984314, "logps/chosen": -521.1162719726562, "logps/rejected": -625.997314453125, "loss": 0.6938, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.571371078491211, "rewards/margins": 1.166871428489685, "rewards/rejected": -3.7382426261901855, "step": 8670 }, { "epoch": 0.57, "learning_rate": 2.3439354945352104e-06, "logits/chosen": -1.8034248352050781, "logits/rejected": -1.8189328908920288, "logps/chosen": -581.9903564453125, "logps/rejected": -672.5491943359375, "loss": 0.4387, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5983049869537354, "rewards/margins": 1.2358529567718506, "rewards/rejected": -3.834158420562744, "step": 8680 }, { "epoch": 0.57, "learning_rate": 2.3382371327981e-06, "logits/chosen": -1.6109882593154907, "logits/rejected": -1.9092413187026978, "logps/chosen": -531.2320556640625, "logps/rejected": -657.5418701171875, "loss": 0.7228, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8226723670959473, "rewards/margins": 0.6856005191802979, "rewards/rejected": -3.5082733631134033, "step": 8690 }, { "epoch": 0.57, "learning_rate": 2.3325396148953456e-06, "logits/chosen": -2.23337721824646, "logits/rejected": -1.8812425136566162, "logps/chosen": -537.9326782226562, "logps/rejected": -645.2062377929688, "loss": 0.4033, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6055963039398193, "rewards/margins": 1.3089559078216553, "rewards/rejected": -3.9145522117614746, "step": 8700 }, { "epoch": 0.57, "learning_rate": 2.3268429705479915e-06, "logits/chosen": -1.838443398475647, "logits/rejected": -1.89572012424469, "logps/chosen": -487.03253173828125, "logps/rejected": -597.0982666015625, "loss": 0.5256, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.876188278198242, "rewards/margins": 1.1435706615447998, "rewards/rejected": -4.019758701324463, "step": 8710 }, { "epoch": 0.57, "learning_rate": 2.3211472294725248e-06, "logits/chosen": -2.0660183429718018, "logits/rejected": -1.859236478805542, "logps/chosen": -588.6038208007812, "logps/rejected": -677.5938720703125, "loss": 0.4918, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4605069160461426, "rewards/margins": 1.6218595504760742, "rewards/rejected": -4.082365989685059, "step": 8720 }, { "epoch": 0.57, "learning_rate": 2.315452421380721e-06, "logits/chosen": -2.0983052253723145, "logits/rejected": -1.5631288290023804, "logps/chosen": -577.9644775390625, "logps/rejected": -642.124755859375, "loss": 0.3914, "rewards/accuracies": 0.75, "rewards/chosen": -2.718008518218994, "rewards/margins": 0.8872051239013672, "rewards/rejected": -3.6052136421203613, "step": 8730 }, { "epoch": 0.57, "learning_rate": 2.3097585759794886e-06, "logits/chosen": -2.3067686557769775, "logits/rejected": -2.0007030963897705, "logps/chosen": -577.6544799804688, "logps/rejected": -582.8269653320312, "loss": 0.4402, "rewards/accuracies": 0.75, "rewards/chosen": -2.1569647789001465, "rewards/margins": 1.6096376180648804, "rewards/rejected": -3.7666027545928955, "step": 8740 }, { "epoch": 0.57, "learning_rate": 2.3040657229707155e-06, "logits/chosen": -1.7420654296875, "logits/rejected": -1.9039827585220337, "logps/chosen": -656.5730590820312, "logps/rejected": -680.1136474609375, "loss": 0.6174, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.26708984375, "rewards/margins": 0.5174268484115601, "rewards/rejected": -3.7845168113708496, "step": 8750 }, { "epoch": 0.57, "learning_rate": 2.2983738920511104e-06, "logits/chosen": -2.079326868057251, "logits/rejected": -1.9476079940795898, "logps/chosen": -595.2271118164062, "logps/rejected": -624.8593139648438, "loss": 0.4396, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.79783296585083, "rewards/margins": 1.1170275211334229, "rewards/rejected": -3.914860486984253, "step": 8760 }, { "epoch": 0.57, "learning_rate": 2.2926831129120523e-06, "logits/chosen": -2.1405370235443115, "logits/rejected": -1.6271158456802368, "logps/chosen": -491.77783203125, "logps/rejected": -587.8319702148438, "loss": 0.5622, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3106017112731934, "rewards/margins": 0.8187249898910522, "rewards/rejected": -3.129326581954956, "step": 8770 }, { "epoch": 0.57, "learning_rate": 2.2869934152394323e-06, "logits/chosen": -2.487213611602783, "logits/rejected": -2.0195982456207275, "logps/chosen": -612.7380981445312, "logps/rejected": -603.2962036132812, "loss": 0.5513, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.550365924835205, "rewards/margins": 0.7269454598426819, "rewards/rejected": -3.2773118019104004, "step": 8780 }, { "epoch": 0.58, "learning_rate": 2.281304828713501e-06, "logits/chosen": -2.2334394454956055, "logits/rejected": -1.6651496887207031, "logps/chosen": -595.5431518554688, "logps/rejected": -643.3426513671875, "loss": 0.5202, "rewards/accuracies": 0.75, "rewards/chosen": -2.3620035648345947, "rewards/margins": 1.4718233346939087, "rewards/rejected": -3.833826780319214, "step": 8790 }, { "epoch": 0.58, "learning_rate": 2.275617383008711e-06, "logits/chosen": -2.304520606994629, "logits/rejected": -1.7181825637817383, "logps/chosen": -570.1748046875, "logps/rejected": -560.0604858398438, "loss": 0.6478, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.6325135231018066, "rewards/margins": 0.6104687452316284, "rewards/rejected": -3.2429816722869873, "step": 8800 }, { "epoch": 0.58, "learning_rate": 2.269931107793567e-06, "logits/chosen": -1.7754180431365967, "logits/rejected": -1.9540001153945923, "logps/chosen": -511.11834716796875, "logps/rejected": -521.2728271484375, "loss": 0.5108, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5714993476867676, "rewards/margins": 0.6436102986335754, "rewards/rejected": -3.2151095867156982, "step": 8810 }, { "epoch": 0.58, "learning_rate": 2.2642460327304655e-06, "logits/chosen": -2.2850050926208496, "logits/rejected": -2.1239497661590576, "logps/chosen": -663.78466796875, "logps/rejected": -669.0499877929688, "loss": 0.5477, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.511131763458252, "rewards/margins": 0.9971479177474976, "rewards/rejected": -3.508279323577881, "step": 8820 }, { "epoch": 0.58, "learning_rate": 2.258562187475543e-06, "logits/chosen": -2.2841639518737793, "logits/rejected": -2.083108425140381, "logps/chosen": -518.9134521484375, "logps/rejected": -691.3600463867188, "loss": 0.3406, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.835007667541504, "rewards/margins": 1.801963448524475, "rewards/rejected": -3.6369712352752686, "step": 8830 }, { "epoch": 0.58, "learning_rate": 2.2528796016785196e-06, "logits/chosen": -2.287487030029297, "logits/rejected": -2.0592336654663086, "logps/chosen": -574.352783203125, "logps/rejected": -623.5216064453125, "loss": 0.4739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.657583713531494, "rewards/margins": 1.017406702041626, "rewards/rejected": -3.674990177154541, "step": 8840 }, { "epoch": 0.58, "learning_rate": 2.247198304982548e-06, "logits/chosen": -1.7157217264175415, "logits/rejected": -1.6318271160125732, "logps/chosen": -508.4742126464844, "logps/rejected": -640.5726318359375, "loss": 0.5662, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.0853447914123535, "rewards/margins": 1.3192092180252075, "rewards/rejected": -3.4045543670654297, "step": 8850 }, { "epoch": 0.58, "learning_rate": 2.2415183270240533e-06, "logits/chosen": -2.2637226581573486, "logits/rejected": -1.8087360858917236, "logps/chosen": -564.3175048828125, "logps/rejected": -577.1688842773438, "loss": 0.4104, "rewards/accuracies": 0.75, "rewards/chosen": -2.231301784515381, "rewards/margins": 1.0309956073760986, "rewards/rejected": -3.2622971534729004, "step": 8860 }, { "epoch": 0.58, "learning_rate": 2.2358396974325837e-06, "logits/chosen": -1.8857953548431396, "logits/rejected": -2.095496416091919, "logps/chosen": -644.1954345703125, "logps/rejected": -772.6640625, "loss": 0.5618, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6326816082000732, "rewards/margins": 0.6671141982078552, "rewards/rejected": -3.2997958660125732, "step": 8870 }, { "epoch": 0.58, "learning_rate": 2.2301624458306525e-06, "logits/chosen": -2.007840633392334, "logits/rejected": -1.8935134410858154, "logps/chosen": -596.2510375976562, "logps/rejected": -559.5049438476562, "loss": 0.7535, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.280665159225464, "rewards/margins": 0.4504348337650299, "rewards/rejected": -3.731100559234619, "step": 8880 }, { "epoch": 0.58, "learning_rate": 2.2244866018335855e-06, "logits/chosen": -1.8352820873260498, "logits/rejected": -1.730767011642456, "logps/chosen": -522.18212890625, "logps/rejected": -626.891845703125, "loss": 0.573, "rewards/accuracies": 0.75, "rewards/chosen": -2.457399845123291, "rewards/margins": 1.2562861442565918, "rewards/rejected": -3.713685989379883, "step": 8890 }, { "epoch": 0.58, "learning_rate": 2.2188121950493648e-06, "logits/chosen": -2.008808135986328, "logits/rejected": -1.604680061340332, "logps/chosen": -621.1892700195312, "logps/rejected": -616.54052734375, "loss": 0.5277, "rewards/accuracies": 0.75, "rewards/chosen": -3.031632900238037, "rewards/margins": 0.9094408750534058, "rewards/rejected": -3.9410743713378906, "step": 8900 }, { "epoch": 0.58, "learning_rate": 2.2131392550784766e-06, "logits/chosen": -2.350640058517456, "logits/rejected": -2.0902938842773438, "logps/chosen": -584.5798950195312, "logps/rejected": -655.9541015625, "loss": 0.5626, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2919318675994873, "rewards/margins": 1.0417548418045044, "rewards/rejected": -3.3336868286132812, "step": 8910 }, { "epoch": 0.58, "learning_rate": 2.2074678115137533e-06, "logits/chosen": -2.2224221229553223, "logits/rejected": -1.7978475093841553, "logps/chosen": -533.0066528320312, "logps/rejected": -592.8658447265625, "loss": 0.4884, "rewards/accuracies": 0.75, "rewards/chosen": -2.7765023708343506, "rewards/margins": 0.9631174206733704, "rewards/rejected": -3.739619493484497, "step": 8920 }, { "epoch": 0.58, "learning_rate": 2.201797893940224e-06, "logits/chosen": -2.1494221687316895, "logits/rejected": -1.9534950256347656, "logps/chosen": -535.6815185546875, "logps/rejected": -551.4397583007812, "loss": 0.7491, "rewards/accuracies": 0.75, "rewards/chosen": -2.32279896736145, "rewards/margins": 0.542815089225769, "rewards/rejected": -2.8656139373779297, "step": 8930 }, { "epoch": 0.58, "learning_rate": 2.196129531934956e-06, "logits/chosen": -1.8187973499298096, "logits/rejected": -1.8088195323944092, "logps/chosen": -587.8333129882812, "logps/rejected": -656.7921142578125, "loss": 0.3842, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.524751663208008, "rewards/margins": 1.7834068536758423, "rewards/rejected": -4.3081583976745605, "step": 8940 }, { "epoch": 0.59, "learning_rate": 2.190462755066902e-06, "logits/chosen": -2.367126941680908, "logits/rejected": -1.478229284286499, "logps/chosen": -559.9871215820312, "logps/rejected": -643.1048583984375, "loss": 0.4527, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1893820762634277, "rewards/margins": 1.1641318798065186, "rewards/rejected": -3.353513717651367, "step": 8950 }, { "epoch": 0.59, "learning_rate": 2.184797592896746e-06, "logits/chosen": -2.319800615310669, "logits/rejected": -1.4292646646499634, "logps/chosen": -589.6014404296875, "logps/rejected": -580.06640625, "loss": 0.4834, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.5645852088928223, "rewards/margins": 1.3727290630340576, "rewards/rejected": -3.93731427192688, "step": 8960 }, { "epoch": 0.59, "learning_rate": 2.17913407497675e-06, "logits/chosen": -1.7828247547149658, "logits/rejected": -1.5657999515533447, "logps/chosen": -552.4095458984375, "logps/rejected": -668.5966186523438, "loss": 0.5277, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7630295753479004, "rewards/margins": 1.062851071357727, "rewards/rejected": -3.825880765914917, "step": 8970 }, { "epoch": 0.59, "learning_rate": 2.173472230850596e-06, "logits/chosen": -1.9459810256958008, "logits/rejected": -1.230505347251892, "logps/chosen": -559.1304321289062, "logps/rejected": -572.7885131835938, "loss": 0.479, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3328442573547363, "rewards/margins": 1.3568191528320312, "rewards/rejected": -3.6896634101867676, "step": 8980 }, { "epoch": 0.59, "learning_rate": 2.1678120900532375e-06, "logits/chosen": -1.905078649520874, "logits/rejected": -1.9692771434783936, "logps/chosen": -591.5587768554688, "logps/rejected": -695.5579833984375, "loss": 0.4987, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7419822216033936, "rewards/margins": 1.1332275867462158, "rewards/rejected": -3.8752098083496094, "step": 8990 }, { "epoch": 0.59, "learning_rate": 2.1621536821107412e-06, "logits/chosen": -1.8924217224121094, "logits/rejected": -1.3386434316635132, "logps/chosen": -503.5406799316406, "logps/rejected": -694.8475341796875, "loss": 0.3399, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4634346961975098, "rewards/margins": 1.3909974098205566, "rewards/rejected": -3.8544325828552246, "step": 9000 }, { "epoch": 0.59, "eval_logits/chosen": -2.0022943019866943, "eval_logits/rejected": -1.820178747177124, "eval_logps/chosen": -542.1044921875, "eval_logps/rejected": -637.0339965820312, "eval_loss": 0.5424858331680298, "eval_rewards/accuracies": 0.7409999966621399, "eval_rewards/chosen": -2.46658992767334, "eval_rewards/margins": 1.1496917009353638, "eval_rewards/rejected": -3.616281509399414, "eval_runtime": 464.9712, "eval_samples_per_second": 4.301, "eval_steps_per_second": 2.151, "step": 9000 }, { "epoch": 0.59, "learning_rate": 2.1564970365401346e-06, "logits/chosen": -2.1641273498535156, "logits/rejected": -1.9229265451431274, "logps/chosen": -520.37255859375, "logps/rejected": -689.4486083984375, "loss": 0.484, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.614044189453125, "rewards/margins": 1.3849103450775146, "rewards/rejected": -3.9989547729492188, "step": 9010 }, { "epoch": 0.59, "learning_rate": 2.1508421828492527e-06, "logits/chosen": -1.9114001989364624, "logits/rejected": -2.0205748081207275, "logps/chosen": -548.467529296875, "logps/rejected": -674.73779296875, "loss": 0.5263, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2811312675476074, "rewards/margins": 1.1583563089370728, "rewards/rejected": -3.4394874572753906, "step": 9020 }, { "epoch": 0.59, "learning_rate": 2.145189150536582e-06, "logits/chosen": -1.9899876117706299, "logits/rejected": -2.2059924602508545, "logps/chosen": -585.5611572265625, "logps/rejected": -647.5160522460938, "loss": 0.5881, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4526724815368652, "rewards/margins": 0.7929941415786743, "rewards/rejected": -3.245666980743408, "step": 9030 }, { "epoch": 0.59, "learning_rate": 2.139537969091107e-06, "logits/chosen": -1.997889757156372, "logits/rejected": -1.4408007860183716, "logps/chosen": -589.4309692382812, "logps/rejected": -603.2365112304688, "loss": 0.661, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.2231433391571045, "rewards/margins": 0.8774444460868835, "rewards/rejected": -4.100587844848633, "step": 9040 }, { "epoch": 0.59, "learning_rate": 2.1338886679921603e-06, "logits/chosen": -2.291400194168091, "logits/rejected": -1.6803371906280518, "logps/chosen": -533.8601684570312, "logps/rejected": -636.6980590820312, "loss": 0.5748, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5815556049346924, "rewards/margins": 0.851997971534729, "rewards/rejected": -3.4335532188415527, "step": 9050 }, { "epoch": 0.59, "learning_rate": 2.128241276709263e-06, "logits/chosen": -2.2793195247650146, "logits/rejected": -2.075969934463501, "logps/chosen": -532.822265625, "logps/rejected": -600.0733032226562, "loss": 0.4538, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9714977741241455, "rewards/margins": 1.158085823059082, "rewards/rejected": -3.1295838356018066, "step": 9060 }, { "epoch": 0.59, "learning_rate": 2.1225958247019746e-06, "logits/chosen": -2.2013304233551025, "logits/rejected": -1.8609364032745361, "logps/chosen": -447.49896240234375, "logps/rejected": -562.5231323242188, "loss": 0.4513, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9393889904022217, "rewards/margins": 1.7084639072418213, "rewards/rejected": -3.647852659225464, "step": 9070 }, { "epoch": 0.59, "learning_rate": 2.1169523414197383e-06, "logits/chosen": -2.081878662109375, "logits/rejected": -1.931777000427246, "logps/chosen": -563.480712890625, "logps/rejected": -615.7371215820312, "loss": 0.478, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9521706104278564, "rewards/margins": 1.0098683834075928, "rewards/rejected": -2.96203875541687, "step": 9080 }, { "epoch": 0.59, "learning_rate": 2.1113108563017267e-06, "logits/chosen": -2.106600046157837, "logits/rejected": -1.7770551443099976, "logps/chosen": -617.9705810546875, "logps/rejected": -740.1712646484375, "loss": 0.6053, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3663182258605957, "rewards/margins": 0.9891064763069153, "rewards/rejected": -3.3554248809814453, "step": 9090 }, { "epoch": 0.6, "learning_rate": 2.1056713987766905e-06, "logits/chosen": -1.7630764245986938, "logits/rejected": -1.5825989246368408, "logps/chosen": -464.11767578125, "logps/rejected": -558.0419921875, "loss": 0.5286, "rewards/accuracies": 0.75, "rewards/chosen": -3.2644290924072266, "rewards/margins": 1.071807622909546, "rewards/rejected": -4.336236476898193, "step": 9100 }, { "epoch": 0.6, "learning_rate": 2.1000339982628022e-06, "logits/chosen": -1.8941562175750732, "logits/rejected": -1.6353633403778076, "logps/chosen": -608.3696899414062, "logps/rejected": -621.0068359375, "loss": 0.6304, "rewards/accuracies": 0.75, "rewards/chosen": -2.9169669151306152, "rewards/margins": 0.7194188237190247, "rewards/rejected": -3.636385679244995, "step": 9110 }, { "epoch": 0.6, "learning_rate": 2.0943986841675043e-06, "logits/chosen": -2.0865185260772705, "logits/rejected": -2.3169753551483154, "logps/chosen": -589.1450805664062, "logps/rejected": -657.9476318359375, "loss": 0.6209, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.759594440460205, "rewards/margins": 0.7825464010238647, "rewards/rejected": -3.5421414375305176, "step": 9120 }, { "epoch": 0.6, "learning_rate": 2.088765485887356e-06, "logits/chosen": -2.305544376373291, "logits/rejected": -1.736236333847046, "logps/chosen": -502.2201232910156, "logps/rejected": -603.9275512695312, "loss": 0.3673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.0719895362854004, "rewards/margins": 1.5007104873657227, "rewards/rejected": -3.572699785232544, "step": 9130 }, { "epoch": 0.6, "learning_rate": 2.083134432807879e-06, "logits/chosen": -2.0251638889312744, "logits/rejected": -2.3476920127868652, "logps/chosen": -565.7601318359375, "logps/rejected": -609.1180419921875, "loss": 0.6335, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.030787706375122, "rewards/margins": 1.0215563774108887, "rewards/rejected": -3.0523440837860107, "step": 9140 }, { "epoch": 0.6, "learning_rate": 2.077505554303404e-06, "logits/chosen": -2.367225408554077, "logits/rejected": -1.573440432548523, "logps/chosen": -550.5197143554688, "logps/rejected": -619.6304931640625, "loss": 0.483, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.22603702545166, "rewards/margins": 1.6922342777252197, "rewards/rejected": -3.9182708263397217, "step": 9150 }, { "epoch": 0.6, "learning_rate": 2.071878879736918e-06, "logits/chosen": -2.223241090774536, "logits/rejected": -1.6001701354980469, "logps/chosen": -615.3475952148438, "logps/rejected": -580.7982177734375, "loss": 0.7404, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.9252007007598877, "rewards/margins": 0.4547927975654602, "rewards/rejected": -3.379993438720703, "step": 9160 }, { "epoch": 0.6, "learning_rate": 2.0662544384599136e-06, "logits/chosen": -1.8714954853057861, "logits/rejected": -1.802222490310669, "logps/chosen": -539.7855224609375, "logps/rejected": -655.821533203125, "loss": 0.4154, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3042709827423096, "rewards/margins": 1.7545344829559326, "rewards/rejected": -4.058804988861084, "step": 9170 }, { "epoch": 0.6, "learning_rate": 2.0606322598122314e-06, "logits/chosen": -2.063321828842163, "logits/rejected": -2.150560140609741, "logps/chosen": -515.9948120117188, "logps/rejected": -631.8154907226562, "loss": 0.5244, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2739298343658447, "rewards/margins": 1.2714461088180542, "rewards/rejected": -3.5453758239746094, "step": 9180 }, { "epoch": 0.6, "learning_rate": 2.0550123731219085e-06, "logits/chosen": -2.152060031890869, "logits/rejected": -1.9297168254852295, "logps/chosen": -564.4265747070312, "logps/rejected": -679.7371826171875, "loss": 0.645, "rewards/accuracies": 0.75, "rewards/chosen": -2.527137279510498, "rewards/margins": 1.0509201288223267, "rewards/rejected": -3.5780577659606934, "step": 9190 }, { "epoch": 0.6, "learning_rate": 2.0493948077050267e-06, "logits/chosen": -2.3841280937194824, "logits/rejected": -1.8136011362075806, "logps/chosen": -605.9197387695312, "logps/rejected": -682.6570434570312, "loss": 0.638, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.816260576248169, "rewards/margins": 1.0166782140731812, "rewards/rejected": -3.8329384326934814, "step": 9200 }, { "epoch": 0.6, "learning_rate": 2.0437795928655596e-06, "logits/chosen": -1.8025420904159546, "logits/rejected": -1.828176736831665, "logps/chosen": -525.0386352539062, "logps/rejected": -610.8030395507812, "loss": 0.577, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5464606285095215, "rewards/margins": 0.6083902716636658, "rewards/rejected": -3.154850959777832, "step": 9210 }, { "epoch": 0.6, "learning_rate": 2.0381667578952184e-06, "logits/chosen": -1.8286422491073608, "logits/rejected": -1.7795965671539307, "logps/chosen": -505.29742431640625, "logps/rejected": -559.3614501953125, "loss": 0.6331, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -3.038729667663574, "rewards/margins": 0.616546094417572, "rewards/rejected": -3.65527606010437, "step": 9220 }, { "epoch": 0.6, "learning_rate": 2.0325563320732995e-06, "logits/chosen": -2.0447893142700195, "logits/rejected": -1.742436408996582, "logps/chosen": -467.5716247558594, "logps/rejected": -578.86181640625, "loss": 0.5913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6618003845214844, "rewards/margins": 0.9829275012016296, "rewards/rejected": -3.6447277069091797, "step": 9230 }, { "epoch": 0.6, "learning_rate": 2.026948344666532e-06, "logits/chosen": -2.147611379623413, "logits/rejected": -1.6617320775985718, "logps/chosen": -566.64453125, "logps/rejected": -610.9962768554688, "loss": 0.4258, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.365844488143921, "rewards/margins": 1.2829093933105469, "rewards/rejected": -3.6487536430358887, "step": 9240 }, { "epoch": 0.61, "learning_rate": 2.0213428249289257e-06, "logits/chosen": -2.0575060844421387, "logits/rejected": -1.766715407371521, "logps/chosen": -520.9926147460938, "logps/rejected": -624.228515625, "loss": 0.5106, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6971800327301025, "rewards/margins": 0.8146166801452637, "rewards/rejected": -3.5117969512939453, "step": 9250 }, { "epoch": 0.61, "learning_rate": 2.0157398021016175e-06, "logits/chosen": -2.0321450233459473, "logits/rejected": -1.6661357879638672, "logps/chosen": -520.1958618164062, "logps/rejected": -647.04052734375, "loss": 0.5692, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.661426067352295, "rewards/margins": 0.8946744799613953, "rewards/rejected": -3.556100368499756, "step": 9260 }, { "epoch": 0.61, "learning_rate": 2.010139305412719e-06, "logits/chosen": -1.6539812088012695, "logits/rejected": -1.6137405633926392, "logps/chosen": -604.7357177734375, "logps/rejected": -658.6666870117188, "loss": 0.5299, "rewards/accuracies": 0.75, "rewards/chosen": -3.1034462451934814, "rewards/margins": 1.02749502658844, "rewards/rejected": -4.130940914154053, "step": 9270 }, { "epoch": 0.61, "learning_rate": 2.0045413640771644e-06, "logits/chosen": -1.9617812633514404, "logits/rejected": -1.632447600364685, "logps/chosen": -478.29766845703125, "logps/rejected": -611.6834716796875, "loss": 0.56, "rewards/accuracies": 0.75, "rewards/chosen": -2.4144785404205322, "rewards/margins": 1.2469799518585205, "rewards/rejected": -3.6614582538604736, "step": 9280 }, { "epoch": 0.61, "learning_rate": 1.998946007296558e-06, "logits/chosen": -2.048708915710449, "logits/rejected": -1.7788597345352173, "logps/chosen": -515.7967529296875, "logps/rejected": -620.9700317382812, "loss": 0.5435, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.980874538421631, "rewards/margins": 0.6445003151893616, "rewards/rejected": -3.6253750324249268, "step": 9290 }, { "epoch": 0.61, "learning_rate": 1.9933532642590215e-06, "logits/chosen": -1.670326590538025, "logits/rejected": -0.9932015538215637, "logps/chosen": -559.5738525390625, "logps/rejected": -637.4277954101562, "loss": 0.5583, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1597964763641357, "rewards/margins": 0.7928120493888855, "rewards/rejected": -3.952608585357666, "step": 9300 }, { "epoch": 0.61, "learning_rate": 1.987763164139042e-06, "logits/chosen": -1.5591435432434082, "logits/rejected": -1.7700544595718384, "logps/chosen": -441.77294921875, "logps/rejected": -577.7332153320312, "loss": 0.4779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2711105346679688, "rewards/margins": 1.0710574388504028, "rewards/rejected": -3.342167615890503, "step": 9310 }, { "epoch": 0.61, "learning_rate": 1.982175736097321e-06, "logits/chosen": -1.8563480377197266, "logits/rejected": -1.8127315044403076, "logps/chosen": -562.796875, "logps/rejected": -550.6398315429688, "loss": 0.6156, "rewards/accuracies": 0.75, "rewards/chosen": -2.2904465198516846, "rewards/margins": 0.5200685262680054, "rewards/rejected": -2.8105149269104004, "step": 9320 }, { "epoch": 0.61, "learning_rate": 1.9765910092806196e-06, "logits/chosen": -1.920570731163025, "logits/rejected": -1.916369080543518, "logps/chosen": -608.796875, "logps/rejected": -609.5429077148438, "loss": 0.5016, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.900730848312378, "rewards/margins": 0.667815625667572, "rewards/rejected": -3.568546772003174, "step": 9330 }, { "epoch": 0.61, "learning_rate": 1.9710090128216083e-06, "logits/chosen": -2.0497214794158936, "logits/rejected": -1.7055015563964844, "logps/chosen": -491.4339904785156, "logps/rejected": -572.2652587890625, "loss": 0.6743, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.984422206878662, "rewards/margins": 0.7262072563171387, "rewards/rejected": -3.710629940032959, "step": 9340 }, { "epoch": 0.61, "learning_rate": 1.9654297758387155e-06, "logits/chosen": -2.1629855632781982, "logits/rejected": -2.0053813457489014, "logps/chosen": -576.7559814453125, "logps/rejected": -723.0887451171875, "loss": 0.469, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.744321346282959, "rewards/margins": 1.1882708072662354, "rewards/rejected": -3.9325923919677734, "step": 9350 }, { "epoch": 0.61, "learning_rate": 1.9598533274359736e-06, "logits/chosen": -2.2159430980682373, "logits/rejected": -1.965995192527771, "logps/chosen": -522.80419921875, "logps/rejected": -573.6424560546875, "loss": 0.553, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6606602668762207, "rewards/margins": 0.6701160073280334, "rewards/rejected": -3.330775737762451, "step": 9360 }, { "epoch": 0.61, "learning_rate": 1.9542796967028697e-06, "logits/chosen": -2.061631441116333, "logits/rejected": -1.8505007028579712, "logps/chosen": -628.1642456054688, "logps/rejected": -639.5188598632812, "loss": 0.6624, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.1050140857696533, "rewards/margins": 1.1158678531646729, "rewards/rejected": -3.220881700515747, "step": 9370 }, { "epoch": 0.61, "learning_rate": 1.948708912714192e-06, "logits/chosen": -1.7678762674331665, "logits/rejected": -1.9105870723724365, "logps/chosen": -540.94091796875, "logps/rejected": -547.921142578125, "loss": 0.6858, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.675358295440674, "rewards/margins": 0.9142533540725708, "rewards/rejected": -3.589611768722534, "step": 9380 }, { "epoch": 0.61, "learning_rate": 1.9431410045298786e-06, "logits/chosen": -2.333967685699463, "logits/rejected": -2.0058631896972656, "logps/chosen": -558.5092163085938, "logps/rejected": -702.185791015625, "loss": 0.4988, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4004080295562744, "rewards/margins": 1.0809009075164795, "rewards/rejected": -3.481309175491333, "step": 9390 }, { "epoch": 0.62, "learning_rate": 1.9375760011948654e-06, "logits/chosen": -2.1702075004577637, "logits/rejected": -1.756667137145996, "logps/chosen": -627.5929565429688, "logps/rejected": -633.3089599609375, "loss": 0.5063, "rewards/accuracies": 0.75, "rewards/chosen": -2.946218729019165, "rewards/margins": 0.907780647277832, "rewards/rejected": -3.853998899459839, "step": 9400 }, { "epoch": 0.62, "learning_rate": 1.932013931738937e-06, "logits/chosen": -2.0589101314544678, "logits/rejected": -1.8918129205703735, "logps/chosen": -520.4500732421875, "logps/rejected": -549.6915283203125, "loss": 0.3908, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3042593002319336, "rewards/margins": 0.8746353387832642, "rewards/rejected": -3.178894519805908, "step": 9410 }, { "epoch": 0.62, "learning_rate": 1.9264548251765717e-06, "logits/chosen": -2.0337188243865967, "logits/rejected": -1.747370958328247, "logps/chosen": -567.445068359375, "logps/rejected": -643.7906494140625, "loss": 0.5603, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.6033923625946045, "rewards/margins": 1.404759168624878, "rewards/rejected": -4.008151531219482, "step": 9420 }, { "epoch": 0.62, "learning_rate": 1.9208987105067924e-06, "logits/chosen": -1.8125396966934204, "logits/rejected": -1.8225065469741821, "logps/chosen": -601.48876953125, "logps/rejected": -971.3978271484375, "loss": 0.4689, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6848011016845703, "rewards/margins": 2.0545504093170166, "rewards/rejected": -4.739351749420166, "step": 9430 }, { "epoch": 0.62, "learning_rate": 1.9153456167130154e-06, "logits/chosen": -2.2428884506225586, "logits/rejected": -2.1689774990081787, "logps/chosen": -542.6967163085938, "logps/rejected": -516.2056884765625, "loss": 0.5726, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.138211965560913, "rewards/margins": 1.0557899475097656, "rewards/rejected": -3.1940014362335205, "step": 9440 }, { "epoch": 0.62, "learning_rate": 1.9097955727628975e-06, "logits/chosen": -2.2795727252960205, "logits/rejected": -2.303039073944092, "logps/chosen": -602.3601684570312, "logps/rejected": -527.6361083984375, "loss": 0.6025, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.211545705795288, "rewards/margins": 0.08048735558986664, "rewards/rejected": -2.2920329570770264, "step": 9450 }, { "epoch": 0.62, "learning_rate": 1.904248607608187e-06, "logits/chosen": -2.111039400100708, "logits/rejected": -1.5743972063064575, "logps/chosen": -533.7442626953125, "logps/rejected": -579.7639770507812, "loss": 0.5136, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.898390531539917, "rewards/margins": 0.8783761262893677, "rewards/rejected": -3.776766300201416, "step": 9460 }, { "epoch": 0.62, "learning_rate": 1.8987047501845714e-06, "logits/chosen": -2.220080852508545, "logits/rejected": -2.1884331703186035, "logps/chosen": -537.1690673828125, "logps/rejected": -652.2893676757812, "loss": 0.5149, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.9553630352020264, "rewards/margins": 0.9699887037277222, "rewards/rejected": -2.925351619720459, "step": 9470 }, { "epoch": 0.62, "learning_rate": 1.8931640294115267e-06, "logits/chosen": -2.340770959854126, "logits/rejected": -1.3254892826080322, "logps/chosen": -550.0404052734375, "logps/rejected": -653.9843139648438, "loss": 0.4111, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.198329448699951, "rewards/margins": 1.6379525661468506, "rewards/rejected": -3.8362820148468018, "step": 9480 }, { "epoch": 0.62, "learning_rate": 1.8876264741921662e-06, "logits/chosen": -2.047311305999756, "logits/rejected": -1.9576759338378906, "logps/chosen": -566.8899536132812, "logps/rejected": -596.0674438476562, "loss": 0.7663, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.7725155353546143, "rewards/margins": 0.497785747051239, "rewards/rejected": -3.270301103591919, "step": 9490 }, { "epoch": 0.62, "learning_rate": 1.8820921134130912e-06, "logits/chosen": -2.081411600112915, "logits/rejected": -1.9306904077529907, "logps/chosen": -478.1241760253906, "logps/rejected": -616.6199951171875, "loss": 0.5073, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.968158483505249, "rewards/margins": 1.5478565692901611, "rewards/rejected": -3.516014814376831, "step": 9500 }, { "epoch": 0.62, "learning_rate": 1.8765609759442378e-06, "logits/chosen": -1.7763296365737915, "logits/rejected": -1.8750178813934326, "logps/chosen": -628.099609375, "logps/rejected": -672.4976806640625, "loss": 0.5003, "rewards/accuracies": 0.75, "rewards/chosen": -2.796745777130127, "rewards/margins": 0.9968738555908203, "rewards/rejected": -3.7936196327209473, "step": 9510 }, { "epoch": 0.62, "learning_rate": 1.8710330906387288e-06, "logits/chosen": -1.9623371362686157, "logits/rejected": -1.1978862285614014, "logps/chosen": -513.8328857421875, "logps/rejected": -598.662109375, "loss": 0.5912, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5714448690414429, "rewards/margins": 1.6823393106460571, "rewards/rejected": -3.2537841796875, "step": 9520 }, { "epoch": 0.62, "learning_rate": 1.8655084863327222e-06, "logits/chosen": -1.8784821033477783, "logits/rejected": -1.9442542791366577, "logps/chosen": -518.8257446289062, "logps/rejected": -678.2208251953125, "loss": 0.4694, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6659276485443115, "rewards/margins": 0.6673368811607361, "rewards/rejected": -3.3332645893096924, "step": 9530 }, { "epoch": 0.62, "learning_rate": 1.8599871918452603e-06, "logits/chosen": -1.886810302734375, "logits/rejected": -2.0606863498687744, "logps/chosen": -558.5216064453125, "logps/rejected": -647.117919921875, "loss": 0.4529, "rewards/accuracies": 0.75, "rewards/chosen": -2.2705154418945312, "rewards/margins": 1.0462725162506104, "rewards/rejected": -3.3167881965637207, "step": 9540 }, { "epoch": 0.62, "learning_rate": 1.8544692359781192e-06, "logits/chosen": -2.2128937244415283, "logits/rejected": -2.2743053436279297, "logps/chosen": -486.0628967285156, "logps/rejected": -597.64599609375, "loss": 0.5026, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9931297302246094, "rewards/margins": 1.2624160051345825, "rewards/rejected": -3.2555458545684814, "step": 9550 }, { "epoch": 0.63, "learning_rate": 1.8489546475156602e-06, "logits/chosen": -1.8360512256622314, "logits/rejected": -1.9772145748138428, "logps/chosen": -470.145263671875, "logps/rejected": -592.4556884765625, "loss": 0.5319, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.4911038875579834, "rewards/margins": 0.7783809900283813, "rewards/rejected": -3.2694849967956543, "step": 9560 }, { "epoch": 0.63, "learning_rate": 1.8434434552246778e-06, "logits/chosen": -1.7942237854003906, "logits/rejected": -2.0650551319122314, "logps/chosen": -463.28192138671875, "logps/rejected": -603.6610717773438, "loss": 0.4579, "rewards/accuracies": 0.75, "rewards/chosen": -2.378836154937744, "rewards/margins": 0.7164266705513, "rewards/rejected": -3.0952630043029785, "step": 9570 }, { "epoch": 0.63, "learning_rate": 1.837935687854251e-06, "logits/chosen": -2.18629789352417, "logits/rejected": -2.0240304470062256, "logps/chosen": -573.1627197265625, "logps/rejected": -728.3543701171875, "loss": 0.4139, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.989349126815796, "rewards/margins": 1.6226472854614258, "rewards/rejected": -3.6119964122772217, "step": 9580 }, { "epoch": 0.63, "learning_rate": 1.832431374135592e-06, "logits/chosen": -1.8767648935317993, "logits/rejected": -1.5892021656036377, "logps/chosen": -476.7618103027344, "logps/rejected": -605.6596069335938, "loss": 0.474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.086083173751831, "rewards/margins": 1.416338562965393, "rewards/rejected": -3.5024218559265137, "step": 9590 }, { "epoch": 0.63, "learning_rate": 1.8269305427818977e-06, "logits/chosen": -2.020125150680542, "logits/rejected": -2.155333995819092, "logps/chosen": -516.5859985351562, "logps/rejected": -632.4111938476562, "loss": 0.4812, "rewards/accuracies": 0.75, "rewards/chosen": -2.2657718658447266, "rewards/margins": 1.0013291835784912, "rewards/rejected": -3.267101287841797, "step": 9600 }, { "epoch": 0.63, "learning_rate": 1.821433222488199e-06, "logits/chosen": -2.1384525299072266, "logits/rejected": -1.779611587524414, "logps/chosen": -508.69366455078125, "logps/rejected": -654.6775512695312, "loss": 0.5838, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3500173091888428, "rewards/margins": 1.4160211086273193, "rewards/rejected": -3.766038417816162, "step": 9610 }, { "epoch": 0.63, "learning_rate": 1.8159394419312112e-06, "logits/chosen": -2.125851631164551, "logits/rejected": -1.3714392185211182, "logps/chosen": -589.6465454101562, "logps/rejected": -636.9295043945312, "loss": 0.3293, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6726527214050293, "rewards/margins": 1.416653037071228, "rewards/rejected": -4.089305877685547, "step": 9620 }, { "epoch": 0.63, "learning_rate": 1.8104492297691845e-06, "logits/chosen": -2.600778102874756, "logits/rejected": -2.02959942817688, "logps/chosen": -728.420166015625, "logps/rejected": -682.1693725585938, "loss": 0.5269, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0615074634552, "rewards/margins": 1.0232912302017212, "rewards/rejected": -3.084798574447632, "step": 9630 }, { "epoch": 0.63, "learning_rate": 1.8049626146417562e-06, "logits/chosen": -1.8293802738189697, "logits/rejected": -1.7707229852676392, "logps/chosen": -459.9698791503906, "logps/rejected": -655.4939575195312, "loss": 0.5349, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5275912284851074, "rewards/margins": 2.15377140045166, "rewards/rejected": -4.681363105773926, "step": 9640 }, { "epoch": 0.63, "learning_rate": 1.7994796251697983e-06, "logits/chosen": -2.037261486053467, "logits/rejected": -2.1613070964813232, "logps/chosen": -496.0302734375, "logps/rejected": -599.44189453125, "loss": 0.5668, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.432239055633545, "rewards/margins": 0.7840698957443237, "rewards/rejected": -3.21630859375, "step": 9650 }, { "epoch": 0.63, "learning_rate": 1.794000289955269e-06, "logits/chosen": -1.9210374355316162, "logits/rejected": -2.0788938999176025, "logps/chosen": -522.751708984375, "logps/rejected": -671.2202758789062, "loss": 0.6424, "rewards/accuracies": 0.75, "rewards/chosen": -2.7306416034698486, "rewards/margins": 0.8474380373954773, "rewards/rejected": -3.57807993888855, "step": 9660 }, { "epoch": 0.63, "learning_rate": 1.7885246375810646e-06, "logits/chosen": -2.1408586502075195, "logits/rejected": -2.3029913902282715, "logps/chosen": -589.8209838867188, "logps/rejected": -596.8001708984375, "loss": 0.5855, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9752256870269775, "rewards/margins": 0.6550930738449097, "rewards/rejected": -3.6303184032440186, "step": 9670 }, { "epoch": 0.63, "learning_rate": 1.7830526966108713e-06, "logits/chosen": -1.8824348449707031, "logits/rejected": -1.7505286931991577, "logps/chosen": -562.9407958984375, "logps/rejected": -667.536865234375, "loss": 0.3848, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7581934928894043, "rewards/margins": 1.1899961233139038, "rewards/rejected": -3.9481894969940186, "step": 9680 }, { "epoch": 0.63, "learning_rate": 1.7775844955890129e-06, "logits/chosen": -2.3579797744750977, "logits/rejected": -1.6902892589569092, "logps/chosen": -620.387451171875, "logps/rejected": -675.849365234375, "loss": 0.5617, "rewards/accuracies": 0.75, "rewards/chosen": -2.790555715560913, "rewards/margins": 1.1491975784301758, "rewards/rejected": -3.939753770828247, "step": 9690 }, { "epoch": 0.63, "learning_rate": 1.7721200630403046e-06, "logits/chosen": -2.2002413272857666, "logits/rejected": -2.145082950592041, "logps/chosen": -572.17919921875, "logps/rejected": -632.3953247070312, "loss": 0.5239, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.124516725540161, "rewards/margins": 1.1394758224487305, "rewards/rejected": -3.2639923095703125, "step": 9700 }, { "epoch": 0.64, "learning_rate": 1.7666594274699037e-06, "logits/chosen": -1.9743484258651733, "logits/rejected": -1.4204148054122925, "logps/chosen": -544.8038330078125, "logps/rejected": -578.5160522460938, "loss": 0.4681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.116889715194702, "rewards/margins": 0.9321670532226562, "rewards/rejected": -4.049056529998779, "step": 9710 }, { "epoch": 0.64, "learning_rate": 1.76120261736316e-06, "logits/chosen": -1.8082326650619507, "logits/rejected": -1.49305260181427, "logps/chosen": -551.1629638671875, "logps/rejected": -637.9384155273438, "loss": 0.7645, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.505378007888794, "rewards/margins": 0.28692299127578735, "rewards/rejected": -3.7923007011413574, "step": 9720 }, { "epoch": 0.64, "learning_rate": 1.755749661185468e-06, "logits/chosen": -2.0321710109710693, "logits/rejected": -1.9102131128311157, "logps/chosen": -550.4937133789062, "logps/rejected": -652.8010864257812, "loss": 0.4133, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.035109281539917, "rewards/margins": 1.307720422744751, "rewards/rejected": -3.342829942703247, "step": 9730 }, { "epoch": 0.64, "learning_rate": 1.7503005873821183e-06, "logits/chosen": -1.872251272201538, "logits/rejected": -1.6513566970825195, "logps/chosen": -566.5404663085938, "logps/rejected": -628.6839599609375, "loss": 0.5487, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.969191312789917, "rewards/margins": 0.6317653656005859, "rewards/rejected": -3.600956678390503, "step": 9740 }, { "epoch": 0.64, "learning_rate": 1.744855424378148e-06, "logits/chosen": -2.311882495880127, "logits/rejected": -1.8144493103027344, "logps/chosen": -566.9302368164062, "logps/rejected": -618.9942626953125, "loss": 0.5155, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.352794885635376, "rewards/margins": 1.1586731672286987, "rewards/rejected": -3.5114681720733643, "step": 9750 }, { "epoch": 0.64, "learning_rate": 1.7394142005781973e-06, "logits/chosen": -2.0415024757385254, "logits/rejected": -1.596631646156311, "logps/chosen": -515.461669921875, "logps/rejected": -670.7274169921875, "loss": 0.5212, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.803544521331787, "rewards/margins": 1.0397040843963623, "rewards/rejected": -3.8432490825653076, "step": 9760 }, { "epoch": 0.64, "learning_rate": 1.7339769443663528e-06, "logits/chosen": -2.1776394844055176, "logits/rejected": -2.0415406227111816, "logps/chosen": -502.97320556640625, "logps/rejected": -607.7816772460938, "loss": 0.3718, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.1641182899475098, "rewards/margins": 1.462161660194397, "rewards/rejected": -3.626279830932617, "step": 9770 }, { "epoch": 0.64, "learning_rate": 1.7285436841060078e-06, "logits/chosen": -2.3086421489715576, "logits/rejected": -2.1996123790740967, "logps/chosen": -667.8643798828125, "logps/rejected": -674.9031982421875, "loss": 0.5214, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.371945858001709, "rewards/margins": 0.7595013380050659, "rewards/rejected": -3.1314473152160645, "step": 9780 }, { "epoch": 0.64, "learning_rate": 1.7231144481397083e-06, "logits/chosen": -2.240018129348755, "logits/rejected": -1.5356919765472412, "logps/chosen": -632.2506103515625, "logps/rejected": -654.18994140625, "loss": 0.5387, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7113661766052246, "rewards/margins": 0.6184327006340027, "rewards/rejected": -3.329799175262451, "step": 9790 }, { "epoch": 0.64, "learning_rate": 1.7176892647890092e-06, "logits/chosen": -2.3555355072021484, "logits/rejected": -1.7661718130111694, "logps/chosen": -566.7734985351562, "logps/rejected": -609.5919189453125, "loss": 0.5801, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.8732264041900635, "rewards/margins": 1.266191840171814, "rewards/rejected": -3.139418125152588, "step": 9800 }, { "epoch": 0.64, "learning_rate": 1.7122681623543239e-06, "logits/chosen": -2.204519748687744, "logits/rejected": -1.31349778175354, "logps/chosen": -583.7943115234375, "logps/rejected": -637.7953491210938, "loss": 0.6432, "rewards/accuracies": 0.75, "rewards/chosen": -3.27911376953125, "rewards/margins": 1.301857590675354, "rewards/rejected": -4.5809712409973145, "step": 9810 }, { "epoch": 0.64, "learning_rate": 1.7068511691147788e-06, "logits/chosen": -2.087271213531494, "logits/rejected": -1.6732345819473267, "logps/chosen": -505.6923828125, "logps/rejected": -655.382568359375, "loss": 0.4241, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.321934461593628, "rewards/margins": 1.5012768507003784, "rewards/rejected": -3.823211669921875, "step": 9820 }, { "epoch": 0.64, "learning_rate": 1.7014383133280636e-06, "logits/chosen": -1.8397657871246338, "logits/rejected": -1.8332821130752563, "logps/chosen": -583.0677490234375, "logps/rejected": -586.8565673828125, "loss": 0.5852, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8137660026550293, "rewards/margins": 0.7917875051498413, "rewards/rejected": -3.605553388595581, "step": 9830 }, { "epoch": 0.64, "learning_rate": 1.696029623230286e-06, "logits/chosen": -1.8087882995605469, "logits/rejected": -2.0043318271636963, "logps/chosen": -473.6741638183594, "logps/rejected": -636.8434448242188, "loss": 0.4475, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.895564556121826, "rewards/margins": 1.1002885103225708, "rewards/rejected": -3.995853900909424, "step": 9840 }, { "epoch": 0.64, "learning_rate": 1.6906251270358229e-06, "logits/chosen": -2.3192977905273438, "logits/rejected": -1.9405720233917236, "logps/chosen": -591.8472900390625, "logps/rejected": -636.525146484375, "loss": 0.5559, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4123001098632812, "rewards/margins": 1.067739486694336, "rewards/rejected": -3.480039119720459, "step": 9850 }, { "epoch": 0.65, "learning_rate": 1.685224852937174e-06, "logits/chosen": -1.7449123859405518, "logits/rejected": -1.7458549737930298, "logps/chosen": -437.24456787109375, "logps/rejected": -725.3290405273438, "loss": 0.4406, "rewards/accuracies": 0.75, "rewards/chosen": -2.4435203075408936, "rewards/margins": 1.7831284999847412, "rewards/rejected": -4.226648807525635, "step": 9860 }, { "epoch": 0.65, "learning_rate": 1.6798288291048136e-06, "logits/chosen": -2.501311779022217, "logits/rejected": -2.0939345359802246, "logps/chosen": -609.2479858398438, "logps/rejected": -655.9998779296875, "loss": 0.5679, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.2404470443725586, "rewards/margins": 1.110357642173767, "rewards/rejected": -3.350804567337036, "step": 9870 }, { "epoch": 0.65, "learning_rate": 1.6744370836870466e-06, "logits/chosen": -2.0165457725524902, "logits/rejected": -2.0733141899108887, "logps/chosen": -537.8299560546875, "logps/rejected": -608.09765625, "loss": 0.4918, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3343470096588135, "rewards/margins": 1.3215583562850952, "rewards/rejected": -3.6559054851531982, "step": 9880 }, { "epoch": 0.65, "learning_rate": 1.6690496448098576e-06, "logits/chosen": -1.6395372152328491, "logits/rejected": -1.5579493045806885, "logps/chosen": -574.619140625, "logps/rejected": -686.0782470703125, "loss": 0.5562, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.0321133136749268, "rewards/margins": 1.00796377658844, "rewards/rejected": -4.040077209472656, "step": 9890 }, { "epoch": 0.65, "learning_rate": 1.6636665405767666e-06, "logits/chosen": -2.1289894580841064, "logits/rejected": -1.7371848821640015, "logps/chosen": -671.19921875, "logps/rejected": -714.276611328125, "loss": 0.6902, "rewards/accuracies": 0.5, "rewards/chosen": -2.9874866008758545, "rewards/margins": 0.4317949712276459, "rewards/rejected": -3.4192817211151123, "step": 9900 }, { "epoch": 0.65, "learning_rate": 1.6582877990686827e-06, "logits/chosen": -2.2885613441467285, "logits/rejected": -1.443866491317749, "logps/chosen": -533.60400390625, "logps/rejected": -611.7394409179688, "loss": 0.438, "rewards/accuracies": 0.75, "rewards/chosen": -2.6467955112457275, "rewards/margins": 1.2188901901245117, "rewards/rejected": -3.8656857013702393, "step": 9910 }, { "epoch": 0.65, "learning_rate": 1.6529134483437562e-06, "logits/chosen": -1.6534805297851562, "logits/rejected": -1.2551298141479492, "logps/chosen": -536.8887939453125, "logps/rejected": -668.57275390625, "loss": 0.5066, "rewards/accuracies": 0.75, "rewards/chosen": -3.161116123199463, "rewards/margins": 1.3622961044311523, "rewards/rejected": -4.523411750793457, "step": 9920 }, { "epoch": 0.65, "learning_rate": 1.647543516437233e-06, "logits/chosen": -1.8923059701919556, "logits/rejected": -1.8489351272583008, "logps/chosen": -535.9762573242188, "logps/rejected": -653.535400390625, "loss": 0.5691, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6559455394744873, "rewards/margins": 1.115609049797058, "rewards/rejected": -3.771554470062256, "step": 9930 }, { "epoch": 0.65, "learning_rate": 1.6421780313613088e-06, "logits/chosen": -1.6177890300750732, "logits/rejected": -1.644838571548462, "logps/chosen": -591.5128784179688, "logps/rejected": -762.8223876953125, "loss": 0.4213, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9691004753112793, "rewards/margins": 1.322816014289856, "rewards/rejected": -4.291916370391846, "step": 9940 }, { "epoch": 0.65, "learning_rate": 1.6368170211049816e-06, "logits/chosen": -2.2172207832336426, "logits/rejected": -1.9700168371200562, "logps/chosen": -635.7975463867188, "logps/rejected": -708.9931030273438, "loss": 0.4309, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1618759632110596, "rewards/margins": 1.135585069656372, "rewards/rejected": -4.297461032867432, "step": 9950 }, { "epoch": 0.65, "learning_rate": 1.6314605136339074e-06, "logits/chosen": -1.5088202953338623, "logits/rejected": -1.7007286548614502, "logps/chosen": -557.1663818359375, "logps/rejected": -690.3626708984375, "loss": 0.5372, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.117827892303467, "rewards/margins": 1.187142014503479, "rewards/rejected": -4.304969787597656, "step": 9960 }, { "epoch": 0.65, "learning_rate": 1.6261085368902526e-06, "logits/chosen": -1.6839511394500732, "logits/rejected": -1.834540605545044, "logps/chosen": -605.2606811523438, "logps/rejected": -610.1304931640625, "loss": 0.4778, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9750895500183105, "rewards/margins": 1.0680017471313477, "rewards/rejected": -4.043091297149658, "step": 9970 }, { "epoch": 0.65, "learning_rate": 1.6207611187925503e-06, "logits/chosen": -1.7885392904281616, "logits/rejected": -1.3947422504425049, "logps/chosen": -523.8977661132812, "logps/rejected": -595.2725830078125, "loss": 0.4977, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.456044912338257, "rewards/margins": 1.3109487295150757, "rewards/rejected": -3.766993761062622, "step": 9980 }, { "epoch": 0.65, "learning_rate": 1.6154182872355512e-06, "logits/chosen": -2.070439100265503, "logits/rejected": -1.7619030475616455, "logps/chosen": -579.6802978515625, "logps/rejected": -636.4152221679688, "loss": 0.5686, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.139418601989746, "rewards/margins": 1.029687523841858, "rewards/rejected": -4.169106483459473, "step": 9990 }, { "epoch": 0.65, "learning_rate": 1.610080070090084e-06, "logits/chosen": -2.3029110431671143, "logits/rejected": -2.0725982189178467, "logps/chosen": -669.8273315429688, "logps/rejected": -659.200439453125, "loss": 0.6379, "rewards/accuracies": 0.75, "rewards/chosen": -2.107666492462158, "rewards/margins": 1.127187728881836, "rewards/rejected": -3.234853744506836, "step": 10000 }, { "epoch": 0.65, "learning_rate": 1.6047464952029034e-06, "logits/chosen": -2.423081636428833, "logits/rejected": -1.1933789253234863, "logps/chosen": -524.700439453125, "logps/rejected": -526.11376953125, "loss": 0.4875, "rewards/accuracies": 0.75, "rewards/chosen": -2.9380455017089844, "rewards/margins": 0.9216144680976868, "rewards/rejected": -3.8596603870391846, "step": 10010 }, { "epoch": 0.66, "learning_rate": 1.5994175903965486e-06, "logits/chosen": -2.223822832107544, "logits/rejected": -1.7636470794677734, "logps/chosen": -618.0186767578125, "logps/rejected": -664.9786376953125, "loss": 0.5231, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1082825660705566, "rewards/margins": 0.6293025016784668, "rewards/rejected": -3.7375850677490234, "step": 10020 }, { "epoch": 0.66, "learning_rate": 1.5940933834691977e-06, "logits/chosen": -2.235905408859253, "logits/rejected": -1.820980429649353, "logps/chosen": -634.0506591796875, "logps/rejected": -723.7987060546875, "loss": 0.3591, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5085694789886475, "rewards/margins": 1.4282045364379883, "rewards/rejected": -3.9367740154266357, "step": 10030 }, { "epoch": 0.66, "learning_rate": 1.588773902194522e-06, "logits/chosen": -2.003666400909424, "logits/rejected": -1.8740298748016357, "logps/chosen": -648.436767578125, "logps/rejected": -629.9469604492188, "loss": 0.6544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.544236421585083, "rewards/margins": 0.5117180943489075, "rewards/rejected": -4.055954933166504, "step": 10040 }, { "epoch": 0.66, "learning_rate": 1.583459174321541e-06, "logits/chosen": -2.2216591835021973, "logits/rejected": -1.599656343460083, "logps/chosen": -520.3749389648438, "logps/rejected": -669.2892456054688, "loss": 0.6699, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5063908100128174, "rewards/margins": 1.1887396574020386, "rewards/rejected": -3.6951301097869873, "step": 10050 }, { "epoch": 0.66, "learning_rate": 1.5781492275744797e-06, "logits/chosen": -1.8967546224594116, "logits/rejected": -2.1360228061676025, "logps/chosen": -500.9303283691406, "logps/rejected": -546.4171142578125, "loss": 0.7569, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.699618101119995, "rewards/margins": 0.2735558748245239, "rewards/rejected": -2.9731738567352295, "step": 10060 }, { "epoch": 0.66, "learning_rate": 1.5728440896526215e-06, "logits/chosen": -2.0227789878845215, "logits/rejected": -2.035097599029541, "logps/chosen": -556.853271484375, "logps/rejected": -697.7138671875, "loss": 0.5068, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4388175010681152, "rewards/margins": 1.3052314519882202, "rewards/rejected": -3.744049072265625, "step": 10070 }, { "epoch": 0.66, "learning_rate": 1.5675437882301633e-06, "logits/chosen": -1.372617483139038, "logits/rejected": -1.4249074459075928, "logps/chosen": -545.9317016601562, "logps/rejected": -718.7574462890625, "loss": 0.7194, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.4789633750915527, "rewards/margins": 1.4255876541137695, "rewards/rejected": -4.9045515060424805, "step": 10080 }, { "epoch": 0.66, "learning_rate": 1.5622483509560748e-06, "logits/chosen": -2.2033467292785645, "logits/rejected": -2.130833148956299, "logps/chosen": -560.2772216796875, "logps/rejected": -674.1600952148438, "loss": 0.3568, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.332453489303589, "rewards/margins": 1.278029441833496, "rewards/rejected": -3.610483169555664, "step": 10090 }, { "epoch": 0.66, "learning_rate": 1.5569578054539506e-06, "logits/chosen": -1.7471822500228882, "logits/rejected": -1.7679237127304077, "logps/chosen": -570.5139770507812, "logps/rejected": -669.4573364257812, "loss": 0.614, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.061995506286621, "rewards/margins": 0.6868606805801392, "rewards/rejected": -3.7488560676574707, "step": 10100 }, { "epoch": 0.66, "learning_rate": 1.551672179321867e-06, "logits/chosen": -2.377981662750244, "logits/rejected": -2.1207613945007324, "logps/chosen": -584.4964599609375, "logps/rejected": -615.555419921875, "loss": 0.6713, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.39784574508667, "rewards/margins": 0.9840338826179504, "rewards/rejected": -3.3818798065185547, "step": 10110 }, { "epoch": 0.66, "learning_rate": 1.5463915001322398e-06, "logits/chosen": -2.314440965652466, "logits/rejected": -1.6883175373077393, "logps/chosen": -631.7022094726562, "logps/rejected": -669.0033569335938, "loss": 0.6365, "rewards/accuracies": 0.75, "rewards/chosen": -2.81516695022583, "rewards/margins": 0.9370850324630737, "rewards/rejected": -3.7522521018981934, "step": 10120 }, { "epoch": 0.66, "learning_rate": 1.5411157954316784e-06, "logits/chosen": -2.0021495819091797, "logits/rejected": -1.4604111909866333, "logps/chosen": -496.84881591796875, "logps/rejected": -670.7314453125, "loss": 0.4258, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8273229598999023, "rewards/margins": 1.669306993484497, "rewards/rejected": -4.4966301918029785, "step": 10130 }, { "epoch": 0.66, "learning_rate": 1.535845092740843e-06, "logits/chosen": -2.4046027660369873, "logits/rejected": -2.195103406906128, "logps/chosen": -662.22705078125, "logps/rejected": -719.7885131835938, "loss": 0.481, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.198734760284424, "rewards/margins": 1.1839425563812256, "rewards/rejected": -3.3826770782470703, "step": 10140 }, { "epoch": 0.66, "learning_rate": 1.5305794195543005e-06, "logits/chosen": -1.5913124084472656, "logits/rejected": -1.6547712087631226, "logps/chosen": -525.8160400390625, "logps/rejected": -589.795166015625, "loss": 0.602, "rewards/accuracies": 0.75, "rewards/chosen": -3.008615732192993, "rewards/margins": 0.8577588200569153, "rewards/rejected": -3.8663744926452637, "step": 10150 }, { "epoch": 0.66, "learning_rate": 1.5253188033403816e-06, "logits/chosen": -2.0906519889831543, "logits/rejected": -1.8118690252304077, "logps/chosen": -604.877197265625, "logps/rejected": -674.4083251953125, "loss": 0.6464, "rewards/accuracies": 0.75, "rewards/chosen": -3.0363845825195312, "rewards/margins": 0.5893286466598511, "rewards/rejected": -3.6257128715515137, "step": 10160 }, { "epoch": 0.67, "learning_rate": 1.520063271541037e-06, "logits/chosen": -1.967552900314331, "logits/rejected": -1.936489462852478, "logps/chosen": -548.0531005859375, "logps/rejected": -648.8834838867188, "loss": 0.5697, "rewards/accuracies": 0.75, "rewards/chosen": -3.128791332244873, "rewards/margins": 1.1066060066223145, "rewards/rejected": -4.235397815704346, "step": 10170 }, { "epoch": 0.67, "learning_rate": 1.5148128515716954e-06, "logits/chosen": -2.1086018085479736, "logits/rejected": -1.317168116569519, "logps/chosen": -588.2862548828125, "logps/rejected": -667.6849975585938, "loss": 0.5786, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.9626882076263428, "rewards/margins": 1.2267658710479736, "rewards/rejected": -4.189453601837158, "step": 10180 }, { "epoch": 0.67, "learning_rate": 1.5095675708211197e-06, "logits/chosen": -1.7038860321044922, "logits/rejected": -1.7373549938201904, "logps/chosen": -510.05029296875, "logps/rejected": -598.3043212890625, "loss": 0.7414, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.886688232421875, "rewards/margins": 0.5040321350097656, "rewards/rejected": -3.3907203674316406, "step": 10190 }, { "epoch": 0.67, "learning_rate": 1.504327456651263e-06, "logits/chosen": -2.347780704498291, "logits/rejected": -2.138662815093994, "logps/chosen": -569.8806762695312, "logps/rejected": -697.031494140625, "loss": 0.3543, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3554959297180176, "rewards/margins": 1.7921050786972046, "rewards/rejected": -4.147601127624512, "step": 10200 }, { "epoch": 0.67, "learning_rate": 1.4990925363971284e-06, "logits/chosen": -1.9000709056854248, "logits/rejected": -1.8390512466430664, "logps/chosen": -650.6390991210938, "logps/rejected": -682.9108276367188, "loss": 0.5153, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.575188159942627, "rewards/margins": 0.5928482413291931, "rewards/rejected": -4.168036460876465, "step": 10210 }, { "epoch": 0.67, "learning_rate": 1.4938628373666236e-06, "logits/chosen": -2.047325849533081, "logits/rejected": -2.0585241317749023, "logps/chosen": -555.3873291015625, "logps/rejected": -583.078369140625, "loss": 0.5613, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.083057403564453, "rewards/margins": 0.7428995966911316, "rewards/rejected": -3.8259568214416504, "step": 10220 }, { "epoch": 0.67, "learning_rate": 1.4886383868404203e-06, "logits/chosen": -1.6983534097671509, "logits/rejected": -1.745357871055603, "logps/chosen": -581.9317016601562, "logps/rejected": -669.6945190429688, "loss": 0.6381, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.7163448333740234, "rewards/margins": 0.955405056476593, "rewards/rejected": -4.671749591827393, "step": 10230 }, { "epoch": 0.67, "learning_rate": 1.483419212071813e-06, "logits/chosen": -2.0277183055877686, "logits/rejected": -2.0014185905456543, "logps/chosen": -546.6410522460938, "logps/rejected": -748.728515625, "loss": 0.4608, "rewards/accuracies": 0.75, "rewards/chosen": -2.4395718574523926, "rewards/margins": 1.5673308372497559, "rewards/rejected": -4.006903171539307, "step": 10240 }, { "epoch": 0.67, "learning_rate": 1.478205340286573e-06, "logits/chosen": -2.0982167720794678, "logits/rejected": -2.0167489051818848, "logps/chosen": -541.8618774414062, "logps/rejected": -570.8551025390625, "loss": 0.5548, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.229513645172119, "rewards/margins": 0.7931705713272095, "rewards/rejected": -3.02268385887146, "step": 10250 }, { "epoch": 0.67, "learning_rate": 1.4729967986828104e-06, "logits/chosen": -2.021984815597534, "logits/rejected": -2.096220016479492, "logps/chosen": -601.5169677734375, "logps/rejected": -682.0748291015625, "loss": 0.597, "rewards/accuracies": 0.75, "rewards/chosen": -2.8182146549224854, "rewards/margins": 1.1306148767471313, "rewards/rejected": -3.948829174041748, "step": 10260 }, { "epoch": 0.67, "learning_rate": 1.4677936144308286e-06, "logits/chosen": -2.0748703479766846, "logits/rejected": -2.051962375640869, "logps/chosen": -708.6862182617188, "logps/rejected": -712.1474609375, "loss": 0.6064, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.689055919647217, "rewards/margins": 0.6155595779418945, "rewards/rejected": -3.3046157360076904, "step": 10270 }, { "epoch": 0.67, "learning_rate": 1.4625958146729864e-06, "logits/chosen": -2.340296983718872, "logits/rejected": -1.7309805154800415, "logps/chosen": -576.7568969726562, "logps/rejected": -639.8253173828125, "loss": 0.4493, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4062986373901367, "rewards/margins": 1.3249199390411377, "rewards/rejected": -3.7312183380126953, "step": 10280 }, { "epoch": 0.67, "learning_rate": 1.4574034265235523e-06, "logits/chosen": -2.414374828338623, "logits/rejected": -2.1356239318847656, "logps/chosen": -620.1607666015625, "logps/rejected": -600.0708618164062, "loss": 0.6447, "rewards/accuracies": 0.75, "rewards/chosen": -2.010559320449829, "rewards/margins": 0.8874303698539734, "rewards/rejected": -2.897989511489868, "step": 10290 }, { "epoch": 0.67, "learning_rate": 1.452216477068568e-06, "logits/chosen": -1.9850013256072998, "logits/rejected": -1.7580883502960205, "logps/chosen": -517.3007202148438, "logps/rejected": -688.1248779296875, "loss": 0.5061, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.390328884124756, "rewards/margins": 1.4543589353561401, "rewards/rejected": -3.8446884155273438, "step": 10300 }, { "epoch": 0.67, "learning_rate": 1.4470349933657004e-06, "logits/chosen": -2.2277064323425293, "logits/rejected": -2.022275447845459, "logps/chosen": -511.62371826171875, "logps/rejected": -570.8912353515625, "loss": 0.5835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.646738052368164, "rewards/margins": 0.7845074534416199, "rewards/rejected": -3.4312453269958496, "step": 10310 }, { "epoch": 0.68, "learning_rate": 1.4418590024441096e-06, "logits/chosen": -1.8264057636260986, "logits/rejected": -1.7782049179077148, "logps/chosen": -566.3980712890625, "logps/rejected": -676.2247314453125, "loss": 0.7352, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8814053535461426, "rewards/margins": 0.19260264933109283, "rewards/rejected": -3.074007987976074, "step": 10320 }, { "epoch": 0.68, "learning_rate": 1.436688531304297e-06, "logits/chosen": -1.926770567893982, "logits/rejected": -1.6952238082885742, "logps/chosen": -555.2659912109375, "logps/rejected": -568.3193969726562, "loss": 0.4095, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.293551445007324, "rewards/margins": 1.1236270666122437, "rewards/rejected": -3.4171783924102783, "step": 10330 }, { "epoch": 0.68, "learning_rate": 1.431523606917974e-06, "logits/chosen": -2.0901577472686768, "logits/rejected": -1.7170679569244385, "logps/chosen": -594.2264404296875, "logps/rejected": -695.5934448242188, "loss": 0.4763, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.049959659576416, "rewards/margins": 1.3577804565429688, "rewards/rejected": -4.407739639282227, "step": 10340 }, { "epoch": 0.68, "learning_rate": 1.4263642562279162e-06, "logits/chosen": -1.9279849529266357, "logits/rejected": -1.704097032546997, "logps/chosen": -515.13037109375, "logps/rejected": -701.1751098632812, "loss": 0.4295, "rewards/accuracies": 0.75, "rewards/chosen": -2.3443188667297363, "rewards/margins": 1.0688626766204834, "rewards/rejected": -3.413181781768799, "step": 10350 }, { "epoch": 0.68, "learning_rate": 1.4212105061478257e-06, "logits/chosen": -2.2097935676574707, "logits/rejected": -1.745566725730896, "logps/chosen": -628.4295654296875, "logps/rejected": -493.8260803222656, "loss": 0.5575, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.012010097503662, "rewards/margins": 0.5730947256088257, "rewards/rejected": -2.5851049423217773, "step": 10360 }, { "epoch": 0.68, "learning_rate": 1.4160623835621848e-06, "logits/chosen": -2.002143144607544, "logits/rejected": -1.9734750986099243, "logps/chosen": -690.5743408203125, "logps/rejected": -660.885498046875, "loss": 0.6055, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6461167335510254, "rewards/margins": 1.0223562717437744, "rewards/rejected": -3.668473482131958, "step": 10370 }, { "epoch": 0.68, "learning_rate": 1.4109199153261249e-06, "logits/chosen": -1.7158596515655518, "logits/rejected": -2.035625696182251, "logps/chosen": -531.4519653320312, "logps/rejected": -691.5840454101562, "loss": 0.3877, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5169262886047363, "rewards/margins": 1.0818930864334106, "rewards/rejected": -3.5988192558288574, "step": 10380 }, { "epoch": 0.68, "learning_rate": 1.405783128265278e-06, "logits/chosen": -1.9189598560333252, "logits/rejected": -1.8550268411636353, "logps/chosen": -555.7239990234375, "logps/rejected": -575.2282104492188, "loss": 0.4999, "rewards/accuracies": 0.75, "rewards/chosen": -2.295558452606201, "rewards/margins": 0.9672013521194458, "rewards/rejected": -3.2627596855163574, "step": 10390 }, { "epoch": 0.68, "learning_rate": 1.4006520491756427e-06, "logits/chosen": -1.7126344442367554, "logits/rejected": -2.134204626083374, "logps/chosen": -494.3365783691406, "logps/rejected": -612.637939453125, "loss": 0.6658, "rewards/accuracies": 0.75, "rewards/chosen": -2.496967315673828, "rewards/margins": 0.8580994606018066, "rewards/rejected": -3.355067014694214, "step": 10400 }, { "epoch": 0.68, "learning_rate": 1.39552670482344e-06, "logits/chosen": -2.0910916328430176, "logits/rejected": -1.7963777780532837, "logps/chosen": -440.82135009765625, "logps/rejected": -563.18310546875, "loss": 0.4642, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.44130802154541, "rewards/margins": 0.9846822023391724, "rewards/rejected": -3.425990581512451, "step": 10410 }, { "epoch": 0.68, "learning_rate": 1.3904071219449776e-06, "logits/chosen": -1.9734894037246704, "logits/rejected": -1.7503770589828491, "logps/chosen": -541.8358154296875, "logps/rejected": -622.4808349609375, "loss": 0.4553, "rewards/accuracies": 0.75, "rewards/chosen": -2.94407320022583, "rewards/margins": 0.8452234268188477, "rewards/rejected": -3.7892966270446777, "step": 10420 }, { "epoch": 0.68, "learning_rate": 1.3852933272465068e-06, "logits/chosen": -2.388002395629883, "logits/rejected": -1.7295029163360596, "logps/chosen": -581.5325927734375, "logps/rejected": -617.9428100585938, "loss": 0.6602, "rewards/accuracies": 0.75, "rewards/chosen": -2.7455406188964844, "rewards/margins": 1.230202317237854, "rewards/rejected": -3.975742816925049, "step": 10430 }, { "epoch": 0.68, "learning_rate": 1.3801853474040873e-06, "logits/chosen": -1.818842887878418, "logits/rejected": -1.7371982336044312, "logps/chosen": -609.0447998046875, "logps/rejected": -701.6209106445312, "loss": 0.4102, "rewards/accuracies": 0.75, "rewards/chosen": -3.2907192707061768, "rewards/margins": 0.7941431999206543, "rewards/rejected": -4.084862232208252, "step": 10440 }, { "epoch": 0.68, "learning_rate": 1.3750832090634417e-06, "logits/chosen": -2.0449259281158447, "logits/rejected": -1.609819769859314, "logps/chosen": -622.4576416015625, "logps/rejected": -702.9534301757812, "loss": 0.5342, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.222707748413086, "rewards/margins": 1.2670750617980957, "rewards/rejected": -4.489782333374023, "step": 10450 }, { "epoch": 0.68, "learning_rate": 1.3699869388398245e-06, "logits/chosen": -1.9015638828277588, "logits/rejected": -1.8529236316680908, "logps/chosen": -634.927001953125, "logps/rejected": -543.111083984375, "loss": 0.5947, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.228215456008911, "rewards/margins": 0.367938369512558, "rewards/rejected": -3.596153974533081, "step": 10460 }, { "epoch": 0.69, "learning_rate": 1.3648965633178772e-06, "logits/chosen": -2.254178285598755, "logits/rejected": -1.8249447345733643, "logps/chosen": -581.3519897460938, "logps/rejected": -621.2752685546875, "loss": 0.5659, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8849399089813232, "rewards/margins": 0.8968615531921387, "rewards/rejected": -3.781801223754883, "step": 10470 }, { "epoch": 0.69, "learning_rate": 1.3598121090514938e-06, "logits/chosen": -1.8592582941055298, "logits/rejected": -2.095013380050659, "logps/chosen": -501.6394958496094, "logps/rejected": -641.8675537109375, "loss": 0.5356, "rewards/accuracies": 0.75, "rewards/chosen": -2.5355939865112305, "rewards/margins": 0.7130419611930847, "rewards/rejected": -3.248635768890381, "step": 10480 }, { "epoch": 0.69, "learning_rate": 1.3547336025636753e-06, "logits/chosen": -1.9418447017669678, "logits/rejected": -1.9443689584732056, "logps/chosen": -624.2605590820312, "logps/rejected": -689.8292846679688, "loss": 0.592, "rewards/accuracies": 0.75, "rewards/chosen": -2.4760632514953613, "rewards/margins": 1.5628429651260376, "rewards/rejected": -4.038906097412109, "step": 10490 }, { "epoch": 0.69, "learning_rate": 1.3496610703464022e-06, "logits/chosen": -2.1925435066223145, "logits/rejected": -2.0867669582366943, "logps/chosen": -584.0159301757812, "logps/rejected": -752.5704345703125, "loss": 0.5222, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7059502601623535, "rewards/margins": 1.3427493572235107, "rewards/rejected": -4.048699378967285, "step": 10500 }, { "epoch": 0.69, "learning_rate": 1.3445945388604848e-06, "logits/chosen": -1.9044320583343506, "logits/rejected": -1.698246717453003, "logps/chosen": -550.87060546875, "logps/rejected": -650.580078125, "loss": 0.5889, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.0925121307373047, "rewards/margins": 1.2077891826629639, "rewards/rejected": -4.3003010749816895, "step": 10510 }, { "epoch": 0.69, "learning_rate": 1.3395340345354358e-06, "logits/chosen": -1.8592084646224976, "logits/rejected": -1.685819387435913, "logps/chosen": -542.7408447265625, "logps/rejected": -685.5568237304688, "loss": 0.4553, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5574772357940674, "rewards/margins": 1.348659634590149, "rewards/rejected": -3.9061362743377686, "step": 10520 }, { "epoch": 0.69, "learning_rate": 1.334479583769322e-06, "logits/chosen": -1.6842561960220337, "logits/rejected": -1.757942795753479, "logps/chosen": -540.4334106445312, "logps/rejected": -558.3435668945312, "loss": 0.8053, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.7499144077301025, "rewards/margins": 0.581194281578064, "rewards/rejected": -3.331108808517456, "step": 10530 }, { "epoch": 0.69, "learning_rate": 1.3294312129286366e-06, "logits/chosen": -2.046003818511963, "logits/rejected": -1.7724971771240234, "logps/chosen": -688.4324951171875, "logps/rejected": -720.7156982421875, "loss": 0.4639, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.893906354904175, "rewards/margins": 1.4649419784545898, "rewards/rejected": -4.3588480949401855, "step": 10540 }, { "epoch": 0.69, "learning_rate": 1.324388948348153e-06, "logits/chosen": -2.0161421298980713, "logits/rejected": -2.1307661533355713, "logps/chosen": -531.250732421875, "logps/rejected": -607.2823486328125, "loss": 0.5373, "rewards/accuracies": 0.75, "rewards/chosen": -2.34794545173645, "rewards/margins": 1.0986201763153076, "rewards/rejected": -3.446566104888916, "step": 10550 }, { "epoch": 0.69, "learning_rate": 1.319352816330796e-06, "logits/chosen": -1.8210279941558838, "logits/rejected": -2.1046142578125, "logps/chosen": -501.18353271484375, "logps/rejected": -660.062744140625, "loss": 0.4282, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3282196521759033, "rewards/margins": 1.038062334060669, "rewards/rejected": -3.3662819862365723, "step": 10560 }, { "epoch": 0.69, "learning_rate": 1.314322843147494e-06, "logits/chosen": -1.6058871746063232, "logits/rejected": -1.4052083492279053, "logps/chosen": -472.9828186035156, "logps/rejected": -602.2042846679688, "loss": 0.5213, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9463601112365723, "rewards/margins": 1.2267777919769287, "rewards/rejected": -4.173138618469238, "step": 10570 }, { "epoch": 0.69, "learning_rate": 1.3092990550370526e-06, "logits/chosen": -1.8036601543426514, "logits/rejected": -1.378004789352417, "logps/chosen": -599.864013671875, "logps/rejected": -751.0603637695312, "loss": 0.6359, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5068113803863525, "rewards/margins": 1.4007375240325928, "rewards/rejected": -3.907548427581787, "step": 10580 }, { "epoch": 0.69, "learning_rate": 1.3042814782060131e-06, "logits/chosen": -1.947623610496521, "logits/rejected": -1.6988731622695923, "logps/chosen": -516.1401977539062, "logps/rejected": -707.0333251953125, "loss": 0.4308, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8791418075561523, "rewards/margins": 1.8367408514022827, "rewards/rejected": -4.715882301330566, "step": 10590 }, { "epoch": 0.69, "learning_rate": 1.2992701388285112e-06, "logits/chosen": -2.125699520111084, "logits/rejected": -1.5845218896865845, "logps/chosen": -580.4114990234375, "logps/rejected": -618.8101806640625, "loss": 0.6051, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.8320207595825195, "rewards/margins": 1.1038181781768799, "rewards/rejected": -3.9358391761779785, "step": 10600 }, { "epoch": 0.69, "learning_rate": 1.29426506304615e-06, "logits/chosen": -2.3026821613311768, "logits/rejected": -1.843064308166504, "logps/chosen": -664.8126831054688, "logps/rejected": -697.6212768554688, "loss": 0.446, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.479389190673828, "rewards/margins": 1.1389353275299072, "rewards/rejected": -3.6183242797851562, "step": 10610 }, { "epoch": 0.69, "learning_rate": 1.289266276967855e-06, "logits/chosen": -1.8575429916381836, "logits/rejected": -1.7204229831695557, "logps/chosen": -513.6630859375, "logps/rejected": -561.46044921875, "loss": 0.456, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7641661167144775, "rewards/margins": 1.3243200778961182, "rewards/rejected": -4.0884857177734375, "step": 10620 }, { "epoch": 0.7, "learning_rate": 1.284273806669745e-06, "logits/chosen": -1.9443877935409546, "logits/rejected": -1.904849648475647, "logps/chosen": -579.8768310546875, "logps/rejected": -575.4515380859375, "loss": 0.656, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.2298901081085205, "rewards/margins": 0.28284725546836853, "rewards/rejected": -3.51273775100708, "step": 10630 }, { "epoch": 0.7, "learning_rate": 1.2792876781949884e-06, "logits/chosen": -1.764835000038147, "logits/rejected": -1.8784644603729248, "logps/chosen": -462.2169494628906, "logps/rejected": -620.6724853515625, "loss": 0.4777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.609870433807373, "rewards/margins": 0.887588620185852, "rewards/rejected": -3.4974589347839355, "step": 10640 }, { "epoch": 0.7, "learning_rate": 1.274307917553676e-06, "logits/chosen": -2.0640013217926025, "logits/rejected": -1.8911521434783936, "logps/chosen": -616.5274658203125, "logps/rejected": -635.0863037109375, "loss": 0.6024, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.588681936264038, "rewards/margins": 1.017657995223999, "rewards/rejected": -3.606339931488037, "step": 10650 }, { "epoch": 0.7, "learning_rate": 1.2693345507226767e-06, "logits/chosen": -2.1492960453033447, "logits/rejected": -1.4352697134017944, "logps/chosen": -565.2218017578125, "logps/rejected": -634.83447265625, "loss": 0.6638, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7949912548065186, "rewards/margins": 1.2197343111038208, "rewards/rejected": -4.014725685119629, "step": 10660 }, { "epoch": 0.7, "learning_rate": 1.2643676036455099e-06, "logits/chosen": -2.426874876022339, "logits/rejected": -1.861326813697815, "logps/chosen": -649.0333251953125, "logps/rejected": -665.9698486328125, "loss": 0.634, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.055633068084717, "rewards/margins": 0.6452008485794067, "rewards/rejected": -3.700833797454834, "step": 10670 }, { "epoch": 0.7, "learning_rate": 1.259407102232203e-06, "logits/chosen": -1.9577795267105103, "logits/rejected": -1.520578145980835, "logps/chosen": -484.3440856933594, "logps/rejected": -582.2489013671875, "loss": 0.5464, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.621001720428467, "rewards/margins": 1.1693580150604248, "rewards/rejected": -3.79036021232605, "step": 10680 }, { "epoch": 0.7, "learning_rate": 1.254453072359163e-06, "logits/chosen": -2.0032951831817627, "logits/rejected": -2.275364637374878, "logps/chosen": -531.3727416992188, "logps/rejected": -713.6097412109375, "loss": 0.4103, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.579955577850342, "rewards/margins": 1.1371161937713623, "rewards/rejected": -3.7170722484588623, "step": 10690 }, { "epoch": 0.7, "learning_rate": 1.2495055398690337e-06, "logits/chosen": -1.7794713973999023, "logits/rejected": -1.7121944427490234, "logps/chosen": -581.0518798828125, "logps/rejected": -602.139404296875, "loss": 0.606, "rewards/accuracies": 0.5, "rewards/chosen": -3.219092845916748, "rewards/margins": 0.5926779508590698, "rewards/rejected": -3.8117709159851074, "step": 10700 }, { "epoch": 0.7, "learning_rate": 1.2445645305705718e-06, "logits/chosen": -2.093291759490967, "logits/rejected": -1.5274455547332764, "logps/chosen": -557.7086181640625, "logps/rejected": -600.8196411132812, "loss": 0.4573, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3752002716064453, "rewards/margins": 1.2375011444091797, "rewards/rejected": -3.612701416015625, "step": 10710 }, { "epoch": 0.7, "learning_rate": 1.2396300702384995e-06, "logits/chosen": -1.8599927425384521, "logits/rejected": -1.9693912267684937, "logps/chosen": -517.023681640625, "logps/rejected": -709.513671875, "loss": 0.5622, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9134440422058105, "rewards/margins": 1.4009777307510376, "rewards/rejected": -4.314421653747559, "step": 10720 }, { "epoch": 0.7, "learning_rate": 1.234702184613381e-06, "logits/chosen": -2.248994827270508, "logits/rejected": -2.2692863941192627, "logps/chosen": -476.87890625, "logps/rejected": -669.5069580078125, "loss": 0.4611, "rewards/accuracies": 1.0, "rewards/chosen": -2.1680808067321777, "rewards/margins": 1.2293837070465088, "rewards/rejected": -3.3974647521972656, "step": 10730 }, { "epoch": 0.7, "learning_rate": 1.2297808994014793e-06, "logits/chosen": -1.8323322534561157, "logits/rejected": -1.673384428024292, "logps/chosen": -607.1715087890625, "logps/rejected": -587.4119262695312, "loss": 0.5794, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.567920207977295, "rewards/margins": 0.7959483861923218, "rewards/rejected": -3.363868236541748, "step": 10740 }, { "epoch": 0.7, "learning_rate": 1.2248662402746314e-06, "logits/chosen": -1.3988916873931885, "logits/rejected": -1.6921294927597046, "logps/chosen": -612.28857421875, "logps/rejected": -778.8629760742188, "loss": 0.4916, "rewards/accuracies": 0.75, "rewards/chosen": -3.053574800491333, "rewards/margins": 1.2433674335479736, "rewards/rejected": -4.296942234039307, "step": 10750 }, { "epoch": 0.7, "learning_rate": 1.2199582328701045e-06, "logits/chosen": -1.9762433767318726, "logits/rejected": -2.0665011405944824, "logps/chosen": -458.28076171875, "logps/rejected": -568.51806640625, "loss": 0.4898, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9166343212127686, "rewards/margins": 1.0887027978897095, "rewards/rejected": -3.0053372383117676, "step": 10760 }, { "epoch": 0.7, "learning_rate": 1.2150569027904712e-06, "logits/chosen": -2.140639305114746, "logits/rejected": -2.0418004989624023, "logps/chosen": -533.3043212890625, "logps/rejected": -749.2007446289062, "loss": 0.7594, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.868107318878174, "rewards/margins": 1.3112220764160156, "rewards/rejected": -4.179329872131348, "step": 10770 }, { "epoch": 0.71, "learning_rate": 1.2101622756034688e-06, "logits/chosen": -2.2054402828216553, "logits/rejected": -2.0917115211486816, "logps/chosen": -637.744873046875, "logps/rejected": -625.0416870117188, "loss": 0.627, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.1930999755859375, "rewards/margins": 0.7180913686752319, "rewards/rejected": -2.911191463470459, "step": 10780 }, { "epoch": 0.71, "learning_rate": 1.2052743768418715e-06, "logits/chosen": -2.3397207260131836, "logits/rejected": -1.8472929000854492, "logps/chosen": -684.1767578125, "logps/rejected": -793.119873046875, "loss": 0.7048, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.507220983505249, "rewards/margins": 1.1414546966552734, "rewards/rejected": -3.6486754417419434, "step": 10790 }, { "epoch": 0.71, "learning_rate": 1.2003932320033523e-06, "logits/chosen": -1.8293190002441406, "logits/rejected": -1.995443344116211, "logps/chosen": -535.847412109375, "logps/rejected": -565.1732788085938, "loss": 0.6591, "rewards/accuracies": 0.5, "rewards/chosen": -3.123100996017456, "rewards/margins": 0.29064124822616577, "rewards/rejected": -3.4137425422668457, "step": 10800 }, { "epoch": 0.71, "learning_rate": 1.1955188665503553e-06, "logits/chosen": -2.08339262008667, "logits/rejected": -2.1265950202941895, "logps/chosen": -640.2640380859375, "logps/rejected": -620.4893188476562, "loss": 0.5797, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.060319423675537, "rewards/margins": 0.6381840705871582, "rewards/rejected": -3.6985034942626953, "step": 10810 }, { "epoch": 0.71, "learning_rate": 1.1906513059099566e-06, "logits/chosen": -1.7086389064788818, "logits/rejected": -1.8432223796844482, "logps/chosen": -568.55810546875, "logps/rejected": -727.3695068359375, "loss": 0.427, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1456050872802734, "rewards/margins": 1.374998688697815, "rewards/rejected": -4.520603656768799, "step": 10820 }, { "epoch": 0.71, "learning_rate": 1.185790575473738e-06, "logits/chosen": -2.117264747619629, "logits/rejected": -1.9786115884780884, "logps/chosen": -623.8470458984375, "logps/rejected": -653.810546875, "loss": 0.4546, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6704392433166504, "rewards/margins": 1.197432279586792, "rewards/rejected": -3.8678722381591797, "step": 10830 }, { "epoch": 0.71, "learning_rate": 1.1809367005976516e-06, "logits/chosen": -2.156859874725342, "logits/rejected": -1.6391792297363281, "logps/chosen": -539.9671630859375, "logps/rejected": -634.4904174804688, "loss": 0.58, "rewards/accuracies": 0.75, "rewards/chosen": -2.6767139434814453, "rewards/margins": 1.1068816184997559, "rewards/rejected": -3.7835960388183594, "step": 10840 }, { "epoch": 0.71, "learning_rate": 1.1760897066018842e-06, "logits/chosen": -2.143618106842041, "logits/rejected": -1.9632008075714111, "logps/chosen": -496.422119140625, "logps/rejected": -600.4456787109375, "loss": 0.6026, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6161766052246094, "rewards/margins": 1.2766454219818115, "rewards/rejected": -3.892821788787842, "step": 10850 }, { "epoch": 0.71, "learning_rate": 1.1712496187707327e-06, "logits/chosen": -2.378615140914917, "logits/rejected": -2.21158504486084, "logps/chosen": -521.894287109375, "logps/rejected": -573.9391479492188, "loss": 0.5006, "rewards/accuracies": 0.75, "rewards/chosen": -1.9917871952056885, "rewards/margins": 1.0569854974746704, "rewards/rejected": -3.0487725734710693, "step": 10860 }, { "epoch": 0.71, "learning_rate": 1.1664164623524646e-06, "logits/chosen": -2.248140811920166, "logits/rejected": -1.6976674795150757, "logps/chosen": -523.8060913085938, "logps/rejected": -641.4791259765625, "loss": 0.5039, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4532601833343506, "rewards/margins": 1.2342567443847656, "rewards/rejected": -3.687516689300537, "step": 10870 }, { "epoch": 0.71, "learning_rate": 1.1615902625591926e-06, "logits/chosen": -2.197209358215332, "logits/rejected": -1.9862515926361084, "logps/chosen": -601.8943481445312, "logps/rejected": -616.5602416992188, "loss": 0.7245, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1172738075256348, "rewards/margins": 0.2903970181941986, "rewards/rejected": -3.4076714515686035, "step": 10880 }, { "epoch": 0.71, "learning_rate": 1.156771044566738e-06, "logits/chosen": -2.0141053199768066, "logits/rejected": -1.8743298053741455, "logps/chosen": -627.9108276367188, "logps/rejected": -661.3457641601562, "loss": 0.6366, "rewards/accuracies": 0.75, "rewards/chosen": -2.6688072681427, "rewards/margins": 1.156123399734497, "rewards/rejected": -3.8249306678771973, "step": 10890 }, { "epoch": 0.71, "learning_rate": 1.1519588335145037e-06, "logits/chosen": -1.9702669382095337, "logits/rejected": -1.8649107217788696, "logps/chosen": -525.55517578125, "logps/rejected": -649.5360717773438, "loss": 0.5939, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1571083068847656, "rewards/margins": 0.785507321357727, "rewards/rejected": -3.942615509033203, "step": 10900 }, { "epoch": 0.71, "learning_rate": 1.1471536545053382e-06, "logits/chosen": -2.1818172931671143, "logits/rejected": -2.2146239280700684, "logps/chosen": -674.1969604492188, "logps/rejected": -687.8861083984375, "loss": 0.5476, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.312225580215454, "rewards/margins": 1.3034298419952393, "rewards/rejected": -3.6156551837921143, "step": 10910 }, { "epoch": 0.71, "learning_rate": 1.1423555326054112e-06, "logits/chosen": -2.0928287506103516, "logits/rejected": -1.7444454431533813, "logps/chosen": -598.6189575195312, "logps/rejected": -651.1210327148438, "loss": 0.2837, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.289663553237915, "rewards/margins": 1.7426624298095703, "rewards/rejected": -4.032325744628906, "step": 10920 }, { "epoch": 0.72, "learning_rate": 1.1375644928440743e-06, "logits/chosen": -1.9880787134170532, "logits/rejected": -1.7319841384887695, "logps/chosen": -569.8656005859375, "logps/rejected": -608.8934326171875, "loss": 0.5198, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.71450138092041, "rewards/margins": 0.8693853616714478, "rewards/rejected": -3.5838871002197266, "step": 10930 }, { "epoch": 0.72, "learning_rate": 1.1327805602137396e-06, "logits/chosen": -1.8245779275894165, "logits/rejected": -1.6106252670288086, "logps/chosen": -597.9203491210938, "logps/rejected": -727.8937377929688, "loss": 0.3286, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8491742610931396, "rewards/margins": 1.6420882940292358, "rewards/rejected": -4.491262912750244, "step": 10940 }, { "epoch": 0.72, "learning_rate": 1.1280037596697426e-06, "logits/chosen": -2.0003836154937744, "logits/rejected": -1.841463327407837, "logps/chosen": -608.0892333984375, "logps/rejected": -667.3717041015625, "loss": 0.4502, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.653883218765259, "rewards/margins": 1.31430184841156, "rewards/rejected": -3.9681849479675293, "step": 10950 }, { "epoch": 0.72, "learning_rate": 1.123234116130216e-06, "logits/chosen": -2.1562633514404297, "logits/rejected": -1.8884432315826416, "logps/chosen": -569.6181640625, "logps/rejected": -645.463134765625, "loss": 0.4352, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.557612657546997, "rewards/margins": 0.9821327328681946, "rewards/rejected": -3.539745330810547, "step": 10960 }, { "epoch": 0.72, "learning_rate": 1.1184716544759553e-06, "logits/chosen": -2.110089063644409, "logits/rejected": -1.994105339050293, "logps/chosen": -532.6145629882812, "logps/rejected": -578.947998046875, "loss": 0.5566, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.7645394802093506, "rewards/margins": 0.3476647436618805, "rewards/rejected": -3.1122043132781982, "step": 10970 }, { "epoch": 0.72, "learning_rate": 1.1137163995502948e-06, "logits/chosen": -2.067387104034424, "logits/rejected": -1.7719135284423828, "logps/chosen": -572.7210693359375, "logps/rejected": -560.327880859375, "loss": 0.7995, "rewards/accuracies": 0.5, "rewards/chosen": -3.173888683319092, "rewards/margins": 0.44579392671585083, "rewards/rejected": -3.6196823120117188, "step": 10980 }, { "epoch": 0.72, "learning_rate": 1.1089683761589717e-06, "logits/chosen": -2.198796272277832, "logits/rejected": -1.8594017028808594, "logps/chosen": -494.27099609375, "logps/rejected": -558.8394775390625, "loss": 0.6524, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8282618522644043, "rewards/margins": 0.6688450574874878, "rewards/rejected": -3.4971070289611816, "step": 10990 }, { "epoch": 0.72, "learning_rate": 1.1042276090700044e-06, "logits/chosen": -1.8905689716339111, "logits/rejected": -1.600900411605835, "logps/chosen": -514.0736083984375, "logps/rejected": -618.9229125976562, "loss": 0.3493, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.639007091522217, "rewards/margins": 1.3429841995239258, "rewards/rejected": -3.9819908142089844, "step": 11000 }, { "epoch": 0.72, "learning_rate": 1.0994941230135536e-06, "logits/chosen": -2.286539077758789, "logits/rejected": -1.9388920068740845, "logps/chosen": -613.9688720703125, "logps/rejected": -747.5011596679688, "loss": 0.4968, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7653839588165283, "rewards/margins": 0.8753796815872192, "rewards/rejected": -3.640763521194458, "step": 11010 }, { "epoch": 0.72, "learning_rate": 1.094767942681804e-06, "logits/chosen": -2.1379446983337402, "logits/rejected": -2.147871255874634, "logps/chosen": -636.4110717773438, "logps/rejected": -709.1380615234375, "loss": 0.657, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.930417060852051, "rewards/margins": 0.8874378204345703, "rewards/rejected": -3.8178551197052, "step": 11020 }, { "epoch": 0.72, "learning_rate": 1.0900490927288248e-06, "logits/chosen": -1.964093804359436, "logits/rejected": -1.9250081777572632, "logps/chosen": -601.6725463867188, "logps/rejected": -590.1107177734375, "loss": 0.6325, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.7636687755584717, "rewards/margins": 0.4920852780342102, "rewards/rejected": -3.255753755569458, "step": 11030 }, { "epoch": 0.72, "learning_rate": 1.0853375977704511e-06, "logits/chosen": -1.8642339706420898, "logits/rejected": -1.5740877389907837, "logps/chosen": -594.4902954101562, "logps/rejected": -586.4608154296875, "loss": 0.4055, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.335334539413452, "rewards/margins": 1.5911147594451904, "rewards/rejected": -3.9264495372772217, "step": 11040 }, { "epoch": 0.72, "learning_rate": 1.0806334823841466e-06, "logits/chosen": -2.4295177459716797, "logits/rejected": -1.918278694152832, "logps/chosen": -545.12646484375, "logps/rejected": -628.2261962890625, "loss": 0.6423, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.925262928009033, "rewards/margins": 0.8189485669136047, "rewards/rejected": -3.744211196899414, "step": 11050 }, { "epoch": 0.72, "learning_rate": 1.0759367711088825e-06, "logits/chosen": -1.845646619796753, "logits/rejected": -2.076916217803955, "logps/chosen": -594.2481689453125, "logps/rejected": -688.998779296875, "loss": 0.5593, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.010962963104248, "rewards/margins": 0.9051377177238464, "rewards/rejected": -3.9161009788513184, "step": 11060 }, { "epoch": 0.72, "learning_rate": 1.0712474884450056e-06, "logits/chosen": -2.149695634841919, "logits/rejected": -1.1780688762664795, "logps/chosen": -514.140625, "logps/rejected": -586.2655029296875, "loss": 0.4989, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.6523220539093018, "rewards/margins": 1.3810240030288696, "rewards/rejected": -4.033346176147461, "step": 11070 }, { "epoch": 0.72, "learning_rate": 1.066565658854112e-06, "logits/chosen": -1.539414644241333, "logits/rejected": -1.7554525136947632, "logps/chosen": -495.03948974609375, "logps/rejected": -681.1571044921875, "loss": 0.552, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.119170665740967, "rewards/margins": 0.3835334777832031, "rewards/rejected": -3.502704620361328, "step": 11080 }, { "epoch": 0.73, "learning_rate": 1.0618913067589165e-06, "logits/chosen": -1.949589729309082, "logits/rejected": -1.1513017416000366, "logps/chosen": -607.67529296875, "logps/rejected": -571.7874145507812, "loss": 0.4539, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.3207106590270996, "rewards/margins": 0.8452437520027161, "rewards/rejected": -4.165954113006592, "step": 11090 }, { "epoch": 0.73, "learning_rate": 1.0572244565431313e-06, "logits/chosen": -1.9018256664276123, "logits/rejected": -1.9944654703140259, "logps/chosen": -698.68896484375, "logps/rejected": -694.4862060546875, "loss": 0.4973, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.091747283935547, "rewards/margins": 0.8501051068305969, "rewards/rejected": -3.941852569580078, "step": 11100 }, { "epoch": 0.73, "learning_rate": 1.0525651325513317e-06, "logits/chosen": -1.8144733905792236, "logits/rejected": -1.6635713577270508, "logps/chosen": -587.306884765625, "logps/rejected": -645.5009765625, "loss": 0.4695, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.213918685913086, "rewards/margins": 1.390151023864746, "rewards/rejected": -3.604069471359253, "step": 11110 }, { "epoch": 0.73, "learning_rate": 1.0479133590888351e-06, "logits/chosen": -2.0447092056274414, "logits/rejected": -1.724495530128479, "logps/chosen": -640.1514892578125, "logps/rejected": -618.6660766601562, "loss": 0.4973, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.23714017868042, "rewards/margins": 0.5908339023590088, "rewards/rejected": -3.8279738426208496, "step": 11120 }, { "epoch": 0.73, "learning_rate": 1.0432691604215695e-06, "logits/chosen": -1.9522594213485718, "logits/rejected": -1.5733158588409424, "logps/chosen": -741.5416259765625, "logps/rejected": -649.3689575195312, "loss": 0.5937, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.0032002925872803, "rewards/margins": 0.7238863110542297, "rewards/rejected": -3.7270865440368652, "step": 11130 }, { "epoch": 0.73, "learning_rate": 1.0386325607759515e-06, "logits/chosen": -1.877004861831665, "logits/rejected": -1.1804144382476807, "logps/chosen": -531.3442993164062, "logps/rejected": -643.6566162109375, "loss": 0.6587, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.545118808746338, "rewards/margins": 1.568022608757019, "rewards/rejected": -4.1131415367126465, "step": 11140 }, { "epoch": 0.73, "learning_rate": 1.0340035843387544e-06, "logits/chosen": -2.1784722805023193, "logits/rejected": -1.8719772100448608, "logps/chosen": -608.2196044921875, "logps/rejected": -712.3246459960938, "loss": 0.5015, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.738144874572754, "rewards/margins": 1.0193064212799072, "rewards/rejected": -3.757450819015503, "step": 11150 }, { "epoch": 0.73, "learning_rate": 1.0293822552569887e-06, "logits/chosen": -1.7314817905426025, "logits/rejected": -1.7551358938217163, "logps/chosen": -548.2224731445312, "logps/rejected": -731.991455078125, "loss": 0.2875, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.9847452640533447, "rewards/margins": 1.7589595317840576, "rewards/rejected": -4.743704319000244, "step": 11160 }, { "epoch": 0.73, "learning_rate": 1.0247685976377688e-06, "logits/chosen": -1.7158609628677368, "logits/rejected": -2.0380682945251465, "logps/chosen": -553.1986694335938, "logps/rejected": -748.2996826171875, "loss": 0.4106, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.849733829498291, "rewards/margins": 1.3996142148971558, "rewards/rejected": -4.2493486404418945, "step": 11170 }, { "epoch": 0.73, "learning_rate": 1.0201626355481939e-06, "logits/chosen": -1.9223911762237549, "logits/rejected": -1.8944050073623657, "logps/chosen": -625.564208984375, "logps/rejected": -731.0145874023438, "loss": 0.4134, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.318629741668701, "rewards/margins": 1.764533281326294, "rewards/rejected": -4.083163261413574, "step": 11180 }, { "epoch": 0.73, "learning_rate": 1.0155643930152192e-06, "logits/chosen": -1.7937358617782593, "logits/rejected": -1.4579087495803833, "logps/chosen": -472.65350341796875, "logps/rejected": -642.529052734375, "loss": 0.4607, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.678659439086914, "rewards/margins": 1.5914627313613892, "rewards/rejected": -4.270122528076172, "step": 11190 }, { "epoch": 0.73, "learning_rate": 1.0109738940255286e-06, "logits/chosen": -2.0085513591766357, "logits/rejected": -1.5377354621887207, "logps/chosen": -480.1001892089844, "logps/rejected": -557.2733764648438, "loss": 0.3617, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5536446571350098, "rewards/margins": 1.317373514175415, "rewards/rejected": -3.871018648147583, "step": 11200 }, { "epoch": 0.73, "learning_rate": 1.0063911625254155e-06, "logits/chosen": -2.3564112186431885, "logits/rejected": -1.542410969734192, "logps/chosen": -650.8883666992188, "logps/rejected": -715.9833984375, "loss": 0.543, "rewards/accuracies": 0.75, "rewards/chosen": -2.8708133697509766, "rewards/margins": 0.7783092260360718, "rewards/rejected": -3.649122714996338, "step": 11210 }, { "epoch": 0.73, "learning_rate": 1.0018162224206502e-06, "logits/chosen": -2.160595417022705, "logits/rejected": -1.7032890319824219, "logps/chosen": -614.4699096679688, "logps/rejected": -601.10498046875, "loss": 0.7165, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.0829195976257324, "rewards/margins": 0.7624799013137817, "rewards/rejected": -3.8453993797302246, "step": 11220 }, { "epoch": 0.73, "learning_rate": 9.97249097576363e-07, "logits/chosen": -2.035681962966919, "logits/rejected": -1.9352619647979736, "logps/chosen": -523.2393798828125, "logps/rejected": -653.7843017578125, "loss": 0.6436, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1325199604034424, "rewards/margins": 1.1268411874771118, "rewards/rejected": -4.259360313415527, "step": 11230 }, { "epoch": 0.74, "learning_rate": 9.92689811816913e-07, "logits/chosen": -2.1685519218444824, "logits/rejected": -2.1770365238189697, "logps/chosen": -498.0049743652344, "logps/rejected": -625.49462890625, "loss": 0.4397, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4198482036590576, "rewards/margins": 1.5032671689987183, "rewards/rejected": -3.9231152534484863, "step": 11240 }, { "epoch": 0.74, "learning_rate": 9.881383889257691e-07, "logits/chosen": -2.2852888107299805, "logits/rejected": -2.0769572257995605, "logps/chosen": -557.4730224609375, "logps/rejected": -576.2727661132812, "loss": 0.5914, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.2877988815307617, "rewards/margins": 0.9065145254135132, "rewards/rejected": -3.1943135261535645, "step": 11250 }, { "epoch": 0.74, "learning_rate": 9.835948526453817e-07, "logits/chosen": -2.180548906326294, "logits/rejected": -1.568034291267395, "logps/chosen": -596.4942626953125, "logps/rejected": -653.1251831054688, "loss": 0.5723, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.893049478530884, "rewards/margins": 0.8980989456176758, "rewards/rejected": -3.7911484241485596, "step": 11260 }, { "epoch": 0.74, "learning_rate": 9.790592266770633e-07, "logits/chosen": -2.2299437522888184, "logits/rejected": -2.1429529190063477, "logps/chosen": -600.1373901367188, "logps/rejected": -701.9710693359375, "loss": 0.4668, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1360514163970947, "rewards/margins": 1.4669578075408936, "rewards/rejected": -4.60300874710083, "step": 11270 }, { "epoch": 0.74, "learning_rate": 9.745315346808584e-07, "logits/chosen": -1.9295885562896729, "logits/rejected": -1.9131072759628296, "logps/chosen": -481.2538146972656, "logps/rejected": -589.9473876953125, "loss": 0.5351, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.528791904449463, "rewards/margins": 0.9400409460067749, "rewards/rejected": -3.4688332080841064, "step": 11280 }, { "epoch": 0.74, "learning_rate": 9.70011800275428e-07, "logits/chosen": -2.007050037384033, "logits/rejected": -1.2384674549102783, "logps/chosen": -601.4393920898438, "logps/rejected": -608.2933349609375, "loss": 0.684, "rewards/accuracies": 0.75, "rewards/chosen": -3.0498464107513428, "rewards/margins": 0.7697028517723083, "rewards/rejected": -3.819549083709717, "step": 11290 }, { "epoch": 0.74, "learning_rate": 9.655000470379206e-07, "logits/chosen": -2.1945061683654785, "logits/rejected": -1.9669269323349, "logps/chosen": -567.2088623046875, "logps/rejected": -729.3013305664062, "loss": 0.45, "rewards/accuracies": 0.75, "rewards/chosen": -2.5365357398986816, "rewards/margins": 1.151732087135315, "rewards/rejected": -3.688267946243286, "step": 11300 }, { "epoch": 0.74, "learning_rate": 9.609962985038517e-07, "logits/chosen": -2.264110803604126, "logits/rejected": -1.7192445993423462, "logps/chosen": -620.5179443359375, "logps/rejected": -585.7546997070312, "loss": 0.4001, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4289679527282715, "rewards/margins": 1.1963040828704834, "rewards/rejected": -3.6252715587615967, "step": 11310 }, { "epoch": 0.74, "learning_rate": 9.565005781669786e-07, "logits/chosen": -2.2255051136016846, "logits/rejected": -1.4862034320831299, "logps/chosen": -561.6036376953125, "logps/rejected": -579.4625244140625, "loss": 0.6386, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.013392925262451, "rewards/margins": 0.7098814249038696, "rewards/rejected": -3.7232747077941895, "step": 11320 }, { "epoch": 0.74, "learning_rate": 9.520129094791822e-07, "logits/chosen": -2.0273349285125732, "logits/rejected": -1.5884578227996826, "logps/chosen": -562.5972290039062, "logps/rejected": -664.5333251953125, "loss": 0.4295, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.576624631881714, "rewards/margins": 1.6048930883407593, "rewards/rejected": -4.181517601013184, "step": 11330 }, { "epoch": 0.74, "learning_rate": 9.475333158503389e-07, "logits/chosen": -2.228905200958252, "logits/rejected": -2.0265233516693115, "logps/chosen": -661.26611328125, "logps/rejected": -618.9077758789062, "loss": 0.5175, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4387011528015137, "rewards/margins": 1.1200625896453857, "rewards/rejected": -3.5587639808654785, "step": 11340 }, { "epoch": 0.74, "learning_rate": 9.430618206482053e-07, "logits/chosen": -2.025967836380005, "logits/rejected": -1.644971251487732, "logps/chosen": -586.7125854492188, "logps/rejected": -613.2728881835938, "loss": 0.4476, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5874221324920654, "rewards/margins": 0.8730489015579224, "rewards/rejected": -3.4604709148406982, "step": 11350 }, { "epoch": 0.74, "learning_rate": 9.385984471982892e-07, "logits/chosen": -2.110199451446533, "logits/rejected": -1.60942804813385, "logps/chosen": -486.7565002441406, "logps/rejected": -678.8507690429688, "loss": 0.3855, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.0172455310821533, "rewards/margins": 1.782753348350525, "rewards/rejected": -3.7999987602233887, "step": 11360 }, { "epoch": 0.74, "learning_rate": 9.341432187837343e-07, "logits/chosen": -2.0925145149230957, "logits/rejected": -1.9435867071151733, "logps/chosen": -555.3340454101562, "logps/rejected": -724.8929443359375, "loss": 0.6627, "rewards/accuracies": 0.75, "rewards/chosen": -3.3047962188720703, "rewards/margins": 1.0990153551101685, "rewards/rejected": -4.403811454772949, "step": 11370 }, { "epoch": 0.74, "learning_rate": 9.29696158645193e-07, "logits/chosen": -2.439696788787842, "logits/rejected": -1.9372104406356812, "logps/chosen": -535.96435546875, "logps/rejected": -607.0570068359375, "loss": 0.5157, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.652575969696045, "rewards/margins": 1.0027225017547607, "rewards/rejected": -3.655298948287964, "step": 11380 }, { "epoch": 0.75, "learning_rate": 9.252572899807111e-07, "logits/chosen": -1.8352473974227905, "logits/rejected": -1.7971274852752686, "logps/chosen": -525.2449951171875, "logps/rejected": -590.7938232421875, "loss": 0.4892, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7689268589019775, "rewards/margins": 0.7963454723358154, "rewards/rejected": -3.565272092819214, "step": 11390 }, { "epoch": 0.75, "learning_rate": 9.208266359456003e-07, "logits/chosen": -2.1703238487243652, "logits/rejected": -1.7259900569915771, "logps/chosen": -519.4930419921875, "logps/rejected": -710.6129760742188, "loss": 0.4442, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.9516105651855469, "rewards/margins": 1.9602371454238892, "rewards/rejected": -3.9118475914001465, "step": 11400 }, { "epoch": 0.75, "learning_rate": 9.164042196523229e-07, "logits/chosen": -2.060485363006592, "logits/rejected": -2.2558093070983887, "logps/chosen": -714.7739868164062, "logps/rejected": -715.2745361328125, "loss": 0.553, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3948960304260254, "rewards/margins": 0.8847671747207642, "rewards/rejected": -3.279662609100342, "step": 11410 }, { "epoch": 0.75, "learning_rate": 9.119900641703696e-07, "logits/chosen": -2.2287392616271973, "logits/rejected": -2.3328731060028076, "logps/chosen": -593.9951782226562, "logps/rejected": -810.0656127929688, "loss": 0.7244, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.9003055095672607, "rewards/margins": 0.7129500508308411, "rewards/rejected": -3.613255739212036, "step": 11420 }, { "epoch": 0.75, "learning_rate": 9.075841925261364e-07, "logits/chosen": -1.7975727319717407, "logits/rejected": -1.8429571390151978, "logps/chosen": -617.5447387695312, "logps/rejected": -673.0473022460938, "loss": 0.5878, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.9794981479644775, "rewards/margins": 0.9177531003952026, "rewards/rejected": -3.8972511291503906, "step": 11430 }, { "epoch": 0.75, "learning_rate": 9.031866277028093e-07, "logits/chosen": -1.7158721685409546, "logits/rejected": -1.5207316875457764, "logps/chosen": -535.4993286132812, "logps/rejected": -593.2886352539062, "loss": 0.5281, "rewards/accuracies": 0.75, "rewards/chosen": -3.504063367843628, "rewards/margins": 0.8655757904052734, "rewards/rejected": -4.369638919830322, "step": 11440 }, { "epoch": 0.75, "learning_rate": 8.987973926402391e-07, "logits/chosen": -2.2284095287323, "logits/rejected": -1.8909313678741455, "logps/chosen": -581.1748657226562, "logps/rejected": -668.5172119140625, "loss": 0.4486, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8736472129821777, "rewards/margins": 1.1625237464904785, "rewards/rejected": -4.036170959472656, "step": 11450 }, { "epoch": 0.75, "learning_rate": 8.944165102348273e-07, "logits/chosen": -2.121553659439087, "logits/rejected": -1.6587817668914795, "logps/chosen": -615.8829956054688, "logps/rejected": -656.5050048828125, "loss": 0.67, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.2396693229675293, "rewards/margins": 0.6015607714653015, "rewards/rejected": -3.8412303924560547, "step": 11460 }, { "epoch": 0.75, "learning_rate": 8.900440033394018e-07, "logits/chosen": -2.4720492362976074, "logits/rejected": -1.9725520610809326, "logps/chosen": -650.8018188476562, "logps/rejected": -680.92431640625, "loss": 0.3071, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.6864123344421387, "rewards/margins": 1.4779071807861328, "rewards/rejected": -4.1643195152282715, "step": 11470 }, { "epoch": 0.75, "learning_rate": 8.856798947631009e-07, "logits/chosen": -1.2511957883834839, "logits/rejected": -1.570084571838379, "logps/chosen": -431.64697265625, "logps/rejected": -622.7315673828125, "loss": 0.3729, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.7261099815368652, "rewards/margins": 1.8274139165878296, "rewards/rejected": -4.553524017333984, "step": 11480 }, { "epoch": 0.75, "learning_rate": 8.813242072712519e-07, "logits/chosen": -2.0236334800720215, "logits/rejected": -1.9342308044433594, "logps/chosen": -517.2122192382812, "logps/rejected": -609.6121826171875, "loss": 0.506, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7198448181152344, "rewards/margins": 0.9386121034622192, "rewards/rejected": -3.658456802368164, "step": 11490 }, { "epoch": 0.75, "learning_rate": 8.769769635852557e-07, "logits/chosen": -2.1818830966949463, "logits/rejected": -1.8092458248138428, "logps/chosen": -607.4002075195312, "logps/rejected": -655.6839599609375, "loss": 0.6314, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.9455173015594482, "rewards/margins": 1.0688055753707886, "rewards/rejected": -4.0143232345581055, "step": 11500 }, { "epoch": 0.75, "learning_rate": 8.726381863824635e-07, "logits/chosen": -2.1254098415374756, "logits/rejected": -1.7734934091567993, "logps/chosen": -707.5592041015625, "logps/rejected": -625.3045654296875, "loss": 0.6464, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.804962635040283, "rewards/margins": 0.7859970331192017, "rewards/rejected": -3.5909600257873535, "step": 11510 }, { "epoch": 0.75, "learning_rate": 8.683078982960638e-07, "logits/chosen": -2.003600597381592, "logits/rejected": -1.9953582286834717, "logps/chosen": -544.07861328125, "logps/rejected": -613.3527221679688, "loss": 0.5258, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.647714138031006, "rewards/margins": 1.037721037864685, "rewards/rejected": -3.6854355335235596, "step": 11520 }, { "epoch": 0.75, "learning_rate": 8.639861219149584e-07, "logits/chosen": -1.9662593603134155, "logits/rejected": -2.1062169075012207, "logps/chosen": -553.8065185546875, "logps/rejected": -558.3656005859375, "loss": 0.6001, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1915595531463623, "rewards/margins": 0.26489943265914917, "rewards/rejected": -3.4564595222473145, "step": 11530 }, { "epoch": 0.76, "learning_rate": 8.596728797836532e-07, "logits/chosen": -2.110957145690918, "logits/rejected": -1.926509141921997, "logps/chosen": -597.6747436523438, "logps/rejected": -646.9539184570312, "loss": 0.453, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.424696445465088, "rewards/margins": 1.0680582523345947, "rewards/rejected": -3.4927546977996826, "step": 11540 }, { "epoch": 0.76, "learning_rate": 8.553681944021294e-07, "logits/chosen": -1.9674144983291626, "logits/rejected": -1.697922706604004, "logps/chosen": -564.5590209960938, "logps/rejected": -630.7199096679688, "loss": 0.5602, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.4485175609588623, "rewards/margins": 1.2627383470535278, "rewards/rejected": -3.7112560272216797, "step": 11550 }, { "epoch": 0.76, "learning_rate": 8.510720882257365e-07, "logits/chosen": -1.8311374187469482, "logits/rejected": -1.7656824588775635, "logps/chosen": -520.2479248046875, "logps/rejected": -593.451904296875, "loss": 0.4734, "rewards/accuracies": 0.75, "rewards/chosen": -2.4908149242401123, "rewards/margins": 0.8919802904129028, "rewards/rejected": -3.382795810699463, "step": 11560 }, { "epoch": 0.76, "learning_rate": 8.467845836650667e-07, "logits/chosen": -1.9729102849960327, "logits/rejected": -1.8888013362884521, "logps/chosen": -554.5764770507812, "logps/rejected": -618.1435546875, "loss": 0.3996, "rewards/accuracies": 0.75, "rewards/chosen": -2.3505473136901855, "rewards/margins": 1.1294513940811157, "rewards/rejected": -3.4799983501434326, "step": 11570 }, { "epoch": 0.76, "learning_rate": 8.425057030858461e-07, "logits/chosen": -1.491781234741211, "logits/rejected": -1.6472505331039429, "logps/chosen": -536.1841430664062, "logps/rejected": -794.4387817382812, "loss": 0.5567, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.124786376953125, "rewards/margins": 1.557202696800232, "rewards/rejected": -4.6819891929626465, "step": 11580 }, { "epoch": 0.76, "learning_rate": 8.382354688088098e-07, "logits/chosen": -2.0456268787384033, "logits/rejected": -1.848305106163025, "logps/chosen": -602.2803955078125, "logps/rejected": -660.6411743164062, "loss": 0.5364, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.787703275680542, "rewards/margins": 0.9418042898178101, "rewards/rejected": -3.7295074462890625, "step": 11590 }, { "epoch": 0.76, "learning_rate": 8.33973903109594e-07, "logits/chosen": -1.9836442470550537, "logits/rejected": -1.7316744327545166, "logps/chosen": -539.95654296875, "logps/rejected": -645.2711181640625, "loss": 0.5209, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5273334980010986, "rewards/margins": 1.3982807397842407, "rewards/rejected": -3.925614595413208, "step": 11600 }, { "epoch": 0.76, "learning_rate": 8.297210282186102e-07, "logits/chosen": -1.7507593631744385, "logits/rejected": -1.5172091722488403, "logps/chosen": -560.347412109375, "logps/rejected": -643.906982421875, "loss": 0.4369, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.6636176109313965, "rewards/margins": 1.5707944631576538, "rewards/rejected": -4.23441219329834, "step": 11610 }, { "epoch": 0.76, "learning_rate": 8.254768663209397e-07, "logits/chosen": -2.0388007164001465, "logits/rejected": -2.022284984588623, "logps/chosen": -560.9110107421875, "logps/rejected": -649.5015869140625, "loss": 0.4579, "rewards/accuracies": 0.75, "rewards/chosen": -2.4646971225738525, "rewards/margins": 0.8786503672599792, "rewards/rejected": -3.3433470726013184, "step": 11620 }, { "epoch": 0.76, "learning_rate": 8.212414395562079e-07, "logits/chosen": -1.997552514076233, "logits/rejected": -1.9959675073623657, "logps/chosen": -573.3985595703125, "logps/rejected": -581.78759765625, "loss": 0.5521, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.674731492996216, "rewards/margins": 1.2291405200958252, "rewards/rejected": -3.90387225151062, "step": 11630 }, { "epoch": 0.76, "learning_rate": 8.170147700184775e-07, "logits/chosen": -2.024963855743408, "logits/rejected": -1.524887204170227, "logps/chosen": -527.4414672851562, "logps/rejected": -653.4882202148438, "loss": 0.4484, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3810606002807617, "rewards/margins": 1.7915737628936768, "rewards/rejected": -4.172634124755859, "step": 11640 }, { "epoch": 0.76, "learning_rate": 8.127968797561242e-07, "logits/chosen": -1.9064247608184814, "logits/rejected": -1.88960862159729, "logps/chosen": -543.8555908203125, "logps/rejected": -640.4848022460938, "loss": 0.4843, "rewards/accuracies": 0.75, "rewards/chosen": -2.717148542404175, "rewards/margins": 1.105126142501831, "rewards/rejected": -3.8222744464874268, "step": 11650 }, { "epoch": 0.76, "learning_rate": 8.085877907717338e-07, "logits/chosen": -1.9744573831558228, "logits/rejected": -1.6091855764389038, "logps/chosen": -551.3575439453125, "logps/rejected": -611.7328491210938, "loss": 0.5567, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.031090259552002, "rewards/margins": 0.9378800392150879, "rewards/rejected": -3.9689698219299316, "step": 11660 }, { "epoch": 0.76, "learning_rate": 8.043875250219732e-07, "logits/chosen": -2.105393886566162, "logits/rejected": -2.0549044609069824, "logps/chosen": -649.9873046875, "logps/rejected": -710.1104125976562, "loss": 0.6793, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7803592681884766, "rewards/margins": 0.6869893074035645, "rewards/rejected": -3.46734881401062, "step": 11670 }, { "epoch": 0.76, "learning_rate": 8.001961044174881e-07, "logits/chosen": -2.1214375495910645, "logits/rejected": -1.462120771408081, "logps/chosen": -572.0277709960938, "logps/rejected": -650.1475830078125, "loss": 0.4588, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2363853454589844, "rewards/margins": 2.0137810707092285, "rewards/rejected": -4.250166416168213, "step": 11680 }, { "epoch": 0.76, "learning_rate": 7.960135508227795e-07, "logits/chosen": -1.8443381786346436, "logits/rejected": -1.9977306127548218, "logps/chosen": -605.5724487304688, "logps/rejected": -708.4366455078125, "loss": 0.5162, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.791918992996216, "rewards/margins": 1.0935614109039307, "rewards/rejected": -3.8854804039001465, "step": 11690 }, { "epoch": 0.77, "learning_rate": 7.91839886056098e-07, "logits/chosen": -2.0267434120178223, "logits/rejected": -1.9769598245620728, "logps/chosen": -524.788330078125, "logps/rejected": -591.4601440429688, "loss": 0.511, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.600790500640869, "rewards/margins": 0.8144540786743164, "rewards/rejected": -3.4152445793151855, "step": 11700 }, { "epoch": 0.77, "learning_rate": 7.876751318893217e-07, "logits/chosen": -1.7773189544677734, "logits/rejected": -1.4888404607772827, "logps/chosen": -524.0817260742188, "logps/rejected": -642.443359375, "loss": 0.5246, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.8395049571990967, "rewards/margins": 1.466451644897461, "rewards/rejected": -4.305956840515137, "step": 11710 }, { "epoch": 0.77, "learning_rate": 7.8351931004785e-07, "logits/chosen": -1.8321616649627686, "logits/rejected": -2.047708511352539, "logps/chosen": -599.1519775390625, "logps/rejected": -620.6925659179688, "loss": 0.5871, "rewards/accuracies": 0.75, "rewards/chosen": -2.9461781978607178, "rewards/margins": 0.5298594236373901, "rewards/rejected": -3.4760379791259766, "step": 11720 }, { "epoch": 0.77, "learning_rate": 7.793724422104834e-07, "logits/chosen": -1.9366436004638672, "logits/rejected": -1.9429668188095093, "logps/chosen": -535.4391479492188, "logps/rejected": -641.5661010742188, "loss": 0.4596, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.000441074371338, "rewards/margins": 1.0180342197418213, "rewards/rejected": -4.018475532531738, "step": 11730 }, { "epoch": 0.77, "learning_rate": 7.752345500093184e-07, "logits/chosen": -2.282815456390381, "logits/rejected": -1.8461005687713623, "logps/chosen": -655.4285888671875, "logps/rejected": -604.0984497070312, "loss": 0.584, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1589279174804688, "rewards/margins": 0.7532690763473511, "rewards/rejected": -3.912196636199951, "step": 11740 }, { "epoch": 0.77, "learning_rate": 7.711056550296253e-07, "logits/chosen": -2.036350727081299, "logits/rejected": -1.7405805587768555, "logps/chosen": -718.2552490234375, "logps/rejected": -759.4415283203125, "loss": 0.4641, "rewards/accuracies": 0.75, "rewards/chosen": -2.1789045333862305, "rewards/margins": 1.2369298934936523, "rewards/rejected": -3.415834903717041, "step": 11750 }, { "epoch": 0.77, "learning_rate": 7.669857788097445e-07, "logits/chosen": -2.080662965774536, "logits/rejected": -2.1915130615234375, "logps/chosen": -586.1531982421875, "logps/rejected": -603.7664794921875, "loss": 0.4661, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6133437156677246, "rewards/margins": 0.7611432671546936, "rewards/rejected": -3.3744869232177734, "step": 11760 }, { "epoch": 0.77, "learning_rate": 7.628749428409676e-07, "logits/chosen": -2.1952221393585205, "logits/rejected": -1.871019959449768, "logps/chosen": -557.4973754882812, "logps/rejected": -677.4476318359375, "loss": 0.498, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.662010669708252, "rewards/margins": 1.359654188156128, "rewards/rejected": -4.021664619445801, "step": 11770 }, { "epoch": 0.77, "learning_rate": 7.587731685674288e-07, "logits/chosen": -1.7733513116836548, "logits/rejected": -1.8451553583145142, "logps/chosen": -589.5215454101562, "logps/rejected": -735.2191772460938, "loss": 0.3707, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.250026226043701, "rewards/margins": 1.2411115169525146, "rewards/rejected": -4.491137504577637, "step": 11780 }, { "epoch": 0.77, "learning_rate": 7.546804773859931e-07, "logits/chosen": -2.1263413429260254, "logits/rejected": -2.1229681968688965, "logps/chosen": -594.2049560546875, "logps/rejected": -629.3919067382812, "loss": 0.6908, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.963937759399414, "rewards/margins": 0.38868817687034607, "rewards/rejected": -3.352626085281372, "step": 11790 }, { "epoch": 0.77, "learning_rate": 7.505968906461409e-07, "logits/chosen": -2.091522455215454, "logits/rejected": -2.1456050872802734, "logps/chosen": -592.6693115234375, "logps/rejected": -766.59765625, "loss": 0.4894, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7104580402374268, "rewards/margins": 1.1917150020599365, "rewards/rejected": -3.9021732807159424, "step": 11800 }, { "epoch": 0.77, "learning_rate": 7.465224296498627e-07, "logits/chosen": -2.224442958831787, "logits/rejected": -1.8706302642822266, "logps/chosen": -630.4420776367188, "logps/rejected": -593.1500854492188, "loss": 0.532, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5721988677978516, "rewards/margins": 0.990858256816864, "rewards/rejected": -3.5630574226379395, "step": 11810 }, { "epoch": 0.77, "learning_rate": 7.424571156515412e-07, "logits/chosen": -1.3914611339569092, "logits/rejected": -1.3528521060943604, "logps/chosen": -620.4548950195312, "logps/rejected": -719.0347290039062, "loss": 0.4782, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.7914605140686035, "rewards/margins": 1.3912131786346436, "rewards/rejected": -4.182673454284668, "step": 11820 }, { "epoch": 0.77, "learning_rate": 7.38400969857847e-07, "logits/chosen": -1.641289472579956, "logits/rejected": -1.6610963344573975, "logps/chosen": -525.3446655273438, "logps/rejected": -587.3547973632812, "loss": 0.5223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.275611639022827, "rewards/margins": 1.4554826021194458, "rewards/rejected": -3.7310938835144043, "step": 11830 }, { "epoch": 0.77, "learning_rate": 7.343540134276225e-07, "logits/chosen": -2.371286630630493, "logits/rejected": -1.728348970413208, "logps/chosen": -612.7855834960938, "logps/rejected": -588.4951171875, "loss": 0.5313, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.828519821166992, "rewards/margins": 0.8085176348686218, "rewards/rejected": -3.6370372772216797, "step": 11840 }, { "epoch": 0.78, "learning_rate": 7.303162674717762e-07, "logits/chosen": -1.7673263549804688, "logits/rejected": -1.8349840641021729, "logps/chosen": -436.9541015625, "logps/rejected": -561.4946899414062, "loss": 0.4587, "rewards/accuracies": 0.75, "rewards/chosen": -2.671754837036133, "rewards/margins": 1.310455560684204, "rewards/rejected": -3.982210636138916, "step": 11850 }, { "epoch": 0.78, "learning_rate": 7.26287753053167e-07, "logits/chosen": -1.8987102508544922, "logits/rejected": -1.6133962869644165, "logps/chosen": -660.6593017578125, "logps/rejected": -785.08837890625, "loss": 0.4893, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.577979564666748, "rewards/margins": 0.7468835711479187, "rewards/rejected": -4.324862957000732, "step": 11860 }, { "epoch": 0.78, "learning_rate": 7.222684911865013e-07, "logits/chosen": -2.0908069610595703, "logits/rejected": -2.007396697998047, "logps/chosen": -521.2072143554688, "logps/rejected": -706.6531982421875, "loss": 0.5221, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.3962066173553467, "rewards/margins": 1.5858217477798462, "rewards/rejected": -3.9820282459259033, "step": 11870 }, { "epoch": 0.78, "learning_rate": 7.182585028382166e-07, "logits/chosen": -2.124596357345581, "logits/rejected": -1.9639291763305664, "logps/chosen": -477.7953186035156, "logps/rejected": -597.7669677734375, "loss": 0.4659, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0256547927856445, "rewards/margins": 1.4007855653762817, "rewards/rejected": -3.426440477371216, "step": 11880 }, { "epoch": 0.78, "learning_rate": 7.142578089263769e-07, "logits/chosen": -1.9041650295257568, "logits/rejected": -1.9842296838760376, "logps/chosen": -532.4962158203125, "logps/rejected": -559.984375, "loss": 0.5181, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.582383632659912, "rewards/margins": 1.0075069665908813, "rewards/rejected": -3.589890718460083, "step": 11890 }, { "epoch": 0.78, "learning_rate": 7.102664303205611e-07, "logits/chosen": -1.924194574356079, "logits/rejected": -2.085660457611084, "logps/chosen": -715.51025390625, "logps/rejected": -694.806884765625, "loss": 0.4882, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.911158323287964, "rewards/margins": 0.8436439633369446, "rewards/rejected": -3.754802703857422, "step": 11900 }, { "epoch": 0.78, "learning_rate": 7.062843878417566e-07, "logits/chosen": -1.6597715616226196, "logits/rejected": -1.526272177696228, "logps/chosen": -548.4642333984375, "logps/rejected": -670.6319580078125, "loss": 0.5269, "rewards/accuracies": 0.75, "rewards/chosen": -2.9917690753936768, "rewards/margins": 1.377164363861084, "rewards/rejected": -4.36893367767334, "step": 11910 }, { "epoch": 0.78, "learning_rate": 7.023117022622458e-07, "logits/chosen": -2.1705479621887207, "logits/rejected": -1.7469104528427124, "logps/chosen": -606.1336059570312, "logps/rejected": -674.3630981445312, "loss": 0.6392, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.6440083980560303, "rewards/margins": 0.7526111006736755, "rewards/rejected": -3.3966193199157715, "step": 11920 }, { "epoch": 0.78, "learning_rate": 6.983483943055042e-07, "logits/chosen": -2.0007176399230957, "logits/rejected": -2.0033366680145264, "logps/chosen": -632.1116943359375, "logps/rejected": -632.0157470703125, "loss": 0.399, "rewards/accuracies": 0.75, "rewards/chosen": -2.597093105316162, "rewards/margins": 0.9765065312385559, "rewards/rejected": -3.5736000537872314, "step": 11930 }, { "epoch": 0.78, "learning_rate": 6.943944846460859e-07, "logits/chosen": -2.1100950241088867, "logits/rejected": -2.160198211669922, "logps/chosen": -613.911865234375, "logps/rejected": -709.337890625, "loss": 0.5362, "rewards/accuracies": 0.75, "rewards/chosen": -2.8297903537750244, "rewards/margins": 1.0146056413650513, "rewards/rejected": -3.8443961143493652, "step": 11940 }, { "epoch": 0.78, "learning_rate": 6.904499939095225e-07, "logits/chosen": -2.1457552909851074, "logits/rejected": -1.9467493295669556, "logps/chosen": -667.1052856445312, "logps/rejected": -671.9146728515625, "loss": 0.4131, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.0337657928466797, "rewards/margins": 1.3421456813812256, "rewards/rejected": -4.375911712646484, "step": 11950 }, { "epoch": 0.78, "learning_rate": 6.865149426722079e-07, "logits/chosen": -1.936374306678772, "logits/rejected": -1.9775865077972412, "logps/chosen": -548.2598876953125, "logps/rejected": -707.3658447265625, "loss": 0.6487, "rewards/accuracies": 0.75, "rewards/chosen": -2.3087007999420166, "rewards/margins": 1.3016471862792969, "rewards/rejected": -3.6103482246398926, "step": 11960 }, { "epoch": 0.78, "learning_rate": 6.825893514612985e-07, "logits/chosen": -2.113210439682007, "logits/rejected": -1.7311427593231201, "logps/chosen": -580.7791748046875, "logps/rejected": -764.8864135742188, "loss": 0.634, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5090651512145996, "rewards/margins": 0.9025558233261108, "rewards/rejected": -3.41162109375, "step": 11970 }, { "epoch": 0.78, "learning_rate": 6.786732407546001e-07, "logits/chosen": -1.7111217975616455, "logits/rejected": -1.5820856094360352, "logps/chosen": -530.7203369140625, "logps/rejected": -689.3048706054688, "loss": 0.5514, "rewards/accuracies": 0.75, "rewards/chosen": -2.502312421798706, "rewards/margins": 1.546095848083496, "rewards/rejected": -4.048408508300781, "step": 11980 }, { "epoch": 0.78, "learning_rate": 6.747666309804654e-07, "logits/chosen": -2.161839246749878, "logits/rejected": -2.282132625579834, "logps/chosen": -562.6868286132812, "logps/rejected": -631.7789916992188, "loss": 0.4637, "rewards/accuracies": 0.75, "rewards/chosen": -2.6618592739105225, "rewards/margins": 0.9789729118347168, "rewards/rejected": -3.6408324241638184, "step": 11990 }, { "epoch": 0.79, "learning_rate": 6.708695425176831e-07, "logits/chosen": -2.36330509185791, "logits/rejected": -1.784592866897583, "logps/chosen": -543.343017578125, "logps/rejected": -702.59619140625, "loss": 0.4636, "rewards/accuracies": 0.75, "rewards/chosen": -2.4326610565185547, "rewards/margins": 1.4380266666412354, "rewards/rejected": -3.870687484741211, "step": 12000 }, { "epoch": 0.79, "eval_logits/chosen": -2.010216236114502, "eval_logits/rejected": -1.832584023475647, "eval_logps/chosen": -559.8972778320312, "eval_logps/rejected": -653.1429443359375, "eval_loss": 0.5347319841384888, "eval_rewards/accuracies": 0.7450000047683716, "eval_rewards/chosen": -2.644517183303833, "eval_rewards/margins": 1.1328535079956055, "eval_rewards/rejected": -3.7773704528808594, "eval_runtime": 464.7985, "eval_samples_per_second": 4.303, "eval_steps_per_second": 2.151, "step": 12000 }, { "epoch": 0.79, "learning_rate": 6.669819956953768e-07, "logits/chosen": -2.558680295944214, "logits/rejected": -2.0116984844207764, "logps/chosen": -648.0056762695312, "logps/rejected": -632.8907470703125, "loss": 0.4691, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4394781589508057, "rewards/margins": 1.159771203994751, "rewards/rejected": -3.5992493629455566, "step": 12010 }, { "epoch": 0.79, "learning_rate": 6.631040107928957e-07, "logits/chosen": -2.110262632369995, "logits/rejected": -1.9674198627471924, "logps/chosen": -613.672119140625, "logps/rejected": -663.4327392578125, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": -2.3904502391815186, "rewards/margins": 1.1840265989303589, "rewards/rejected": -3.574476718902588, "step": 12020 }, { "epoch": 0.79, "learning_rate": 6.592356080397072e-07, "logits/chosen": -1.972866415977478, "logits/rejected": -1.4877679347991943, "logps/chosen": -497.69036865234375, "logps/rejected": -569.5394287109375, "loss": 0.4793, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.0033771991729736, "rewards/margins": 1.077380895614624, "rewards/rejected": -4.080758094787598, "step": 12030 }, { "epoch": 0.79, "learning_rate": 6.553768076152963e-07, "logits/chosen": -2.2271311283111572, "logits/rejected": -1.976681113243103, "logps/chosen": -623.8030395507812, "logps/rejected": -777.1211547851562, "loss": 0.5016, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.64901065826416, "rewards/margins": 1.1936619281768799, "rewards/rejected": -3.8426718711853027, "step": 12040 }, { "epoch": 0.79, "learning_rate": 6.51527629649055e-07, "logits/chosen": -1.7846431732177734, "logits/rejected": -1.7610366344451904, "logps/chosen": -601.7432861328125, "logps/rejected": -581.0494384765625, "loss": 0.5333, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1557106971740723, "rewards/margins": 1.2338597774505615, "rewards/rejected": -3.389570713043213, "step": 12050 }, { "epoch": 0.79, "learning_rate": 6.476880942201824e-07, "logits/chosen": -2.2647032737731934, "logits/rejected": -1.8640931844711304, "logps/chosen": -515.746337890625, "logps/rejected": -597.1239013671875, "loss": 0.5296, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.493758201599121, "rewards/margins": 1.0930613279342651, "rewards/rejected": -3.5868194103240967, "step": 12060 }, { "epoch": 0.79, "learning_rate": 6.438582213575748e-07, "logits/chosen": -1.9803247451782227, "logits/rejected": -1.8715168237686157, "logps/chosen": -485.79559326171875, "logps/rejected": -644.2310791015625, "loss": 0.3128, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.364370584487915, "rewards/margins": 1.6224912405014038, "rewards/rejected": -3.9868621826171875, "step": 12070 }, { "epoch": 0.79, "learning_rate": 6.400380310397267e-07, "logits/chosen": -2.011253833770752, "logits/rejected": -1.855588674545288, "logps/chosen": -585.7911987304688, "logps/rejected": -683.5035400390625, "loss": 0.5284, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2106995582580566, "rewards/margins": 0.894027590751648, "rewards/rejected": -3.104727029800415, "step": 12080 }, { "epoch": 0.79, "learning_rate": 6.362275431946202e-07, "logits/chosen": -1.5855903625488281, "logits/rejected": -2.165252923965454, "logps/chosen": -506.32354736328125, "logps/rejected": -659.8756103515625, "loss": 0.4614, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.271911144256592, "rewards/margins": 1.0305030345916748, "rewards/rejected": -4.3024139404296875, "step": 12090 }, { "epoch": 0.79, "learning_rate": 6.324267776996285e-07, "logits/chosen": -1.8862168788909912, "logits/rejected": -1.7354240417480469, "logps/chosen": -625.648681640625, "logps/rejected": -629.8529052734375, "loss": 0.5204, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6771509647369385, "rewards/margins": 0.4423960745334625, "rewards/rejected": -3.1195473670959473, "step": 12100 }, { "epoch": 0.79, "learning_rate": 6.286357543814045e-07, "logits/chosen": -2.4290318489074707, "logits/rejected": -2.139944553375244, "logps/chosen": -536.7593994140625, "logps/rejected": -699.2761840820312, "loss": 0.5809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0974652767181396, "rewards/margins": 0.9861791729927063, "rewards/rejected": -3.083644390106201, "step": 12110 }, { "epoch": 0.79, "learning_rate": 6.248544930157838e-07, "logits/chosen": -2.145630121231079, "logits/rejected": -1.9923956394195557, "logps/chosen": -544.7433471679688, "logps/rejected": -650.2592163085938, "loss": 0.5544, "rewards/accuracies": 0.75, "rewards/chosen": -2.7388195991516113, "rewards/margins": 1.1529513597488403, "rewards/rejected": -3.8917713165283203, "step": 12120 }, { "epoch": 0.79, "learning_rate": 6.21083013327678e-07, "logits/chosen": -1.69477117061615, "logits/rejected": -2.0762622356414795, "logps/chosen": -535.0198364257812, "logps/rejected": -640.6212158203125, "loss": 0.3952, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.796827793121338, "rewards/margins": 1.0736668109893799, "rewards/rejected": -3.8704943656921387, "step": 12130 }, { "epoch": 0.79, "learning_rate": 6.17321334990973e-07, "logits/chosen": -1.894301176071167, "logits/rejected": -2.067835807800293, "logps/chosen": -597.6411743164062, "logps/rejected": -640.9744262695312, "loss": 0.5247, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.738687753677368, "rewards/margins": 1.0224624872207642, "rewards/rejected": -3.761150360107422, "step": 12140 }, { "epoch": 0.79, "learning_rate": 6.135694776284243e-07, "logits/chosen": -2.353419780731201, "logits/rejected": -1.988925576210022, "logps/chosen": -696.5867309570312, "logps/rejected": -627.3082885742188, "loss": 0.5669, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.1813197135925293, "rewards/margins": 0.7509251832962036, "rewards/rejected": -2.9322447776794434, "step": 12150 }, { "epoch": 0.8, "learning_rate": 6.098274608115595e-07, "logits/chosen": -1.694628357887268, "logits/rejected": -1.076794981956482, "logps/chosen": -584.422119140625, "logps/rejected": -619.1571044921875, "loss": 0.3698, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.3269131183624268, "rewards/margins": 1.1638798713684082, "rewards/rejected": -4.490792751312256, "step": 12160 }, { "epoch": 0.8, "learning_rate": 6.060953040605697e-07, "logits/chosen": -1.6705732345581055, "logits/rejected": -1.7893667221069336, "logps/chosen": -557.1876220703125, "logps/rejected": -576.4094848632812, "loss": 0.6879, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9966461658477783, "rewards/margins": 0.26419544219970703, "rewards/rejected": -3.2608418464660645, "step": 12170 }, { "epoch": 0.8, "learning_rate": 6.023730268442144e-07, "logits/chosen": -1.9730228185653687, "logits/rejected": -1.8565328121185303, "logps/chosen": -528.3025512695312, "logps/rejected": -596.0540771484375, "loss": 0.4897, "rewards/accuracies": 0.75, "rewards/chosen": -2.5707285404205322, "rewards/margins": 1.0148826837539673, "rewards/rejected": -3.585610866546631, "step": 12180 }, { "epoch": 0.8, "learning_rate": 5.986606485797131e-07, "logits/chosen": -2.4206955432891846, "logits/rejected": -2.0519917011260986, "logps/chosen": -650.331787109375, "logps/rejected": -709.4451293945312, "loss": 0.5735, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7515320777893066, "rewards/margins": 1.218911051750183, "rewards/rejected": -3.9704430103302, "step": 12190 }, { "epoch": 0.8, "learning_rate": 5.949581886326511e-07, "logits/chosen": -2.2249879837036133, "logits/rejected": -2.350133180618286, "logps/chosen": -580.1309814453125, "logps/rejected": -681.6095581054688, "loss": 0.6241, "rewards/accuracies": 0.75, "rewards/chosen": -2.244213342666626, "rewards/margins": 1.0740654468536377, "rewards/rejected": -3.3182787895202637, "step": 12200 }, { "epoch": 0.8, "learning_rate": 5.912656663168717e-07, "logits/chosen": -1.710314154624939, "logits/rejected": -1.9418649673461914, "logps/chosen": -567.335205078125, "logps/rejected": -693.5281982421875, "loss": 0.5183, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8011791706085205, "rewards/margins": 0.9482982754707336, "rewards/rejected": -3.7494773864746094, "step": 12210 }, { "epoch": 0.8, "learning_rate": 5.875831008943817e-07, "logits/chosen": -1.989551305770874, "logits/rejected": -2.0580105781555176, "logps/chosen": -510.07159423828125, "logps/rejected": -602.3060302734375, "loss": 0.5503, "rewards/accuracies": 0.75, "rewards/chosen": -2.1793808937072754, "rewards/margins": 1.047814965248108, "rewards/rejected": -3.227196216583252, "step": 12220 }, { "epoch": 0.8, "learning_rate": 5.839105115752442e-07, "logits/chosen": -2.4286434650421143, "logits/rejected": -2.187541961669922, "logps/chosen": -653.9439086914062, "logps/rejected": -668.8286743164062, "loss": 0.58, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.599398612976074, "rewards/margins": 0.5656057596206665, "rewards/rejected": -3.165004253387451, "step": 12230 }, { "epoch": 0.8, "learning_rate": 5.802479175174855e-07, "logits/chosen": -2.245690107345581, "logits/rejected": -1.6115894317626953, "logps/chosen": -503.63055419921875, "logps/rejected": -713.26513671875, "loss": 0.4681, "rewards/accuracies": 0.75, "rewards/chosen": -2.519134521484375, "rewards/margins": 1.466404914855957, "rewards/rejected": -3.985539197921753, "step": 12240 }, { "epoch": 0.8, "learning_rate": 5.765953378269901e-07, "logits/chosen": -1.9824949502944946, "logits/rejected": -2.0004501342773438, "logps/chosen": -565.99755859375, "logps/rejected": -666.8563232421875, "loss": 0.7208, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.5569775104522705, "rewards/margins": 0.556684136390686, "rewards/rejected": -3.113661766052246, "step": 12250 }, { "epoch": 0.8, "learning_rate": 5.729527915574037e-07, "logits/chosen": -2.390829086303711, "logits/rejected": -1.928675651550293, "logps/chosen": -580.5411376953125, "logps/rejected": -638.8826293945312, "loss": 0.4666, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4753901958465576, "rewards/margins": 1.4707015752792358, "rewards/rejected": -3.946091413497925, "step": 12260 }, { "epoch": 0.8, "learning_rate": 5.693202977100304e-07, "logits/chosen": -1.7291290760040283, "logits/rejected": -2.0605156421661377, "logps/chosen": -541.4114379882812, "logps/rejected": -680.072998046875, "loss": 0.4824, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.732806921005249, "rewards/margins": 1.2888050079345703, "rewards/rejected": -4.021612167358398, "step": 12270 }, { "epoch": 0.8, "learning_rate": 5.656978752337389e-07, "logits/chosen": -1.7255207300186157, "logits/rejected": -2.1508889198303223, "logps/chosen": -496.97125244140625, "logps/rejected": -799.4225463867188, "loss": 0.6514, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.5759778022766113, "rewards/margins": 1.273808240890503, "rewards/rejected": -3.8497862815856934, "step": 12280 }, { "epoch": 0.8, "learning_rate": 5.620855430248581e-07, "logits/chosen": -1.9934139251708984, "logits/rejected": -1.6731551885604858, "logps/chosen": -532.9713745117188, "logps/rejected": -620.8679809570312, "loss": 0.49, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7751731872558594, "rewards/margins": 0.825658917427063, "rewards/rejected": -3.6008315086364746, "step": 12290 }, { "epoch": 0.8, "learning_rate": 5.584833199270837e-07, "logits/chosen": -1.892397165298462, "logits/rejected": -1.9254367351531982, "logps/chosen": -442.7142639160156, "logps/rejected": -661.3265380859375, "loss": 0.4687, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.443904399871826, "rewards/margins": 1.6627155542373657, "rewards/rejected": -4.1066203117370605, "step": 12300 }, { "epoch": 0.81, "learning_rate": 5.548912247313742e-07, "logits/chosen": -1.9033715724945068, "logits/rejected": -1.8007004261016846, "logps/chosen": -531.8680419921875, "logps/rejected": -670.9957275390625, "loss": 0.5136, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.654646396636963, "rewards/margins": 1.2657206058502197, "rewards/rejected": -3.9203670024871826, "step": 12310 }, { "epoch": 0.81, "learning_rate": 5.513092761758596e-07, "logits/chosen": -2.130232334136963, "logits/rejected": -1.8414499759674072, "logps/chosen": -601.0474243164062, "logps/rejected": -640.6214599609375, "loss": 0.4741, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4780101776123047, "rewards/margins": 0.6366981863975525, "rewards/rejected": -3.114708423614502, "step": 12320 }, { "epoch": 0.81, "learning_rate": 5.477374929457363e-07, "logits/chosen": -2.0072124004364014, "logits/rejected": -2.020477056503296, "logps/chosen": -496.66497802734375, "logps/rejected": -574.7835693359375, "loss": 0.4712, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.09798526763916, "rewards/margins": 1.52622389793396, "rewards/rejected": -3.624208927154541, "step": 12330 }, { "epoch": 0.81, "learning_rate": 5.441758936731772e-07, "logits/chosen": -1.8541311025619507, "logits/rejected": -1.9003242254257202, "logps/chosen": -584.4236450195312, "logps/rejected": -690.5775146484375, "loss": 0.6396, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6013457775115967, "rewards/margins": 1.170657992362976, "rewards/rejected": -3.7720043659210205, "step": 12340 }, { "epoch": 0.81, "learning_rate": 5.406244969372273e-07, "logits/chosen": -2.2977888584136963, "logits/rejected": -2.159036636352539, "logps/chosen": -559.5189208984375, "logps/rejected": -704.7296752929688, "loss": 0.4578, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.185288906097412, "rewards/margins": 1.2700397968292236, "rewards/rejected": -3.4553287029266357, "step": 12350 }, { "epoch": 0.81, "learning_rate": 5.370833212637122e-07, "logits/chosen": -1.5719448328018188, "logits/rejected": -1.7477598190307617, "logps/chosen": -432.41937255859375, "logps/rejected": -650.3663330078125, "loss": 0.618, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.04536509513855, "rewards/margins": 0.9212772250175476, "rewards/rejected": -3.966642379760742, "step": 12360 }, { "epoch": 0.81, "learning_rate": 5.335523851251392e-07, "logits/chosen": -2.318669319152832, "logits/rejected": -1.7633874416351318, "logps/chosen": -623.6515502929688, "logps/rejected": -614.4036865234375, "loss": 0.4593, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4246182441711426, "rewards/margins": 1.146517038345337, "rewards/rejected": -3.5711350440979004, "step": 12370 }, { "epoch": 0.81, "learning_rate": 5.300317069406003e-07, "logits/chosen": -1.8068931102752686, "logits/rejected": -1.7986596822738647, "logps/chosen": -674.0821533203125, "logps/rejected": -612.6988525390625, "loss": 0.5668, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.4161953926086426, "rewards/margins": 0.4039956033229828, "rewards/rejected": -3.8201911449432373, "step": 12380 }, { "epoch": 0.81, "learning_rate": 5.265213050756782e-07, "logits/chosen": -2.1107888221740723, "logits/rejected": -2.1840109825134277, "logps/chosen": -764.8612060546875, "logps/rejected": -786.1771240234375, "loss": 0.3672, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -3.1414551734924316, "rewards/margins": 1.0852000713348389, "rewards/rejected": -4.226655006408691, "step": 12390 }, { "epoch": 0.81, "learning_rate": 5.230211978423477e-07, "logits/chosen": -1.7935606241226196, "logits/rejected": -2.1159043312072754, "logps/chosen": -557.9053955078125, "logps/rejected": -678.38525390625, "loss": 0.5979, "rewards/accuracies": 0.75, "rewards/chosen": -2.4887566566467285, "rewards/margins": 1.6448196172714233, "rewards/rejected": -4.133576393127441, "step": 12400 }, { "epoch": 0.81, "learning_rate": 5.195314034988835e-07, "logits/chosen": -1.6798433065414429, "logits/rejected": -1.9338346719741821, "logps/chosen": -492.1327209472656, "logps/rejected": -591.898681640625, "loss": 0.6946, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.843254804611206, "rewards/margins": 0.31330960988998413, "rewards/rejected": -3.156564712524414, "step": 12410 }, { "epoch": 0.81, "learning_rate": 5.160519402497616e-07, "logits/chosen": -2.2170321941375732, "logits/rejected": -1.2710682153701782, "logps/chosen": -694.8778076171875, "logps/rejected": -638.19287109375, "loss": 0.3549, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.887610912322998, "rewards/margins": 1.2259972095489502, "rewards/rejected": -4.113608360290527, "step": 12420 }, { "epoch": 0.81, "learning_rate": 5.125828262455679e-07, "logits/chosen": -1.9972445964813232, "logits/rejected": -1.5846986770629883, "logps/chosen": -585.0384521484375, "logps/rejected": -653.4993896484375, "loss": 0.5703, "rewards/accuracies": 0.75, "rewards/chosen": -2.0784976482391357, "rewards/margins": 1.2749563455581665, "rewards/rejected": -3.3534538745880127, "step": 12430 }, { "epoch": 0.81, "learning_rate": 5.091240795828992e-07, "logits/chosen": -2.2598729133605957, "logits/rejected": -2.0431060791015625, "logps/chosen": -642.5684814453125, "logps/rejected": -509.0912170410156, "loss": 0.794, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7314934730529785, "rewards/margins": 0.42480888962745667, "rewards/rejected": -3.156301975250244, "step": 12440 }, { "epoch": 0.81, "learning_rate": 5.056757183042732e-07, "logits/chosen": -2.065981388092041, "logits/rejected": -1.5648051500320435, "logps/chosen": -562.4705810546875, "logps/rejected": -604.2764892578125, "loss": 0.6376, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8458163738250732, "rewards/margins": 0.7787143588066101, "rewards/rejected": -3.624530792236328, "step": 12450 }, { "epoch": 0.82, "learning_rate": 5.022377603980308e-07, "logits/chosen": -1.9168132543563843, "logits/rejected": -1.6493221521377563, "logps/chosen": -689.4484252929688, "logps/rejected": -724.8370361328125, "loss": 0.4035, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7570643424987793, "rewards/margins": 1.5779832601547241, "rewards/rejected": -4.335047721862793, "step": 12460 }, { "epoch": 0.82, "learning_rate": 4.988102237982454e-07, "logits/chosen": -1.7574495077133179, "logits/rejected": -2.05273699760437, "logps/chosen": -525.7030029296875, "logps/rejected": -648.1375732421875, "loss": 0.4259, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.600522994995117, "rewards/margins": 1.2422122955322266, "rewards/rejected": -3.8427352905273438, "step": 12470 }, { "epoch": 0.82, "learning_rate": 4.953931263846251e-07, "logits/chosen": -2.0417659282684326, "logits/rejected": -1.957859754562378, "logps/chosen": -608.4056396484375, "logps/rejected": -740.5634155273438, "loss": 0.5116, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.7717933654785156, "rewards/margins": 0.4688115119934082, "rewards/rejected": -3.240605115890503, "step": 12480 }, { "epoch": 0.82, "learning_rate": 4.919864859824266e-07, "logits/chosen": -1.8863499164581299, "logits/rejected": -1.8030261993408203, "logps/chosen": -553.4390869140625, "logps/rejected": -671.4093627929688, "loss": 0.5524, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.732145071029663, "rewards/margins": 1.1588938236236572, "rewards/rejected": -3.8910393714904785, "step": 12490 }, { "epoch": 0.82, "learning_rate": 4.885903203623532e-07, "logits/chosen": -1.9563652276992798, "logits/rejected": -1.9688422679901123, "logps/chosen": -550.7012939453125, "logps/rejected": -624.7869873046875, "loss": 0.4811, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5207536220550537, "rewards/margins": 0.9616984128952026, "rewards/rejected": -3.482451915740967, "step": 12500 }, { "epoch": 0.82, "learning_rate": 4.852046472404695e-07, "logits/chosen": -2.0069515705108643, "logits/rejected": -1.915759801864624, "logps/chosen": -574.3663330078125, "logps/rejected": -675.5836791992188, "loss": 0.5176, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.898287534713745, "rewards/margins": 1.1114394664764404, "rewards/rejected": -4.0097270011901855, "step": 12510 }, { "epoch": 0.82, "learning_rate": 4.818294842781035e-07, "logits/chosen": -1.8245235681533813, "logits/rejected": -1.6396923065185547, "logps/chosen": -511.2354431152344, "logps/rejected": -533.2288818359375, "loss": 0.5954, "rewards/accuracies": 0.75, "rewards/chosen": -2.424683094024658, "rewards/margins": 0.762967586517334, "rewards/rejected": -3.187650442123413, "step": 12520 }, { "epoch": 0.82, "learning_rate": 4.784648490817601e-07, "logits/chosen": -2.390002727508545, "logits/rejected": -1.4948108196258545, "logps/chosen": -599.1458740234375, "logps/rejected": -640.5682373046875, "loss": 0.5618, "rewards/accuracies": 0.75, "rewards/chosen": -2.689018726348877, "rewards/margins": 1.4242786169052124, "rewards/rejected": -4.113297462463379, "step": 12530 }, { "epoch": 0.82, "learning_rate": 4.751107592030235e-07, "logits/chosen": -2.0932703018188477, "logits/rejected": -1.7733566761016846, "logps/chosen": -610.6126708984375, "logps/rejected": -719.3922119140625, "loss": 0.5577, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9133660793304443, "rewards/margins": 1.0601561069488525, "rewards/rejected": -3.9735217094421387, "step": 12540 }, { "epoch": 0.82, "learning_rate": 4.717672321384703e-07, "logits/chosen": -1.5202213525772095, "logits/rejected": -1.974769949913025, "logps/chosen": -458.13836669921875, "logps/rejected": -612.10986328125, "loss": 0.4942, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4996895790100098, "rewards/margins": 1.1478599309921265, "rewards/rejected": -3.6475493907928467, "step": 12550 }, { "epoch": 0.82, "learning_rate": 4.684342853295748e-07, "logits/chosen": -1.9798561334609985, "logits/rejected": -1.7142130136489868, "logps/chosen": -574.4808349609375, "logps/rejected": -703.8031616210938, "loss": 0.5167, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6047985553741455, "rewards/margins": 1.0268545150756836, "rewards/rejected": -3.6316535472869873, "step": 12560 }, { "epoch": 0.82, "learning_rate": 4.651119361626213e-07, "logits/chosen": -2.03121018409729, "logits/rejected": -1.7948925495147705, "logps/chosen": -556.2425537109375, "logps/rejected": -572.4705810546875, "loss": 0.6544, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.823820114135742, "rewards/margins": 0.2980334162712097, "rewards/rejected": -3.1218535900115967, "step": 12570 }, { "epoch": 0.82, "learning_rate": 4.618002019686091e-07, "logits/chosen": -2.1452739238739014, "logits/rejected": -1.3949072360992432, "logps/chosen": -565.899169921875, "logps/rejected": -617.302734375, "loss": 0.4045, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.528507947921753, "rewards/margins": 1.3577812910079956, "rewards/rejected": -3.886289596557617, "step": 12580 }, { "epoch": 0.82, "learning_rate": 4.5849910002316757e-07, "logits/chosen": -2.036592483520508, "logits/rejected": -2.1171741485595703, "logps/chosen": -571.2657470703125, "logps/rejected": -675.0497436523438, "loss": 0.5223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.716240644454956, "rewards/margins": 1.0484824180603027, "rewards/rejected": -3.764723300933838, "step": 12590 }, { "epoch": 0.82, "learning_rate": 4.5520864754645984e-07, "logits/chosen": -2.0893242359161377, "logits/rejected": -2.1231119632720947, "logps/chosen": -479.87066650390625, "logps/rejected": -635.230224609375, "loss": 0.5501, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6188578605651855, "rewards/margins": 0.9830479621887207, "rewards/rejected": -3.6019058227539062, "step": 12600 }, { "epoch": 0.83, "learning_rate": 4.5192886170309896e-07, "logits/chosen": -1.8651552200317383, "logits/rejected": -1.6246029138565063, "logps/chosen": -573.304931640625, "logps/rejected": -564.4774169921875, "loss": 0.5632, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.2361462116241455, "rewards/margins": 0.16420508921146393, "rewards/rejected": -3.4003512859344482, "step": 12610 }, { "epoch": 0.83, "learning_rate": 4.486597596020548e-07, "logits/chosen": -1.7377746105194092, "logits/rejected": -1.476535439491272, "logps/chosen": -492.79290771484375, "logps/rejected": -609.2454833984375, "loss": 0.559, "rewards/accuracies": 0.75, "rewards/chosen": -2.638640880584717, "rewards/margins": 1.4015352725982666, "rewards/rejected": -4.040175914764404, "step": 12620 }, { "epoch": 0.83, "learning_rate": 4.454013582965644e-07, "logits/chosen": -1.9276511669158936, "logits/rejected": -1.6506223678588867, "logps/chosen": -475.0560607910156, "logps/rejected": -581.9962158203125, "loss": 0.457, "rewards/accuracies": 0.75, "rewards/chosen": -2.385908365249634, "rewards/margins": 1.3607556819915771, "rewards/rejected": -3.746664047241211, "step": 12630 }, { "epoch": 0.83, "learning_rate": 4.4215367478404605e-07, "logits/chosen": -1.850710153579712, "logits/rejected": -1.9889118671417236, "logps/chosen": -599.2052001953125, "logps/rejected": -740.7207641601562, "loss": 0.4639, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.972484827041626, "rewards/margins": 1.1469919681549072, "rewards/rejected": -4.119476795196533, "step": 12640 }, { "epoch": 0.83, "learning_rate": 4.389167260060068e-07, "logits/chosen": -2.1288907527923584, "logits/rejected": -1.8164348602294922, "logps/chosen": -669.3413696289062, "logps/rejected": -723.665283203125, "loss": 0.2786, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3347713947296143, "rewards/margins": 1.8705580234527588, "rewards/rejected": -4.205329418182373, "step": 12650 }, { "epoch": 0.83, "learning_rate": 4.356905288479579e-07, "logits/chosen": -1.9049745798110962, "logits/rejected": -2.047344207763672, "logps/chosen": -539.6206665039062, "logps/rejected": -662.4989013671875, "loss": 0.5003, "rewards/accuracies": 0.75, "rewards/chosen": -2.6522750854492188, "rewards/margins": 1.5446138381958008, "rewards/rejected": -4.1968889236450195, "step": 12660 }, { "epoch": 0.83, "learning_rate": 4.3247510013932377e-07, "logits/chosen": -1.9219976663589478, "logits/rejected": -1.8390038013458252, "logps/chosen": -500.98199462890625, "logps/rejected": -563.2193603515625, "loss": 0.6404, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.376319646835327, "rewards/margins": 0.6220875978469849, "rewards/rejected": -2.9984073638916016, "step": 12670 }, { "epoch": 0.83, "learning_rate": 4.2927045665335594e-07, "logits/chosen": -1.747137427330017, "logits/rejected": -1.8025964498519897, "logps/chosen": -678.0807495117188, "logps/rejected": -724.901611328125, "loss": 0.5937, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0370450019836426, "rewards/margins": 0.9841930270195007, "rewards/rejected": -4.021238327026367, "step": 12680 }, { "epoch": 0.83, "learning_rate": 4.260766151070439e-07, "logits/chosen": -1.9573780298233032, "logits/rejected": -1.9837071895599365, "logps/chosen": -629.7701416015625, "logps/rejected": -728.9971313476562, "loss": 0.4825, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3575403690338135, "rewards/margins": 1.7600396871566772, "rewards/rejected": -4.117579460144043, "step": 12690 }, { "epoch": 0.83, "learning_rate": 4.228935921610308e-07, "logits/chosen": -2.112596035003662, "logits/rejected": -2.3389065265655518, "logps/chosen": -511.53118896484375, "logps/rejected": -678.299072265625, "loss": 0.6924, "rewards/accuracies": 0.5, "rewards/chosen": -2.821420669555664, "rewards/margins": 0.20472578704357147, "rewards/rejected": -3.026146650314331, "step": 12700 }, { "epoch": 0.83, "learning_rate": 4.1972140441952246e-07, "logits/chosen": -2.140105962753296, "logits/rejected": -1.904766321182251, "logps/chosen": -576.4169311523438, "logps/rejected": -638.1660766601562, "loss": 0.582, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.951691150665283, "rewards/margins": 0.9234949350357056, "rewards/rejected": -3.8751864433288574, "step": 12710 }, { "epoch": 0.83, "learning_rate": 4.165600684302046e-07, "logits/chosen": -2.0236175060272217, "logits/rejected": -2.025157928466797, "logps/chosen": -698.0067749023438, "logps/rejected": -652.054931640625, "loss": 0.4429, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9516820907592773, "rewards/margins": 0.9500513076782227, "rewards/rejected": -2.901733160018921, "step": 12720 }, { "epoch": 0.83, "learning_rate": 4.13409600684154e-07, "logits/chosen": -2.2495474815368652, "logits/rejected": -1.8680133819580078, "logps/chosen": -604.0130615234375, "logps/rejected": -690.2644653320312, "loss": 0.3482, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.2546908855438232, "rewards/margins": 1.5465089082717896, "rewards/rejected": -3.8011996746063232, "step": 12730 }, { "epoch": 0.83, "learning_rate": 4.102700176157548e-07, "logits/chosen": -2.0701842308044434, "logits/rejected": -1.9402803182601929, "logps/chosen": -646.2323608398438, "logps/rejected": -698.7376708984375, "loss": 0.4756, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0139358043670654, "rewards/margins": 0.7653390765190125, "rewards/rejected": -3.7792751789093018, "step": 12740 }, { "epoch": 0.83, "learning_rate": 4.0714133560260884e-07, "logits/chosen": -1.6190227270126343, "logits/rejected": -1.9803718328475952, "logps/chosen": -571.6145629882812, "logps/rejected": -689.69287109375, "loss": 0.5, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9178075790405273, "rewards/margins": 1.2299944162368774, "rewards/rejected": -4.147801876068115, "step": 12750 }, { "epoch": 0.83, "learning_rate": 4.0402357096545527e-07, "logits/chosen": -2.0898361206054688, "logits/rejected": -2.194303035736084, "logps/chosen": -600.5381469726562, "logps/rejected": -672.1326293945312, "loss": 0.4669, "rewards/accuracies": 0.75, "rewards/chosen": -2.165907144546509, "rewards/margins": 1.0310553312301636, "rewards/rejected": -3.196962356567383, "step": 12760 }, { "epoch": 0.84, "learning_rate": 4.0091673996808025e-07, "logits/chosen": -2.037740468978882, "logits/rejected": -1.6166813373565674, "logps/chosen": -600.8524169921875, "logps/rejected": -612.1278686523438, "loss": 0.5125, "rewards/accuracies": 0.75, "rewards/chosen": -2.304969549179077, "rewards/margins": 1.0908288955688477, "rewards/rejected": -3.395798444747925, "step": 12770 }, { "epoch": 0.84, "learning_rate": 3.9782085881723776e-07, "logits/chosen": -1.8339916467666626, "logits/rejected": -2.191059112548828, "logps/chosen": -540.3582153320312, "logps/rejected": -670.88232421875, "loss": 0.5695, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.28513765335083, "rewards/margins": 0.8673833608627319, "rewards/rejected": -4.152520656585693, "step": 12780 }, { "epoch": 0.84, "learning_rate": 3.947359436625592e-07, "logits/chosen": -1.8163385391235352, "logits/rejected": -1.6801373958587646, "logps/chosen": -467.11376953125, "logps/rejected": -692.0794677734375, "loss": 0.3275, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.140580654144287, "rewards/margins": 2.241574287414551, "rewards/rejected": -4.382154941558838, "step": 12790 }, { "epoch": 0.84, "learning_rate": 3.9166201059647386e-07, "logits/chosen": -2.2520198822021484, "logits/rejected": -1.7607390880584717, "logps/chosen": -490.7139587402344, "logps/rejected": -604.9755859375, "loss": 0.4137, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.212995767593384, "rewards/margins": 1.3278906345367432, "rewards/rejected": -3.540886402130127, "step": 12800 }, { "epoch": 0.84, "learning_rate": 3.8859907565412194e-07, "logits/chosen": -2.5652875900268555, "logits/rejected": -1.9722869396209717, "logps/chosen": -594.1384887695312, "logps/rejected": -624.1168823242188, "loss": 0.6008, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7919609546661377, "rewards/margins": 1.0597692728042603, "rewards/rejected": -3.85172963142395, "step": 12810 }, { "epoch": 0.84, "learning_rate": 3.8554715481327303e-07, "logits/chosen": -2.1508398056030273, "logits/rejected": -1.7388432025909424, "logps/chosen": -571.0724487304688, "logps/rejected": -676.97998046875, "loss": 0.4576, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.288452386856079, "rewards/margins": 1.5518442392349243, "rewards/rejected": -3.8402962684631348, "step": 12820 }, { "epoch": 0.84, "learning_rate": 3.8250626399424007e-07, "logits/chosen": -2.3599681854248047, "logits/rejected": -2.067962408065796, "logps/chosen": -582.147216796875, "logps/rejected": -634.3817749023438, "loss": 0.6027, "rewards/accuracies": 0.75, "rewards/chosen": -1.914297103881836, "rewards/margins": 0.8835035562515259, "rewards/rejected": -2.7978005409240723, "step": 12830 }, { "epoch": 0.84, "learning_rate": 3.7947641905980104e-07, "logits/chosen": -2.2323431968688965, "logits/rejected": -1.75969660282135, "logps/chosen": -532.4102172851562, "logps/rejected": -586.7337646484375, "loss": 0.377, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.3225793838500977, "rewards/margins": 1.504718542098999, "rewards/rejected": -3.8272979259490967, "step": 12840 }, { "epoch": 0.84, "learning_rate": 3.764576358151098e-07, "logits/chosen": -1.6704959869384766, "logits/rejected": -1.1709400415420532, "logps/chosen": -492.19659423828125, "logps/rejected": -505.697021484375, "loss": 0.606, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.853659152984619, "rewards/margins": 0.8451746106147766, "rewards/rejected": -3.69883394241333, "step": 12850 }, { "epoch": 0.84, "learning_rate": 3.7344993000761944e-07, "logits/chosen": -2.033820390701294, "logits/rejected": -2.1698267459869385, "logps/chosen": -602.9301147460938, "logps/rejected": -742.5383911132812, "loss": 0.5597, "rewards/accuracies": 0.75, "rewards/chosen": -2.648543119430542, "rewards/margins": 0.9859378933906555, "rewards/rejected": -3.634481430053711, "step": 12860 }, { "epoch": 0.84, "learning_rate": 3.7045331732699585e-07, "logits/chosen": -2.0151562690734863, "logits/rejected": -1.9181638956069946, "logps/chosen": -651.0270385742188, "logps/rejected": -691.8004760742188, "loss": 0.5002, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.936880111694336, "rewards/margins": 1.0908117294311523, "rewards/rejected": -4.027691841125488, "step": 12870 }, { "epoch": 0.84, "learning_rate": 3.6746781340503993e-07, "logits/chosen": -2.1583030223846436, "logits/rejected": -1.5957120656967163, "logps/chosen": -512.1683349609375, "logps/rejected": -653.0682983398438, "loss": 0.4046, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6026124954223633, "rewards/margins": 1.51370370388031, "rewards/rejected": -4.116316318511963, "step": 12880 }, { "epoch": 0.84, "learning_rate": 3.6449343381560116e-07, "logits/chosen": -2.0830488204956055, "logits/rejected": -1.7334263324737549, "logps/chosen": -628.7766723632812, "logps/rejected": -617.7608032226562, "loss": 0.6826, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.146591901779175, "rewards/margins": 0.6630234718322754, "rewards/rejected": -3.80961537361145, "step": 12890 }, { "epoch": 0.84, "learning_rate": 3.615301940745017e-07, "logits/chosen": -2.1485512256622314, "logits/rejected": -2.1845757961273193, "logps/chosen": -547.0929565429688, "logps/rejected": -692.17919921875, "loss": 0.5561, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4031753540039062, "rewards/margins": 0.7044495344161987, "rewards/rejected": -3.1076247692108154, "step": 12900 }, { "epoch": 0.84, "learning_rate": 3.5857810963945084e-07, "logits/chosen": -2.192819595336914, "logits/rejected": -2.05318284034729, "logps/chosen": -594.884765625, "logps/rejected": -599.3443603515625, "loss": 0.7412, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6752190589904785, "rewards/margins": 0.33076637983322144, "rewards/rejected": -3.005985736846924, "step": 12910 }, { "epoch": 0.85, "learning_rate": 3.556371959099678e-07, "logits/chosen": -2.1900582313537598, "logits/rejected": -1.5971273183822632, "logps/chosen": -542.5919799804688, "logps/rejected": -571.1941528320312, "loss": 0.4925, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.249591112136841, "rewards/margins": 0.8787922859191895, "rewards/rejected": -3.1283833980560303, "step": 12920 }, { "epoch": 0.85, "learning_rate": 3.5270746822729797e-07, "logits/chosen": -1.9319312572479248, "logits/rejected": -1.6746313571929932, "logps/chosen": -597.1219482421875, "logps/rejected": -586.4876708984375, "loss": 0.4624, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0332717895507812, "rewards/margins": 1.236446499824524, "rewards/rejected": -3.269718647003174, "step": 12930 }, { "epoch": 0.85, "learning_rate": 3.4978894187433746e-07, "logits/chosen": -2.0437123775482178, "logits/rejected": -2.0197818279266357, "logps/chosen": -532.19677734375, "logps/rejected": -666.5894775390625, "loss": 0.5365, "rewards/accuracies": 0.75, "rewards/chosen": -2.8714423179626465, "rewards/margins": 0.9335220456123352, "rewards/rejected": -3.804964542388916, "step": 12940 }, { "epoch": 0.85, "learning_rate": 3.468816320755486e-07, "logits/chosen": -1.7541229724884033, "logits/rejected": -1.7077795267105103, "logps/chosen": -509.0663146972656, "logps/rejected": -753.5894165039062, "loss": 0.4303, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6254935264587402, "rewards/margins": 1.4224083423614502, "rewards/rejected": -4.0479021072387695, "step": 12950 }, { "epoch": 0.85, "learning_rate": 3.4398555399688336e-07, "logits/chosen": -1.9851763248443604, "logits/rejected": -1.3425769805908203, "logps/chosen": -554.126953125, "logps/rejected": -561.9085083007812, "loss": 0.5907, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.192791700363159, "rewards/margins": 0.6226320862770081, "rewards/rejected": -3.8154244422912598, "step": 12960 }, { "epoch": 0.85, "learning_rate": 3.411007227457047e-07, "logits/chosen": -1.9916019439697266, "logits/rejected": -1.8850574493408203, "logps/chosen": -510.70733642578125, "logps/rejected": -586.5621337890625, "loss": 0.5224, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -2.755558967590332, "rewards/margins": 0.21060097217559814, "rewards/rejected": -2.9661598205566406, "step": 12970 }, { "epoch": 0.85, "learning_rate": 3.382271533707043e-07, "logits/chosen": -2.1298325061798096, "logits/rejected": -1.8457624912261963, "logps/chosen": -761.4664306640625, "logps/rejected": -716.9132080078125, "loss": 0.5527, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.6292195320129395, "rewards/margins": 0.9699549674987793, "rewards/rejected": -3.5991744995117188, "step": 12980 }, { "epoch": 0.85, "learning_rate": 3.353648608618287e-07, "logits/chosen": -2.0403294563293457, "logits/rejected": -2.2515652179718018, "logps/chosen": -613.3743896484375, "logps/rejected": -729.22802734375, "loss": 0.6022, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.769686222076416, "rewards/margins": 0.8927669525146484, "rewards/rejected": -3.6624526977539062, "step": 12990 }, { "epoch": 0.85, "learning_rate": 3.3251386015019676e-07, "logits/chosen": -2.323190927505493, "logits/rejected": -1.8212988376617432, "logps/chosen": -632.8485717773438, "logps/rejected": -775.36181640625, "loss": 0.5054, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.0626654624938965, "rewards/margins": 1.7412147521972656, "rewards/rejected": -3.803880214691162, "step": 13000 }, { "epoch": 0.85, "learning_rate": 3.296741661080255e-07, "logits/chosen": -1.7240066528320312, "logits/rejected": -1.8471324443817139, "logps/chosen": -528.8418579101562, "logps/rejected": -653.22509765625, "loss": 0.6246, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.734323024749756, "rewards/margins": 0.8821514248847961, "rewards/rejected": -3.6164746284484863, "step": 13010 }, { "epoch": 0.85, "learning_rate": 3.2684579354854974e-07, "logits/chosen": -1.852184534072876, "logits/rejected": -1.9791618585586548, "logps/chosen": -607.6898193359375, "logps/rejected": -612.999267578125, "loss": 0.6873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.3641304969787598, "rewards/margins": 0.4132850766181946, "rewards/rejected": -3.7774155139923096, "step": 13020 }, { "epoch": 0.85, "learning_rate": 3.2402875722594653e-07, "logits/chosen": -1.6074268817901611, "logits/rejected": -1.5102227926254272, "logps/chosen": -469.12591552734375, "logps/rejected": -599.9771118164062, "loss": 0.3347, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.6753275394439697, "rewards/margins": 1.4595181941986084, "rewards/rejected": -4.13484525680542, "step": 13030 }, { "epoch": 0.85, "learning_rate": 3.212230718352566e-07, "logits/chosen": -2.4419732093811035, "logits/rejected": -2.057793617248535, "logps/chosen": -708.2984008789062, "logps/rejected": -678.909912109375, "loss": 0.52, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.55948543548584, "rewards/margins": 0.5278710722923279, "rewards/rejected": -3.0873570442199707, "step": 13040 }, { "epoch": 0.85, "learning_rate": 3.1842875201231025e-07, "logits/chosen": -1.3760806322097778, "logits/rejected": -1.891939401626587, "logps/chosen": -555.8878173828125, "logps/rejected": -505.7021484375, "loss": 0.6167, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.7588725090026855, "rewards/margins": 0.5370644330978394, "rewards/rejected": -3.2959365844726562, "step": 13050 }, { "epoch": 0.85, "learning_rate": 3.156458123336478e-07, "logits/chosen": -1.6771266460418701, "logits/rejected": -1.8519872426986694, "logps/chosen": -511.8056640625, "logps/rejected": -585.6915893554688, "loss": 0.6702, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.160909652709961, "rewards/margins": 0.9589214324951172, "rewards/rejected": -4.119831085205078, "step": 13060 }, { "epoch": 0.86, "learning_rate": 3.128742673164459e-07, "logits/chosen": -2.0616233348846436, "logits/rejected": -2.001276969909668, "logps/chosen": -525.9805908203125, "logps/rejected": -554.439453125, "loss": 0.6953, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.1127099990844727, "rewards/margins": 0.6922973990440369, "rewards/rejected": -2.805007219314575, "step": 13070 }, { "epoch": 0.86, "learning_rate": 3.101141314184414e-07, "logits/chosen": -1.6857885122299194, "logits/rejected": -2.239570140838623, "logps/chosen": -489.9275817871094, "logps/rejected": -652.6864013671875, "loss": 0.4121, "rewards/accuracies": 0.75, "rewards/chosen": -2.383946657180786, "rewards/margins": 1.0762474536895752, "rewards/rejected": -3.4601943492889404, "step": 13080 }, { "epoch": 0.86, "learning_rate": 3.0736541903785526e-07, "logits/chosen": -1.9731498956680298, "logits/rejected": -1.6244605779647827, "logps/chosen": -529.0377197265625, "logps/rejected": -638.1597900390625, "loss": 0.3359, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.173402786254883, "rewards/margins": 1.8639698028564453, "rewards/rejected": -4.037372589111328, "step": 13090 }, { "epoch": 0.86, "learning_rate": 3.0462814451331704e-07, "logits/chosen": -2.0660529136657715, "logits/rejected": -1.9338308572769165, "logps/chosen": -512.8846435546875, "logps/rejected": -600.3922119140625, "loss": 0.5674, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2265946865081787, "rewards/margins": 1.1876270771026611, "rewards/rejected": -3.4142215251922607, "step": 13100 }, { "epoch": 0.86, "learning_rate": 3.019023221237927e-07, "logits/chosen": -2.053157329559326, "logits/rejected": -1.6921491622924805, "logps/chosen": -498.1617126464844, "logps/rejected": -530.1224365234375, "loss": 0.4655, "rewards/accuracies": 0.75, "rewards/chosen": -2.7820048332214355, "rewards/margins": 0.839253306388855, "rewards/rejected": -3.621258497238159, "step": 13110 }, { "epoch": 0.86, "learning_rate": 2.991879660885058e-07, "logits/chosen": -2.083878517150879, "logits/rejected": -1.2553603649139404, "logps/chosen": -486.6934509277344, "logps/rejected": -607.5968017578125, "loss": 0.5171, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.330781936645508, "rewards/margins": 1.6225910186767578, "rewards/rejected": -3.9533724784851074, "step": 13120 }, { "epoch": 0.86, "learning_rate": 2.9648509056686786e-07, "logits/chosen": -2.3199565410614014, "logits/rejected": -1.7166868448257446, "logps/chosen": -556.6590576171875, "logps/rejected": -598.4984130859375, "loss": 0.3818, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.919614553451538, "rewards/margins": 1.4941420555114746, "rewards/rejected": -3.4137566089630127, "step": 13130 }, { "epoch": 0.86, "learning_rate": 2.937937096584012e-07, "logits/chosen": -1.9594379663467407, "logits/rejected": -1.6278324127197266, "logps/chosen": -526.9962158203125, "logps/rejected": -588.111572265625, "loss": 0.4816, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6737329959869385, "rewards/margins": 0.8540897369384766, "rewards/rejected": -3.527822494506836, "step": 13140 }, { "epoch": 0.86, "learning_rate": 2.9111383740266756e-07, "logits/chosen": -1.6279857158660889, "logits/rejected": -2.1303136348724365, "logps/chosen": -543.24853515625, "logps/rejected": -681.8314208984375, "loss": 0.5019, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.4309229850769043, "rewards/margins": 0.676580548286438, "rewards/rejected": -4.1075029373168945, "step": 13150 }, { "epoch": 0.86, "learning_rate": 2.8844548777919255e-07, "logits/chosen": -1.8237205743789673, "logits/rejected": -1.8526685237884521, "logps/chosen": -670.98486328125, "logps/rejected": -855.240234375, "loss": 0.5499, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.716068983078003, "rewards/margins": 1.226020097732544, "rewards/rejected": -4.942088603973389, "step": 13160 }, { "epoch": 0.86, "learning_rate": 2.8578867470739594e-07, "logits/chosen": -2.036724805831909, "logits/rejected": -1.7551301717758179, "logps/chosen": -541.3497314453125, "logps/rejected": -731.2151489257812, "loss": 0.4116, "rewards/accuracies": 0.75, "rewards/chosen": -2.593327283859253, "rewards/margins": 1.2935224771499634, "rewards/rejected": -3.8868496417999268, "step": 13170 }, { "epoch": 0.86, "learning_rate": 2.8314341204651484e-07, "logits/chosen": -1.8775577545166016, "logits/rejected": -1.483978033065796, "logps/chosen": -518.1921997070312, "logps/rejected": -620.164306640625, "loss": 0.6077, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.475600242614746, "rewards/margins": 1.0090327262878418, "rewards/rejected": -3.484632968902588, "step": 13180 }, { "epoch": 0.86, "learning_rate": 2.805097135955362e-07, "logits/chosen": -1.7203683853149414, "logits/rejected": -1.4739410877227783, "logps/chosen": -573.33984375, "logps/rejected": -617.6419677734375, "loss": 0.6494, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.027435302734375, "rewards/margins": 0.5865287780761719, "rewards/rejected": -3.613964080810547, "step": 13190 }, { "epoch": 0.86, "learning_rate": 2.778875930931213e-07, "logits/chosen": -2.086585283279419, "logits/rejected": -1.8391920328140259, "logps/chosen": -534.3094482421875, "logps/rejected": -591.4953002929688, "loss": 0.5956, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.117051601409912, "rewards/margins": 0.9744482040405273, "rewards/rejected": -4.0914998054504395, "step": 13200 }, { "epoch": 0.86, "learning_rate": 2.7527706421753426e-07, "logits/chosen": -1.999886155128479, "logits/rejected": -2.0072293281555176, "logps/chosen": -597.1724853515625, "logps/rejected": -748.3546142578125, "loss": 0.3268, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.8634326457977295, "rewards/margins": 1.959315299987793, "rewards/rejected": -3.8227481842041016, "step": 13210 }, { "epoch": 0.86, "learning_rate": 2.726781405865736e-07, "logits/chosen": -1.7645056247711182, "logits/rejected": -1.8943487405776978, "logps/chosen": -550.005859375, "logps/rejected": -743.2796630859375, "loss": 0.528, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.024041175842285, "rewards/margins": 0.9227795600891113, "rewards/rejected": -3.9468207359313965, "step": 13220 }, { "epoch": 0.87, "learning_rate": 2.7009083575749687e-07, "logits/chosen": -1.7528998851776123, "logits/rejected": -2.1692652702331543, "logps/chosen": -533.8668212890625, "logps/rejected": -633.972412109375, "loss": 0.4979, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3489327430725098, "rewards/margins": 1.1471573114395142, "rewards/rejected": -3.4960899353027344, "step": 13230 }, { "epoch": 0.87, "learning_rate": 2.6751516322695457e-07, "logits/chosen": -1.7719800472259521, "logits/rejected": -2.000718355178833, "logps/chosen": -583.4794921875, "logps/rejected": -676.126708984375, "loss": 0.4221, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.420154094696045, "rewards/margins": 1.2737191915512085, "rewards/rejected": -3.693873643875122, "step": 13240 }, { "epoch": 0.87, "learning_rate": 2.649511364309154e-07, "logits/chosen": -2.1271283626556396, "logits/rejected": -1.6879568099975586, "logps/chosen": -541.5220947265625, "logps/rejected": -575.933837890625, "loss": 0.7008, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.7217676639556885, "rewards/margins": 0.6736433506011963, "rewards/rejected": -3.3954110145568848, "step": 13250 }, { "epoch": 0.87, "learning_rate": 2.6239876874460003e-07, "logits/chosen": -1.8494091033935547, "logits/rejected": -1.6988645792007446, "logps/chosen": -529.8714599609375, "logps/rejected": -623.36962890625, "loss": 0.4867, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.4785268306732178, "rewards/margins": 1.0714387893676758, "rewards/rejected": -3.5499656200408936, "step": 13260 }, { "epoch": 0.87, "learning_rate": 2.5985807348240744e-07, "logits/chosen": -2.0470900535583496, "logits/rejected": -2.0908408164978027, "logps/chosen": -547.52783203125, "logps/rejected": -677.0643310546875, "loss": 0.3973, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.1159825325012207, "rewards/margins": 1.9986178874969482, "rewards/rejected": -4.11460018157959, "step": 13270 }, { "epoch": 0.87, "learning_rate": 2.5732906389785014e-07, "logits/chosen": -2.3125741481781006, "logits/rejected": -2.243870735168457, "logps/chosen": -578.3486938476562, "logps/rejected": -613.689697265625, "loss": 0.6074, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6407647132873535, "rewards/margins": 0.7890304923057556, "rewards/rejected": -3.429795026779175, "step": 13280 }, { "epoch": 0.87, "learning_rate": 2.5481175318347956e-07, "logits/chosen": -2.2822792530059814, "logits/rejected": -2.039731025695801, "logps/chosen": -590.3525390625, "logps/rejected": -729.5716552734375, "loss": 0.6833, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4964635372161865, "rewards/margins": 0.7121137976646423, "rewards/rejected": -3.2085776329040527, "step": 13290 }, { "epoch": 0.87, "learning_rate": 2.5230615447082246e-07, "logits/chosen": -2.2843728065490723, "logits/rejected": -1.8483355045318604, "logps/chosen": -563.0799560546875, "logps/rejected": -601.3505249023438, "loss": 0.4561, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.796326160430908, "rewards/margins": 0.8913065195083618, "rewards/rejected": -3.6876327991485596, "step": 13300 }, { "epoch": 0.87, "learning_rate": 2.49812280830308e-07, "logits/chosen": -1.7356340885162354, "logits/rejected": -1.704167366027832, "logps/chosen": -431.0927734375, "logps/rejected": -556.7681884765625, "loss": 0.5258, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.185537338256836, "rewards/margins": 1.2141865491867065, "rewards/rejected": -3.399723768234253, "step": 13310 }, { "epoch": 0.87, "learning_rate": 2.4733014527120457e-07, "logits/chosen": -2.1145758628845215, "logits/rejected": -1.5190739631652832, "logps/chosen": -648.5697021484375, "logps/rejected": -629.7784423828125, "loss": 0.6043, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.0062718391418457, "rewards/margins": 0.17153212428092957, "rewards/rejected": -3.1778042316436768, "step": 13320 }, { "epoch": 0.87, "learning_rate": 2.4485976074154565e-07, "logits/chosen": -2.2370784282684326, "logits/rejected": -1.8612892627716064, "logps/chosen": -573.6791381835938, "logps/rejected": -620.290283203125, "loss": 0.6516, "rewards/accuracies": 0.75, "rewards/chosen": -2.7251853942871094, "rewards/margins": 1.1538364887237549, "rewards/rejected": -3.8790218830108643, "step": 13330 }, { "epoch": 0.87, "learning_rate": 2.4240114012806763e-07, "logits/chosen": -1.9059484004974365, "logits/rejected": -1.699082374572754, "logps/chosen": -480.8922424316406, "logps/rejected": -609.4310302734375, "loss": 0.5809, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.237086534500122, "rewards/margins": 0.8183034658432007, "rewards/rejected": -3.055389881134033, "step": 13340 }, { "epoch": 0.87, "learning_rate": 2.399542962561399e-07, "logits/chosen": -2.176151990890503, "logits/rejected": -2.2230257987976074, "logps/chosen": -635.1368408203125, "logps/rejected": -658.0455322265625, "loss": 0.4534, "rewards/accuracies": 0.75, "rewards/chosen": -3.0653738975524902, "rewards/margins": 0.7946423292160034, "rewards/rejected": -3.8600165843963623, "step": 13350 }, { "epoch": 0.87, "learning_rate": 2.3751924188969876e-07, "logits/chosen": -1.8235515356063843, "logits/rejected": -1.7832295894622803, "logps/chosen": -537.9699096679688, "logps/rejected": -640.5518188476562, "loss": 0.5523, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.814718246459961, "rewards/margins": 0.8247028589248657, "rewards/rejected": -3.639420747756958, "step": 13360 }, { "epoch": 0.87, "learning_rate": 2.3509598973118024e-07, "logits/chosen": -1.9352995157241821, "logits/rejected": -2.125542163848877, "logps/chosen": -612.2140502929688, "logps/rejected": -807.5963134765625, "loss": 0.5532, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1127424240112305, "rewards/margins": 2.1476805210113525, "rewards/rejected": -4.260422706604004, "step": 13370 }, { "epoch": 0.88, "learning_rate": 2.326845524214555e-07, "logits/chosen": -2.003291368484497, "logits/rejected": -1.985430359840393, "logps/chosen": -632.3148193359375, "logps/rejected": -715.447021484375, "loss": 0.6007, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.773860216140747, "rewards/margins": 0.8511343002319336, "rewards/rejected": -3.6249947547912598, "step": 13380 }, { "epoch": 0.88, "learning_rate": 2.3028494253976158e-07, "logits/chosen": -1.909105896949768, "logits/rejected": -1.9562370777130127, "logps/chosen": -641.9075927734375, "logps/rejected": -664.9573974609375, "loss": 0.5771, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8007686138153076, "rewards/margins": 0.5579482913017273, "rewards/rejected": -3.3587164878845215, "step": 13390 }, { "epoch": 0.88, "learning_rate": 2.2789717260364026e-07, "logits/chosen": -2.392245054244995, "logits/rejected": -1.9540112018585205, "logps/chosen": -666.5771484375, "logps/rejected": -664.3335571289062, "loss": 0.5076, "rewards/accuracies": 0.75, "rewards/chosen": -2.7387795448303223, "rewards/margins": 0.9567523002624512, "rewards/rejected": -3.6955318450927734, "step": 13400 }, { "epoch": 0.88, "learning_rate": 2.255212550688682e-07, "logits/chosen": -1.743224859237671, "logits/rejected": -2.1734211444854736, "logps/chosen": -580.0703735351562, "logps/rejected": -669.6419677734375, "loss": 0.7062, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.4899613857269287, "rewards/margins": 0.4612503945827484, "rewards/rejected": -2.951211452484131, "step": 13410 }, { "epoch": 0.88, "learning_rate": 2.2315720232939598e-07, "logits/chosen": -2.154392719268799, "logits/rejected": -1.6075624227523804, "logps/chosen": -609.3424072265625, "logps/rejected": -720.8203735351562, "loss": 0.6591, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9730067253112793, "rewards/margins": 1.2910255193710327, "rewards/rejected": -4.264031887054443, "step": 13420 }, { "epoch": 0.88, "learning_rate": 2.2080502671727956e-07, "logits/chosen": -1.985039472579956, "logits/rejected": -2.019219398498535, "logps/chosen": -604.9932250976562, "logps/rejected": -675.3074951171875, "loss": 0.4031, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4360015392303467, "rewards/margins": 1.0569236278533936, "rewards/rejected": -3.492924928665161, "step": 13430 }, { "epoch": 0.88, "learning_rate": 2.1846474050262078e-07, "logits/chosen": -1.9477293491363525, "logits/rejected": -1.6500422954559326, "logps/chosen": -503.76171875, "logps/rejected": -555.336181640625, "loss": 0.52, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.2972493171691895, "rewards/margins": 1.2646000385284424, "rewards/rejected": -3.5618491172790527, "step": 13440 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.9945507049560547, "logits/rejected": -1.4509528875350952, "logps/chosen": -587.4645385742188, "logps/rejected": -687.7674560546875, "loss": 0.5055, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0522592067718506, "rewards/margins": 1.0424975156784058, "rewards/rejected": -4.094756603240967, "step": 13450 }, { "epoch": 0.88, "learning_rate": 2.1381988503590578e-07, "logits/chosen": -1.9841388463974, "logits/rejected": -2.04860258102417, "logps/chosen": -541.0191650390625, "logps/rejected": -620.4351196289062, "loss": 0.5912, "rewards/accuracies": 0.75, "rewards/chosen": -3.111358165740967, "rewards/margins": 0.8610742688179016, "rewards/rejected": -3.9724323749542236, "step": 13460 }, { "epoch": 0.88, "learning_rate": 2.11515340013691e-07, "logits/chosen": -1.5378882884979248, "logits/rejected": -1.428012490272522, "logps/chosen": -571.2432861328125, "logps/rejected": -693.9453125, "loss": 0.6854, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.346834659576416, "rewards/margins": 0.9279208183288574, "rewards/rejected": -4.274755001068115, "step": 13470 }, { "epoch": 0.88, "learning_rate": 2.092227328484897e-07, "logits/chosen": -2.31776762008667, "logits/rejected": -1.6877338886260986, "logps/chosen": -628.2243041992188, "logps/rejected": -663.4994506835938, "loss": 0.4897, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.9990774393081665, "rewards/margins": 1.315735101699829, "rewards/rejected": -3.314812421798706, "step": 13480 }, { "epoch": 0.88, "learning_rate": 2.0694207549966345e-07, "logits/chosen": -2.096724033355713, "logits/rejected": -2.0771172046661377, "logps/chosen": -475.6868591308594, "logps/rejected": -588.9679565429688, "loss": 0.5565, "rewards/accuracies": 0.75, "rewards/chosen": -2.0010933876037598, "rewards/margins": 1.167586088180542, "rewards/rejected": -3.1686794757843018, "step": 13490 }, { "epoch": 0.88, "learning_rate": 2.0467337986423864e-07, "logits/chosen": -1.917855978012085, "logits/rejected": -1.869166612625122, "logps/chosen": -600.2056274414062, "logps/rejected": -674.4396362304688, "loss": 0.4465, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6695377826690674, "rewards/margins": 1.2429989576339722, "rewards/rejected": -3.912536144256592, "step": 13500 }, { "epoch": 0.88, "learning_rate": 2.0241665777684272e-07, "logits/chosen": -2.0944838523864746, "logits/rejected": -1.9450299739837646, "logps/chosen": -550.61083984375, "logps/rejected": -558.5845947265625, "loss": 0.5995, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.613074541091919, "rewards/margins": 0.7126724720001221, "rewards/rejected": -3.32574725151062, "step": 13510 }, { "epoch": 0.88, "learning_rate": 2.0017192100964366e-07, "logits/chosen": -1.9588878154754639, "logits/rejected": -1.8809268474578857, "logps/chosen": -587.5408325195312, "logps/rejected": -636.9974365234375, "loss": 0.5443, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.8207666873931885, "rewards/margins": 1.385787010192871, "rewards/rejected": -4.206553936004639, "step": 13520 }, { "epoch": 0.89, "learning_rate": 1.9793918127228777e-07, "logits/chosen": -2.2451016902923584, "logits/rejected": -1.945010781288147, "logps/chosen": -692.6990356445312, "logps/rejected": -717.7359619140625, "loss": 0.3511, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.3657596111297607, "rewards/margins": 1.6406257152557373, "rewards/rejected": -4.00638484954834, "step": 13530 }, { "epoch": 0.89, "learning_rate": 1.9571845021184005e-07, "logits/chosen": -1.9744113683700562, "logits/rejected": -2.2573609352111816, "logps/chosen": -567.6195068359375, "logps/rejected": -639.2779541015625, "loss": 0.6477, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4648168087005615, "rewards/margins": 0.5581859350204468, "rewards/rejected": -3.0230026245117188, "step": 13540 }, { "epoch": 0.89, "learning_rate": 1.9350973941272027e-07, "logits/chosen": -2.211911678314209, "logits/rejected": -2.039405345916748, "logps/chosen": -504.38165283203125, "logps/rejected": -585.1155395507812, "loss": 0.6163, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1378955841064453, "rewards/margins": 0.6837860941886902, "rewards/rejected": -3.821681499481201, "step": 13550 }, { "epoch": 0.89, "learning_rate": 1.9131306039664676e-07, "logits/chosen": -1.8577501773834229, "logits/rejected": -1.7375942468643188, "logps/chosen": -654.9981689453125, "logps/rejected": -676.6926879882812, "loss": 0.6042, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.0645787715911865, "rewards/margins": 0.8455365896224976, "rewards/rejected": -3.9101154804229736, "step": 13560 }, { "epoch": 0.89, "learning_rate": 1.8912842462257358e-07, "logits/chosen": -2.2314064502716064, "logits/rejected": -1.6110010147094727, "logps/chosen": -546.2057495117188, "logps/rejected": -739.7926635742188, "loss": 0.357, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.6191498041152954, "rewards/margins": 1.8934110403060913, "rewards/rejected": -3.512561082839966, "step": 13570 }, { "epoch": 0.89, "learning_rate": 1.869558434866303e-07, "logits/chosen": -1.8159793615341187, "logits/rejected": -1.8199894428253174, "logps/chosen": -567.0482788085938, "logps/rejected": -632.1754760742188, "loss": 0.4181, "rewards/accuracies": 0.75, "rewards/chosen": -2.8042659759521484, "rewards/margins": 0.914924144744873, "rewards/rejected": -3.7191901206970215, "step": 13580 }, { "epoch": 0.89, "learning_rate": 1.847953283220652e-07, "logits/chosen": -1.9711805582046509, "logits/rejected": -1.8406652212142944, "logps/chosen": -500.529541015625, "logps/rejected": -594.8906860351562, "loss": 0.4779, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4555704593658447, "rewards/margins": 0.9383051991462708, "rewards/rejected": -3.3938755989074707, "step": 13590 }, { "epoch": 0.89, "learning_rate": 1.8264689039918265e-07, "logits/chosen": -2.391815662384033, "logits/rejected": -2.154350757598877, "logps/chosen": -658.3121337890625, "logps/rejected": -721.5520629882812, "loss": 0.563, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.005051612854004, "rewards/margins": 1.2892992496490479, "rewards/rejected": -3.294351100921631, "step": 13600 }, { "epoch": 0.89, "learning_rate": 1.8051054092528857e-07, "logits/chosen": -2.197561025619507, "logits/rejected": -2.2787792682647705, "logps/chosen": -591.5797729492188, "logps/rejected": -601.7685546875, "loss": 0.6596, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -2.650155544281006, "rewards/margins": -0.006956362631171942, "rewards/rejected": -2.6431994438171387, "step": 13610 }, { "epoch": 0.89, "learning_rate": 1.783862910446271e-07, "logits/chosen": -1.7571876049041748, "logits/rejected": -1.4969539642333984, "logps/chosen": -516.4697265625, "logps/rejected": -669.2288208007812, "loss": 0.4461, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4835076332092285, "rewards/margins": 1.576086401939392, "rewards/rejected": -4.05959415435791, "step": 13620 }, { "epoch": 0.89, "learning_rate": 1.762741518383271e-07, "logits/chosen": -1.9321539402008057, "logits/rejected": -1.8940976858139038, "logps/chosen": -470.38592529296875, "logps/rejected": -621.544921875, "loss": 0.4946, "rewards/accuracies": 0.75, "rewards/chosen": -2.25473690032959, "rewards/margins": 0.9184789657592773, "rewards/rejected": -3.173215389251709, "step": 13630 }, { "epoch": 0.89, "learning_rate": 1.7417413432434082e-07, "logits/chosen": -2.0840368270874023, "logits/rejected": -1.9253311157226562, "logps/chosen": -502.5130920410156, "logps/rejected": -672.5440673828125, "loss": 0.4869, "rewards/accuracies": 0.75, "rewards/chosen": -2.920154094696045, "rewards/margins": 1.1422127485275269, "rewards/rejected": -4.062366962432861, "step": 13640 }, { "epoch": 0.89, "learning_rate": 1.7208624945738855e-07, "logits/chosen": -2.071925640106201, "logits/rejected": -2.2120015621185303, "logps/chosen": -634.158447265625, "logps/rejected": -607.0089721679688, "loss": 0.7913, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.838249921798706, "rewards/margins": -0.061018358916044235, "rewards/rejected": -2.7772316932678223, "step": 13650 }, { "epoch": 0.89, "learning_rate": 1.7001050812889995e-07, "logits/chosen": -2.302963972091675, "logits/rejected": -1.9083874225616455, "logps/chosen": -608.5677490234375, "logps/rejected": -685.5296020507812, "loss": 0.5522, "rewards/accuracies": 0.75, "rewards/chosen": -3.0623526573181152, "rewards/margins": 1.2272964715957642, "rewards/rejected": -4.28964900970459, "step": 13660 }, { "epoch": 0.89, "learning_rate": 1.679469211669596e-07, "logits/chosen": -2.027047634124756, "logits/rejected": -2.0308895111083984, "logps/chosen": -660.1836547851562, "logps/rejected": -792.7001953125, "loss": 0.5356, "rewards/accuracies": 0.75, "rewards/chosen": -2.6164486408233643, "rewards/margins": 0.7289638519287109, "rewards/rejected": -3.345412492752075, "step": 13670 }, { "epoch": 0.9, "learning_rate": 1.6589549933624715e-07, "logits/chosen": -2.072457790374756, "logits/rejected": -1.5405828952789307, "logps/chosen": -585.9480590820312, "logps/rejected": -636.3859252929688, "loss": 0.4296, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.647041082382202, "rewards/margins": 1.593322992324829, "rewards/rejected": -4.240364074707031, "step": 13680 }, { "epoch": 0.9, "learning_rate": 1.638562533379845e-07, "logits/chosen": -1.9048030376434326, "logits/rejected": -1.890991449356079, "logps/chosen": -559.7710571289062, "logps/rejected": -668.1385498046875, "loss": 0.5207, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.792799711227417, "rewards/margins": 1.087615728378296, "rewards/rejected": -3.880415439605713, "step": 13690 }, { "epoch": 0.9, "learning_rate": 1.6182919380987676e-07, "logits/chosen": -2.223335027694702, "logits/rejected": -1.4407284259796143, "logps/chosen": -591.7991943359375, "logps/rejected": -634.6351318359375, "loss": 0.3976, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.7539069652557373, "rewards/margins": 1.480650782585144, "rewards/rejected": -4.234557628631592, "step": 13700 }, { "epoch": 0.9, "learning_rate": 1.598143313260603e-07, "logits/chosen": -2.0772881507873535, "logits/rejected": -1.9280656576156616, "logps/chosen": -591.2493896484375, "logps/rejected": -637.0396728515625, "loss": 0.6488, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5404486656188965, "rewards/margins": 0.6507530212402344, "rewards/rejected": -3.1912014484405518, "step": 13710 }, { "epoch": 0.9, "learning_rate": 1.5781167639704415e-07, "logits/chosen": -1.6263024806976318, "logits/rejected": -1.94978928565979, "logps/chosen": -597.3123779296875, "logps/rejected": -677.3203125, "loss": 0.5913, "rewards/accuracies": 0.75, "rewards/chosen": -3.080565929412842, "rewards/margins": 0.8565647006034851, "rewards/rejected": -3.9371306896209717, "step": 13720 }, { "epoch": 0.9, "learning_rate": 1.5582123946965787e-07, "logits/chosen": -2.0462169647216797, "logits/rejected": -2.0411927700042725, "logps/chosen": -604.1297607421875, "logps/rejected": -650.912109375, "loss": 0.5571, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8162455558776855, "rewards/margins": 0.8516273498535156, "rewards/rejected": -3.6678733825683594, "step": 13730 }, { "epoch": 0.9, "learning_rate": 1.5384303092699504e-07, "logits/chosen": -1.9073169231414795, "logits/rejected": -1.6117340326309204, "logps/chosen": -558.4398193359375, "logps/rejected": -686.1032104492188, "loss": 0.436, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.904715061187744, "rewards/margins": 1.413425087928772, "rewards/rejected": -4.318139553070068, "step": 13740 }, { "epoch": 0.9, "learning_rate": 1.518770610883613e-07, "logits/chosen": -1.9963598251342773, "logits/rejected": -1.8562904596328735, "logps/chosen": -521.17333984375, "logps/rejected": -635.5724487304688, "loss": 0.5527, "rewards/accuracies": 0.75, "rewards/chosen": -2.555896520614624, "rewards/margins": 1.0598012208938599, "rewards/rejected": -3.6156983375549316, "step": 13750 }, { "epoch": 0.9, "learning_rate": 1.4992334020921735e-07, "logits/chosen": -1.9674097299575806, "logits/rejected": -1.5446860790252686, "logps/chosen": -520.31298828125, "logps/rejected": -600.7852172851562, "loss": 0.4879, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.430971622467041, "rewards/margins": 1.3507161140441895, "rewards/rejected": -3.7816879749298096, "step": 13760 }, { "epoch": 0.9, "learning_rate": 1.4798187848112905e-07, "logits/chosen": -2.195971965789795, "logits/rejected": -2.413776397705078, "logps/chosen": -617.2885131835938, "logps/rejected": -696.2974853515625, "loss": 0.6088, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3813633918762207, "rewards/margins": 1.0204932689666748, "rewards/rejected": -3.4018566608428955, "step": 13770 }, { "epoch": 0.9, "learning_rate": 1.460526860317113e-07, "logits/chosen": -2.0938820838928223, "logits/rejected": -1.679978370666504, "logps/chosen": -563.26806640625, "logps/rejected": -562.6168212890625, "loss": 0.7427, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.4968013763427734, "rewards/margins": 0.9434688687324524, "rewards/rejected": -3.440270185470581, "step": 13780 }, { "epoch": 0.9, "learning_rate": 1.441357729245771e-07, "logits/chosen": -1.9362106323242188, "logits/rejected": -1.9356101751327515, "logps/chosen": -622.1790771484375, "logps/rejected": -744.4317626953125, "loss": 0.4152, "rewards/accuracies": 0.75, "rewards/chosen": -2.6212239265441895, "rewards/margins": 0.9055881500244141, "rewards/rejected": -3.5268120765686035, "step": 13790 }, { "epoch": 0.9, "learning_rate": 1.4223114915928482e-07, "logits/chosen": -1.968400239944458, "logits/rejected": -2.054955005645752, "logps/chosen": -483.0773010253906, "logps/rejected": -671.42919921875, "loss": 0.4554, "rewards/accuracies": 0.75, "rewards/chosen": -3.0358223915100098, "rewards/margins": 1.2512052059173584, "rewards/rejected": -4.287027359008789, "step": 13800 }, { "epoch": 0.9, "learning_rate": 1.403388246712842e-07, "logits/chosen": -2.0551671981811523, "logits/rejected": -2.219425678253174, "logps/chosen": -569.8794555664062, "logps/rejected": -619.9769287109375, "loss": 0.4425, "rewards/accuracies": 0.75, "rewards/chosen": -2.722309112548828, "rewards/margins": 0.9440568685531616, "rewards/rejected": -3.6663658618927, "step": 13810 }, { "epoch": 0.9, "learning_rate": 1.3845880933186757e-07, "logits/chosen": -1.735666036605835, "logits/rejected": -1.5795748233795166, "logps/chosen": -581.3074340820312, "logps/rejected": -585.5355224609375, "loss": 0.5682, "rewards/accuracies": 0.75, "rewards/chosen": -3.425495147705078, "rewards/margins": 0.8304556012153625, "rewards/rejected": -4.255950450897217, "step": 13820 }, { "epoch": 0.9, "learning_rate": 1.3659111294811457e-07, "logits/chosen": -2.090566396713257, "logits/rejected": -1.9472938776016235, "logps/chosen": -597.61865234375, "logps/rejected": -634.379638671875, "loss": 0.4947, "rewards/accuracies": 0.75, "rewards/chosen": -2.5898215770721436, "rewards/margins": 0.7958720922470093, "rewards/rejected": -3.3856937885284424, "step": 13830 }, { "epoch": 0.91, "learning_rate": 1.347357452628459e-07, "logits/chosen": -1.9464733600616455, "logits/rejected": -2.106271743774414, "logps/chosen": -465.6495056152344, "logps/rejected": -636.1798095703125, "loss": 0.4478, "rewards/accuracies": 0.75, "rewards/chosen": -2.5175442695617676, "rewards/margins": 1.1137826442718506, "rewards/rejected": -3.6313271522521973, "step": 13840 }, { "epoch": 0.91, "learning_rate": 1.3289271595456732e-07, "logits/chosen": -1.9521602392196655, "logits/rejected": -2.1690216064453125, "logps/chosen": -469.918212890625, "logps/rejected": -746.4378051757812, "loss": 0.4991, "rewards/accuracies": 0.75, "rewards/chosen": -2.1301827430725098, "rewards/margins": 1.5656712055206299, "rewards/rejected": -3.6958537101745605, "step": 13850 }, { "epoch": 0.91, "learning_rate": 1.310620346374228e-07, "logits/chosen": -1.9243173599243164, "logits/rejected": -1.9282804727554321, "logps/chosen": -574.6932373046875, "logps/rejected": -654.2211303710938, "loss": 0.4016, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.658046245574951, "rewards/margins": 1.051507592201233, "rewards/rejected": -3.7095534801483154, "step": 13860 }, { "epoch": 0.91, "learning_rate": 1.2924371086114274e-07, "logits/chosen": -2.149164915084839, "logits/rejected": -1.7731244564056396, "logps/chosen": -556.2320556640625, "logps/rejected": -671.5202026367188, "loss": 0.5376, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.921588897705078, "rewards/margins": 1.4650413990020752, "rewards/rejected": -4.386630058288574, "step": 13870 }, { "epoch": 0.91, "learning_rate": 1.274377541109953e-07, "logits/chosen": -1.784868597984314, "logits/rejected": -1.7221359014511108, "logps/chosen": -615.0252075195312, "logps/rejected": -646.4591674804688, "loss": 0.4736, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1848056316375732, "rewards/margins": 1.1370110511779785, "rewards/rejected": -4.321816921234131, "step": 13880 }, { "epoch": 0.91, "learning_rate": 1.2564417380773435e-07, "logits/chosen": -2.166748046875, "logits/rejected": -2.1211986541748047, "logps/chosen": -525.5357666015625, "logps/rejected": -550.85400390625, "loss": 0.7374, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.6531982421875, "rewards/margins": 0.2655898928642273, "rewards/rejected": -2.918788194656372, "step": 13890 }, { "epoch": 0.91, "learning_rate": 1.2386297930755436e-07, "logits/chosen": -2.168666362762451, "logits/rejected": -1.830987572669983, "logps/chosen": -577.5469970703125, "logps/rejected": -601.9207153320312, "loss": 0.5228, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.034702777862549, "rewards/margins": 0.8586205244064331, "rewards/rejected": -3.8933234214782715, "step": 13900 }, { "epoch": 0.91, "learning_rate": 1.220941799020378e-07, "logits/chosen": -2.2160067558288574, "logits/rejected": -2.0485739707946777, "logps/chosen": -487.3202209472656, "logps/rejected": -670.6369018554688, "loss": 0.531, "rewards/accuracies": 0.75, "rewards/chosen": -2.3113012313842773, "rewards/margins": 1.3632745742797852, "rewards/rejected": -3.6745758056640625, "step": 13910 }, { "epoch": 0.91, "learning_rate": 1.2033778481810975e-07, "logits/chosen": -2.11628794670105, "logits/rejected": -2.22096848487854, "logps/chosen": -599.7377319335938, "logps/rejected": -669.64013671875, "loss": 0.5778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5265109539031982, "rewards/margins": 1.0575026273727417, "rewards/rejected": -3.5840137004852295, "step": 13920 }, { "epoch": 0.91, "learning_rate": 1.1859380321798591e-07, "logits/chosen": -2.1361804008483887, "logits/rejected": -1.877889633178711, "logps/chosen": -502.3155212402344, "logps/rejected": -577.39990234375, "loss": 0.479, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.577942132949829, "rewards/margins": 0.9486045837402344, "rewards/rejected": -3.5265464782714844, "step": 13930 }, { "epoch": 0.91, "learning_rate": 1.1686224419912989e-07, "logits/chosen": -2.0007095336914062, "logits/rejected": -1.7303388118743896, "logps/chosen": -581.5795288085938, "logps/rejected": -736.5750732421875, "loss": 0.505, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.580915689468384, "rewards/margins": 1.4071890115737915, "rewards/rejected": -3.9881045818328857, "step": 13940 }, { "epoch": 0.91, "learning_rate": 1.1514311679420104e-07, "logits/chosen": -2.116205930709839, "logits/rejected": -2.267324447631836, "logps/chosen": -573.3087158203125, "logps/rejected": -651.9915771484375, "loss": 0.526, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6597065925598145, "rewards/margins": 0.9490043520927429, "rewards/rejected": -3.6087112426757812, "step": 13950 }, { "epoch": 0.91, "learning_rate": 1.1343642997101029e-07, "logits/chosen": -2.0283255577087402, "logits/rejected": -1.945887804031372, "logps/chosen": -508.79248046875, "logps/rejected": -639.9680786132812, "loss": 0.687, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1274921894073486, "rewards/margins": 1.3700916767120361, "rewards/rejected": -3.4975838661193848, "step": 13960 }, { "epoch": 0.91, "learning_rate": 1.1174219263247188e-07, "logits/chosen": -1.7178478240966797, "logits/rejected": -1.9336674213409424, "logps/chosen": -558.7196655273438, "logps/rejected": -724.6270141601562, "loss": 0.4229, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -3.0520660877227783, "rewards/margins": 1.747532844543457, "rewards/rejected": -4.7995991706848145, "step": 13970 }, { "epoch": 0.91, "learning_rate": 1.1006041361655839e-07, "logits/chosen": -1.9918807744979858, "logits/rejected": -1.704734206199646, "logps/chosen": -526.9503784179688, "logps/rejected": -597.0250244140625, "loss": 0.4088, "rewards/accuracies": 0.75, "rewards/chosen": -2.31221342086792, "rewards/margins": 1.1227428913116455, "rewards/rejected": -3.4349560737609863, "step": 13980 }, { "epoch": 0.92, "learning_rate": 1.0839110169625189e-07, "logits/chosen": -1.6949018239974976, "logits/rejected": -2.197110414505005, "logps/chosen": -538.4711303710938, "logps/rejected": -767.4324951171875, "loss": 0.585, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4817819595336914, "rewards/margins": 1.339092493057251, "rewards/rejected": -3.8208746910095215, "step": 13990 }, { "epoch": 0.92, "learning_rate": 1.06734265579502e-07, "logits/chosen": -1.6315670013427734, "logits/rejected": -1.8602148294448853, "logps/chosen": -520.1702880859375, "logps/rejected": -650.0765380859375, "loss": 0.4227, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.6304497718811035, "rewards/margins": 1.2677826881408691, "rewards/rejected": -3.8982322216033936, "step": 14000 }, { "epoch": 0.92, "learning_rate": 1.050899139091771e-07, "logits/chosen": -2.0828208923339844, "logits/rejected": -1.7871806621551514, "logps/chosen": -535.5015258789062, "logps/rejected": -591.5552978515625, "loss": 0.5932, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.899904727935791, "rewards/margins": 0.5952678918838501, "rewards/rejected": -3.4951725006103516, "step": 14010 }, { "epoch": 0.92, "learning_rate": 1.0345805526302072e-07, "logits/chosen": -1.8452575206756592, "logits/rejected": -1.7843999862670898, "logps/chosen": -602.8890380859375, "logps/rejected": -660.4598388671875, "loss": 0.7046, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1601126194000244, "rewards/margins": 0.25720563530921936, "rewards/rejected": -3.4173178672790527, "step": 14020 }, { "epoch": 0.92, "learning_rate": 1.0183869815360764e-07, "logits/chosen": -1.950768232345581, "logits/rejected": -1.9863277673721313, "logps/chosen": -510.62371826171875, "logps/rejected": -669.0475463867188, "loss": 0.4555, "rewards/accuracies": 0.75, "rewards/chosen": -2.1132547855377197, "rewards/margins": 1.230995774269104, "rewards/rejected": -3.344250440597534, "step": 14030 }, { "epoch": 0.92, "learning_rate": 1.0023185102829763e-07, "logits/chosen": -2.395341396331787, "logits/rejected": -1.7982008457183838, "logps/chosen": -520.4229125976562, "logps/rejected": -565.4215087890625, "loss": 0.5862, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.729526996612549, "rewards/margins": 1.086969256401062, "rewards/rejected": -3.8164963722229004, "step": 14040 }, { "epoch": 0.92, "learning_rate": 9.863752226919182e-08, "logits/chosen": -2.1661365032196045, "logits/rejected": -1.8107774257659912, "logps/chosen": -597.0890502929688, "logps/rejected": -642.457275390625, "loss": 0.4836, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.7160167694091797, "rewards/margins": 1.1926524639129639, "rewards/rejected": -3.9086689949035645, "step": 14050 }, { "epoch": 0.92, "learning_rate": 9.705572019309107e-08, "logits/chosen": -2.0592494010925293, "logits/rejected": -1.9266738891601562, "logps/chosen": -540.168701171875, "logps/rejected": -708.5350341796875, "loss": 0.5257, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9037212133407593, "rewards/margins": 1.595682144165039, "rewards/rejected": -3.499403476715088, "step": 14060 }, { "epoch": 0.92, "learning_rate": 9.548645305144849e-08, "logits/chosen": -1.9899635314941406, "logits/rejected": -1.475678563117981, "logps/chosen": -504.0489807128906, "logps/rejected": -611.5504150390625, "loss": 0.6257, "rewards/accuracies": 0.75, "rewards/chosen": -3.3494484424591064, "rewards/margins": 0.8685885667800903, "rewards/rejected": -4.218036651611328, "step": 14070 }, { "epoch": 0.92, "learning_rate": 9.392972903033149e-08, "logits/chosen": -2.318446636199951, "logits/rejected": -2.0666234493255615, "logps/chosen": -551.0264892578125, "logps/rejected": -684.6751708984375, "loss": 0.4677, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.280172824859619, "rewards/margins": 1.5428760051727295, "rewards/rejected": -3.8230483531951904, "step": 14080 }, { "epoch": 0.92, "learning_rate": 9.238555625037449e-08, "logits/chosen": -2.116685390472412, "logits/rejected": -1.8756097555160522, "logps/chosen": -518.8292236328125, "logps/rejected": -609.4307861328125, "loss": 0.6441, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.84653639793396, "rewards/margins": 0.8913130760192871, "rewards/rejected": -3.737849473953247, "step": 14090 }, { "epoch": 0.92, "learning_rate": 9.085394276673903e-08, "logits/chosen": -2.1735680103302, "logits/rejected": -2.0650429725646973, "logps/chosen": -649.1590576171875, "logps/rejected": -814.1696166992188, "loss": 0.5992, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.727769374847412, "rewards/margins": 1.1337007284164429, "rewards/rejected": -3.8614699840545654, "step": 14100 }, { "epoch": 0.92, "learning_rate": 8.933489656907157e-08, "logits/chosen": -2.0548338890075684, "logits/rejected": -2.1526238918304443, "logps/chosen": -484.33282470703125, "logps/rejected": -654.7472534179688, "loss": 0.3397, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.680506467819214, "rewards/margins": 1.4911242723464966, "rewards/rejected": -4.171631336212158, "step": 14110 }, { "epoch": 0.92, "learning_rate": 8.782842558146127e-08, "logits/chosen": -2.1364803314208984, "logits/rejected": -1.8520491123199463, "logps/chosen": -480.78509521484375, "logps/rejected": -608.3148803710938, "loss": 0.5262, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6304593086242676, "rewards/margins": 1.3878322839736938, "rewards/rejected": -4.01829195022583, "step": 14120 }, { "epoch": 0.92, "learning_rate": 8.633453766239836e-08, "logits/chosen": -1.675296425819397, "logits/rejected": -2.0876195430755615, "logps/chosen": -495.19342041015625, "logps/rejected": -622.8082885742188, "loss": 0.5607, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.1442830562591553, "rewards/margins": 0.6814892292022705, "rewards/rejected": -3.825772523880005, "step": 14130 }, { "epoch": 0.93, "learning_rate": 8.485324060473448e-08, "logits/chosen": -2.0316433906555176, "logits/rejected": -1.7985146045684814, "logps/chosen": -577.4540405273438, "logps/rejected": -663.97119140625, "loss": 0.5518, "rewards/accuracies": 0.75, "rewards/chosen": -3.1149959564208984, "rewards/margins": 0.9152809381484985, "rewards/rejected": -4.030276775360107, "step": 14140 }, { "epoch": 0.93, "learning_rate": 8.338454213564052e-08, "logits/chosen": -2.191765546798706, "logits/rejected": -2.410991907119751, "logps/chosen": -818.0296630859375, "logps/rejected": -747.2487182617188, "loss": 0.6268, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.624410390853882, "rewards/margins": 0.8381460905075073, "rewards/rejected": -3.4625561237335205, "step": 14150 }, { "epoch": 0.93, "learning_rate": 8.192844991656679e-08, "logits/chosen": -1.954979658126831, "logits/rejected": -1.678969383239746, "logps/chosen": -613.8314208984375, "logps/rejected": -630.6569213867188, "loss": 0.6434, "rewards/accuracies": 0.75, "rewards/chosen": -2.841416835784912, "rewards/margins": 1.1300991773605347, "rewards/rejected": -3.9715161323547363, "step": 14160 }, { "epoch": 0.93, "learning_rate": 8.048497154320434e-08, "logits/chosen": -2.226532459259033, "logits/rejected": -1.5135737657546997, "logps/chosen": -638.7493896484375, "logps/rejected": -624.1115112304688, "loss": 0.5291, "rewards/accuracies": 0.75, "rewards/chosen": -2.5119431018829346, "rewards/margins": 0.841154932975769, "rewards/rejected": -3.353097915649414, "step": 14170 }, { "epoch": 0.93, "learning_rate": 7.905411454544265e-08, "logits/chosen": -1.9530729055404663, "logits/rejected": -2.051448106765747, "logps/chosen": -494.66705322265625, "logps/rejected": -704.9006958007812, "loss": 0.5769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.5946383476257324, "rewards/margins": 1.4650105237960815, "rewards/rejected": -4.0596489906311035, "step": 14180 }, { "epoch": 0.93, "learning_rate": 7.763588638733332e-08, "logits/chosen": -2.0100789070129395, "logits/rejected": -2.243278741836548, "logps/chosen": -556.4869384765625, "logps/rejected": -712.3450927734375, "loss": 0.4168, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4392809867858887, "rewards/margins": 2.094257354736328, "rewards/rejected": -4.533538341522217, "step": 14190 }, { "epoch": 0.93, "learning_rate": 7.623029446704899e-08, "logits/chosen": -2.285468578338623, "logits/rejected": -1.9770171642303467, "logps/chosen": -600.0869140625, "logps/rejected": -674.52294921875, "loss": 0.3591, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4405174255371094, "rewards/margins": 1.3958009481430054, "rewards/rejected": -3.8363184928894043, "step": 14200 }, { "epoch": 0.93, "learning_rate": 7.483734611684557e-08, "logits/chosen": -2.212456226348877, "logits/rejected": -1.953904151916504, "logps/chosen": -514.7883911132812, "logps/rejected": -627.7686157226562, "loss": 0.4992, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.890023946762085, "rewards/margins": 1.129746675491333, "rewards/rejected": -4.019770622253418, "step": 14210 }, { "epoch": 0.93, "learning_rate": 7.345704860302366e-08, "logits/chosen": -1.8617370128631592, "logits/rejected": -1.75583016872406, "logps/chosen": -460.3050231933594, "logps/rejected": -571.10302734375, "loss": 0.5618, "rewards/accuracies": 0.75, "rewards/chosen": -2.465756893157959, "rewards/margins": 1.1497135162353516, "rewards/rejected": -3.6154708862304688, "step": 14220 }, { "epoch": 0.93, "learning_rate": 7.208940912589224e-08, "logits/chosen": -2.284726619720459, "logits/rejected": -1.6230722665786743, "logps/chosen": -520.0506591796875, "logps/rejected": -572.2464599609375, "loss": 0.5166, "rewards/accuracies": 0.75, "rewards/chosen": -2.474360704421997, "rewards/margins": 0.902630627155304, "rewards/rejected": -3.376990795135498, "step": 14230 }, { "epoch": 0.93, "learning_rate": 7.073443481972753e-08, "logits/chosen": -1.8819868564605713, "logits/rejected": -2.2348008155822754, "logps/chosen": -547.7752685546875, "logps/rejected": -621.5697021484375, "loss": 0.5359, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.5357489585876465, "rewards/margins": 0.810367226600647, "rewards/rejected": -3.346116304397583, "step": 14240 }, { "epoch": 0.93, "learning_rate": 6.939213275274027e-08, "logits/chosen": -1.5495518445968628, "logits/rejected": -1.8627188205718994, "logps/chosen": -553.0704345703125, "logps/rejected": -624.6507568359375, "loss": 0.515, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8459842205047607, "rewards/margins": 0.6540545225143433, "rewards/rejected": -3.5000386238098145, "step": 14250 }, { "epoch": 0.93, "learning_rate": 6.806250992703461e-08, "logits/chosen": -2.2263686656951904, "logits/rejected": -1.7921960353851318, "logps/chosen": -630.40234375, "logps/rejected": -653.8508911132812, "loss": 0.5446, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4244742393493652, "rewards/margins": 1.3381140232086182, "rewards/rejected": -3.7625880241394043, "step": 14260 }, { "epoch": 0.93, "learning_rate": 6.674557327857572e-08, "logits/chosen": -2.0738942623138428, "logits/rejected": -1.8814947605133057, "logps/chosen": -545.239501953125, "logps/rejected": -628.984619140625, "loss": 0.5258, "rewards/accuracies": 0.75, "rewards/chosen": -3.1627590656280518, "rewards/margins": 0.5774599313735962, "rewards/rejected": -3.7402186393737793, "step": 14270 }, { "epoch": 0.93, "learning_rate": 6.544132967714917e-08, "logits/chosen": -1.8430073261260986, "logits/rejected": -1.827848196029663, "logps/chosen": -531.9650268554688, "logps/rejected": -599.7696533203125, "loss": 0.7531, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.364248275756836, "rewards/margins": 0.6611557602882385, "rewards/rejected": -4.0254034996032715, "step": 14280 }, { "epoch": 0.93, "learning_rate": 6.414978592632932e-08, "logits/chosen": -1.9486316442489624, "logits/rejected": -1.8632211685180664, "logps/chosen": -615.6444702148438, "logps/rejected": -666.4421997070312, "loss": 0.5636, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.3053977489471436, "rewards/margins": 1.1161303520202637, "rewards/rejected": -3.4215283393859863, "step": 14290 }, { "epoch": 0.94, "learning_rate": 6.287094876344046e-08, "logits/chosen": -1.9855334758758545, "logits/rejected": -1.8690643310546875, "logps/chosen": -536.8694458007812, "logps/rejected": -622.2525634765625, "loss": 0.5167, "rewards/accuracies": 0.75, "rewards/chosen": -2.398989200592041, "rewards/margins": 1.2469675540924072, "rewards/rejected": -3.645956516265869, "step": 14300 }, { "epoch": 0.94, "learning_rate": 6.160482485952413e-08, "logits/chosen": -1.8896257877349854, "logits/rejected": -1.643678069114685, "logps/chosen": -560.6286010742188, "logps/rejected": -716.4464111328125, "loss": 0.6004, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.47556734085083, "rewards/margins": 1.5186498165130615, "rewards/rejected": -3.9942169189453125, "step": 14310 }, { "epoch": 0.94, "learning_rate": 6.035142081930234e-08, "logits/chosen": -2.1216776371002197, "logits/rejected": -1.4286061525344849, "logps/chosen": -650.0715942382812, "logps/rejected": -665.3375244140625, "loss": 0.4731, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.341348648071289, "rewards/margins": 1.6465059518814087, "rewards/rejected": -3.9878551959991455, "step": 14320 }, { "epoch": 0.94, "learning_rate": 5.911074318114496e-08, "logits/chosen": -1.494458794593811, "logits/rejected": -1.7939786911010742, "logps/chosen": -537.7138671875, "logps/rejected": -615.3954467773438, "loss": 0.6188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2225029468536377, "rewards/margins": 0.672461748123169, "rewards/rejected": -2.8949646949768066, "step": 14330 }, { "epoch": 0.94, "learning_rate": 5.788279841703381e-08, "logits/chosen": -1.9612480401992798, "logits/rejected": -2.061548948287964, "logps/chosen": -582.6454467773438, "logps/rejected": -694.6004638671875, "loss": 0.6293, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.083461284637451, "rewards/margins": 0.7667292356491089, "rewards/rejected": -3.8501904010772705, "step": 14340 }, { "epoch": 0.94, "learning_rate": 5.66675929325311e-08, "logits/chosen": -2.2754197120666504, "logits/rejected": -2.2194416522979736, "logps/chosen": -477.0834045410156, "logps/rejected": -582.92333984375, "loss": 0.467, "rewards/accuracies": 0.75, "rewards/chosen": -2.0185394287109375, "rewards/margins": 1.1576719284057617, "rewards/rejected": -3.17621111869812, "step": 14350 }, { "epoch": 0.94, "learning_rate": 5.546513306674301e-08, "logits/chosen": -2.2503647804260254, "logits/rejected": -1.8498117923736572, "logps/chosen": -567.9696044921875, "logps/rejected": -627.1837158203125, "loss": 0.6846, "rewards/accuracies": 0.75, "rewards/chosen": -3.134516954421997, "rewards/margins": 0.9543534517288208, "rewards/rejected": -4.088870048522949, "step": 14360 }, { "epoch": 0.94, "learning_rate": 5.4275425092290004e-08, "logits/chosen": -2.0579543113708496, "logits/rejected": -1.4061779975891113, "logps/chosen": -577.0407104492188, "logps/rejected": -603.1378173828125, "loss": 0.4578, "rewards/accuracies": 0.75, "rewards/chosen": -2.449418783187866, "rewards/margins": 1.5462074279785156, "rewards/rejected": -3.995626449584961, "step": 14370 }, { "epoch": 0.94, "learning_rate": 5.309847521527078e-08, "logits/chosen": -1.9693748950958252, "logits/rejected": -1.7713311910629272, "logps/chosen": -477.5189514160156, "logps/rejected": -606.2625732421875, "loss": 0.5151, "rewards/accuracies": 0.75, "rewards/chosen": -2.545546770095825, "rewards/margins": 1.240389108657837, "rewards/rejected": -3.785935878753662, "step": 14380 }, { "epoch": 0.94, "learning_rate": 5.1934289575233385e-08, "logits/chosen": -2.1927027702331543, "logits/rejected": -2.040621042251587, "logps/chosen": -565.4403076171875, "logps/rejected": -641.1866455078125, "loss": 0.3722, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2529892921447754, "rewards/margins": 1.0782588720321655, "rewards/rejected": -3.3312485218048096, "step": 14390 }, { "epoch": 0.94, "learning_rate": 5.078287424513994e-08, "logits/chosen": -2.3554036617279053, "logits/rejected": -1.893204689025879, "logps/chosen": -580.8216552734375, "logps/rejected": -546.94677734375, "loss": 0.5845, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0198404788970947, "rewards/margins": 1.1553198099136353, "rewards/rejected": -3.1751601696014404, "step": 14400 }, { "epoch": 0.94, "learning_rate": 4.964423523133671e-08, "logits/chosen": -1.8956031799316406, "logits/rejected": -1.57084321975708, "logps/chosen": -596.0135498046875, "logps/rejected": -654.08544921875, "loss": 0.5841, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.9032137393951416, "rewards/margins": 0.8584194183349609, "rewards/rejected": -3.7616333961486816, "step": 14410 }, { "epoch": 0.94, "learning_rate": 4.8518378473522976e-08, "logits/chosen": -1.9264984130859375, "logits/rejected": -1.9649795293807983, "logps/chosen": -718.8721313476562, "logps/rejected": -723.98974609375, "loss": 0.5798, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -2.825594425201416, "rewards/margins": 0.8478279113769531, "rewards/rejected": -3.673422336578369, "step": 14420 }, { "epoch": 0.94, "learning_rate": 4.7405309844718584e-08, "logits/chosen": -1.9904693365097046, "logits/rejected": -2.0691580772399902, "logps/chosen": -662.9415893554688, "logps/rejected": -656.4781494140625, "loss": 0.5028, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.6238646507263184, "rewards/margins": 1.5457340478897095, "rewards/rejected": -4.1695990562438965, "step": 14430 }, { "epoch": 0.94, "learning_rate": 4.630503515123508e-08, "logits/chosen": -2.187486410140991, "logits/rejected": -1.6711041927337646, "logps/chosen": -578.8890991210938, "logps/rejected": -642.47216796875, "loss": 0.4737, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4145071506500244, "rewards/margins": 1.5308291912078857, "rewards/rejected": -3.9453365802764893, "step": 14440 }, { "epoch": 0.95, "learning_rate": 4.5217560132644056e-08, "logits/chosen": -2.026479959487915, "logits/rejected": -2.1466727256774902, "logps/chosen": -517.5628051757812, "logps/rejected": -748.7684326171875, "loss": 0.4446, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.1509206295013428, "rewards/margins": 1.1940914392471313, "rewards/rejected": -4.3450117111206055, "step": 14450 }, { "epoch": 0.95, "learning_rate": 4.41428904617483e-08, "logits/chosen": -1.8300641775131226, "logits/rejected": -1.963171362876892, "logps/chosen": -551.1915283203125, "logps/rejected": -632.8893432617188, "loss": 0.4552, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4853625297546387, "rewards/margins": 1.235141634941101, "rewards/rejected": -3.72050404548645, "step": 14460 }, { "epoch": 0.95, "learning_rate": 4.3081031744550696e-08, "logits/chosen": -2.18308687210083, "logits/rejected": -1.511857271194458, "logps/chosen": -482.4222717285156, "logps/rejected": -541.9293212890625, "loss": 0.6497, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4646048545837402, "rewards/margins": 0.9028294682502747, "rewards/rejected": -3.3674347400665283, "step": 14470 }, { "epoch": 0.95, "learning_rate": 4.2031989520227025e-08, "logits/chosen": -1.781324028968811, "logits/rejected": -1.8688886165618896, "logps/chosen": -539.0523071289062, "logps/rejected": -607.6040649414062, "loss": 0.4513, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.917088747024536, "rewards/margins": 1.1266096830368042, "rewards/rejected": -4.043698310852051, "step": 14480 }, { "epoch": 0.95, "learning_rate": 4.099576926109461e-08, "logits/chosen": -1.958184003829956, "logits/rejected": -2.033747434616089, "logps/chosen": -712.1417236328125, "logps/rejected": -733.5404052734375, "loss": 0.6166, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -3.5497589111328125, "rewards/margins": 0.6533251404762268, "rewards/rejected": -4.2030839920043945, "step": 14490 }, { "epoch": 0.95, "learning_rate": 3.997237637258705e-08, "logits/chosen": -1.6168861389160156, "logits/rejected": -1.8724403381347656, "logps/chosen": -546.1658325195312, "logps/rejected": -616.4589233398438, "loss": 0.6491, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.780271530151367, "rewards/margins": 0.6067410111427307, "rewards/rejected": -3.387012481689453, "step": 14500 }, { "epoch": 0.95, "learning_rate": 3.8961816193222035e-08, "logits/chosen": -2.250966787338257, "logits/rejected": -1.8082072734832764, "logps/chosen": -601.9894409179688, "logps/rejected": -652.6393432617188, "loss": 0.4912, "rewards/accuracies": 0.75, "rewards/chosen": -2.557422161102295, "rewards/margins": 1.5952788591384888, "rewards/rejected": -4.152700901031494, "step": 14510 }, { "epoch": 0.95, "learning_rate": 3.79640939945769e-08, "logits/chosen": -2.213221311569214, "logits/rejected": -1.8191182613372803, "logps/chosen": -567.0030517578125, "logps/rejected": -648.6131591796875, "loss": 0.532, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.401536703109741, "rewards/margins": 1.0345122814178467, "rewards/rejected": -3.436048984527588, "step": 14520 }, { "epoch": 0.95, "learning_rate": 3.697921498125895e-08, "logits/chosen": -2.357787609100342, "logits/rejected": -1.991206407546997, "logps/chosen": -641.5526123046875, "logps/rejected": -709.0422973632812, "loss": 0.5707, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.414332628250122, "rewards/margins": 1.1486154794692993, "rewards/rejected": -3.562948226928711, "step": 14530 }, { "epoch": 0.95, "learning_rate": 3.6007184290880456e-08, "logits/chosen": -2.375516176223755, "logits/rejected": -2.1332132816314697, "logps/chosen": -608.3237915039062, "logps/rejected": -620.3967895507812, "loss": 0.5038, "rewards/accuracies": 0.75, "rewards/chosen": -1.9436047077178955, "rewards/margins": 1.2093195915222168, "rewards/rejected": -3.1529242992401123, "step": 14540 }, { "epoch": 0.95, "learning_rate": 3.504800699402872e-08, "logits/chosen": -2.153402090072632, "logits/rejected": -1.7231197357177734, "logps/chosen": -558.6926879882812, "logps/rejected": -608.3848876953125, "loss": 0.4761, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -1.9543708562850952, "rewards/margins": 0.9569627642631531, "rewards/rejected": -2.9113335609436035, "step": 14550 }, { "epoch": 0.95, "learning_rate": 3.4101688094242967e-08, "logits/chosen": -1.902076005935669, "logits/rejected": -1.8242212533950806, "logps/chosen": -474.19512939453125, "logps/rejected": -666.9522705078125, "loss": 0.4113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4757742881774902, "rewards/margins": 1.9288349151611328, "rewards/rejected": -4.404609203338623, "step": 14560 }, { "epoch": 0.95, "learning_rate": 3.3168232527985564e-08, "logits/chosen": -2.121044397354126, "logits/rejected": -1.6909615993499756, "logps/chosen": -657.7474365234375, "logps/rejected": -793.020263671875, "loss": 0.4025, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.607120990753174, "rewards/margins": 1.5403274297714233, "rewards/rejected": -4.147448539733887, "step": 14570 }, { "epoch": 0.95, "learning_rate": 3.224764516461892e-08, "logits/chosen": -1.758429765701294, "logits/rejected": -1.5632776021957397, "logps/chosen": -597.2870483398438, "logps/rejected": -627.7225341796875, "loss": 0.5881, "rewards/accuracies": 0.75, "rewards/chosen": -2.684779405593872, "rewards/margins": 1.027318000793457, "rewards/rejected": -3.712097644805908, "step": 14580 }, { "epoch": 0.95, "learning_rate": 3.133993080637665e-08, "logits/chosen": -2.2052054405212402, "logits/rejected": -1.766552209854126, "logps/chosen": -565.1602783203125, "logps/rejected": -716.3389892578125, "loss": 0.3828, "rewards/accuracies": 0.75, "rewards/chosen": -2.767854690551758, "rewards/margins": 1.580475091934204, "rewards/rejected": -4.348330020904541, "step": 14590 }, { "epoch": 0.96, "learning_rate": 3.0445094188342186e-08, "logits/chosen": -2.425266742706299, "logits/rejected": -2.240478992462158, "logps/chosen": -547.5765991210938, "logps/rejected": -691.012939453125, "loss": 0.498, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.614175796508789, "rewards/margins": 1.1199251413345337, "rewards/rejected": -3.734100341796875, "step": 14600 }, { "epoch": 0.96, "learning_rate": 2.9563139978421028e-08, "logits/chosen": -2.029474973678589, "logits/rejected": -1.9663759469985962, "logps/chosen": -583.2051391601562, "logps/rejected": -761.0662841796875, "loss": 0.6742, "rewards/accuracies": 0.75, "rewards/chosen": -2.939052104949951, "rewards/margins": 1.2332971096038818, "rewards/rejected": -4.172348976135254, "step": 14610 }, { "epoch": 0.96, "learning_rate": 2.869407277731939e-08, "logits/chosen": -1.5976159572601318, "logits/rejected": -1.4359140396118164, "logps/chosen": -448.70831298828125, "logps/rejected": -568.3868408203125, "loss": 0.4283, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6614177227020264, "rewards/margins": 1.1558550596237183, "rewards/rejected": -3.817272901535034, "step": 14620 }, { "epoch": 0.96, "learning_rate": 2.783789711851642e-08, "logits/chosen": -1.7493689060211182, "logits/rejected": -1.9442659616470337, "logps/chosen": -503.8953552246094, "logps/rejected": -651.5677490234375, "loss": 0.4684, "rewards/accuracies": 0.75, "rewards/chosen": -2.3239119052886963, "rewards/margins": 1.5627952814102173, "rewards/rejected": -3.886706829071045, "step": 14630 }, { "epoch": 0.96, "learning_rate": 2.6994617468244778e-08, "logits/chosen": -1.977817177772522, "logits/rejected": -2.0746657848358154, "logps/chosen": -481.6962890625, "logps/rejected": -559.5147705078125, "loss": 0.6108, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6540279388427734, "rewards/margins": 0.7342770099639893, "rewards/rejected": -3.3883049488067627, "step": 14640 }, { "epoch": 0.96, "learning_rate": 2.6164238225463155e-08, "logits/chosen": -1.8128788471221924, "logits/rejected": -2.017542600631714, "logps/chosen": -582.6580200195312, "logps/rejected": -668.75, "loss": 0.5317, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.9285879135131836, "rewards/margins": 1.08481764793396, "rewards/rejected": -4.013405799865723, "step": 14650 }, { "epoch": 0.96, "learning_rate": 2.534676372183742e-08, "logits/chosen": -1.7201662063598633, "logits/rejected": -1.8715013265609741, "logps/chosen": -576.0623779296875, "logps/rejected": -882.0549926757812, "loss": 0.5135, "rewards/accuracies": 0.75, "rewards/chosen": -2.6352553367614746, "rewards/margins": 1.320225477218628, "rewards/rejected": -3.9554810523986816, "step": 14660 }, { "epoch": 0.96, "learning_rate": 2.4542198221714218e-08, "logits/chosen": -1.8759219646453857, "logits/rejected": -1.9353220462799072, "logps/chosen": -608.5661010742188, "logps/rejected": -724.3257446289062, "loss": 0.5991, "rewards/accuracies": 0.75, "rewards/chosen": -2.945322036743164, "rewards/margins": 1.2213268280029297, "rewards/rejected": -4.166649341583252, "step": 14670 }, { "epoch": 0.96, "learning_rate": 2.3750545922101854e-08, "logits/chosen": -2.0961718559265137, "logits/rejected": -1.6841129064559937, "logps/chosen": -521.6822509765625, "logps/rejected": -638.8639526367188, "loss": 0.411, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.7227702140808105, "rewards/margins": 1.513890027999878, "rewards/rejected": -4.236660003662109, "step": 14680 }, { "epoch": 0.96, "learning_rate": 2.2971810952646112e-08, "logits/chosen": -2.2837064266204834, "logits/rejected": -2.1549346446990967, "logps/chosen": -638.1166381835938, "logps/rejected": -623.2474975585938, "loss": 0.6452, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.713475227355957, "rewards/margins": 0.6098706126213074, "rewards/rejected": -3.323345899581909, "step": 14690 }, { "epoch": 0.96, "learning_rate": 2.2205997375610576e-08, "logits/chosen": -2.247194290161133, "logits/rejected": -1.9691559076309204, "logps/chosen": -541.24609375, "logps/rejected": -600.1187744140625, "loss": 0.5547, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.379558801651001, "rewards/margins": 0.8371645212173462, "rewards/rejected": -3.2167232036590576, "step": 14700 }, { "epoch": 0.96, "learning_rate": 2.1453109185853304e-08, "logits/chosen": -1.9703853130340576, "logits/rejected": -2.1267032623291016, "logps/chosen": -538.843017578125, "logps/rejected": -636.073974609375, "loss": 0.5679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.287182331085205, "rewards/margins": 0.865347683429718, "rewards/rejected": -3.152529716491699, "step": 14710 }, { "epoch": 0.96, "learning_rate": 2.0713150310808784e-08, "logits/chosen": -1.9766517877578735, "logits/rejected": -2.010066509246826, "logps/chosen": -465.16204833984375, "logps/rejected": -689.02783203125, "loss": 0.5972, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.1616501808166504, "rewards/margins": 1.6034317016601562, "rewards/rejected": -3.7650814056396484, "step": 14720 }, { "epoch": 0.96, "learning_rate": 1.9986124610464064e-08, "logits/chosen": -1.9305881261825562, "logits/rejected": -1.5496737957000732, "logps/chosen": -587.0402221679688, "logps/rejected": -731.3446044921875, "loss": 0.4927, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.2727527618408203, "rewards/margins": 1.5993540287017822, "rewards/rejected": -3.8721065521240234, "step": 14730 }, { "epoch": 0.96, "learning_rate": 1.927203587734211e-08, "logits/chosen": -2.180502414703369, "logits/rejected": -1.900294542312622, "logps/chosen": -609.2461547851562, "logps/rejected": -682.5765380859375, "loss": 0.5784, "rewards/accuracies": 0.5, "rewards/chosen": -3.084608554840088, "rewards/margins": 0.7377563714981079, "rewards/rejected": -3.8223655223846436, "step": 14740 }, { "epoch": 0.97, "learning_rate": 1.8570887836479034e-08, "logits/chosen": -2.027315855026245, "logits/rejected": -1.824481725692749, "logps/chosen": -556.1491088867188, "logps/rejected": -595.5225830078125, "loss": 0.4913, "rewards/accuracies": 0.75, "rewards/chosen": -2.941615581512451, "rewards/margins": 0.7568367123603821, "rewards/rejected": -3.6984519958496094, "step": 14750 }, { "epoch": 0.97, "learning_rate": 1.7882684145406616e-08, "logits/chosen": -1.7561118602752686, "logits/rejected": -2.1623549461364746, "logps/chosen": -552.8467407226562, "logps/rejected": -614.5779418945312, "loss": 0.5921, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.634575605392456, "rewards/margins": 1.004473328590393, "rewards/rejected": -3.6390490531921387, "step": 14760 }, { "epoch": 0.97, "learning_rate": 1.7207428394132865e-08, "logits/chosen": -1.8452908992767334, "logits/rejected": -2.2522010803222656, "logps/chosen": -595.1798095703125, "logps/rejected": -801.8895263671875, "loss": 0.4474, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8258135318756104, "rewards/margins": 1.2675896883010864, "rewards/rejected": -4.093403339385986, "step": 14770 }, { "epoch": 0.97, "learning_rate": 1.654512410512177e-08, "logits/chosen": -1.8162853717803955, "logits/rejected": -1.6008058786392212, "logps/chosen": -591.1409912109375, "logps/rejected": -628.3958129882812, "loss": 0.5372, "rewards/accuracies": 0.5, "rewards/chosen": -3.6309876441955566, "rewards/margins": 0.3401626646518707, "rewards/rejected": -3.9711506366729736, "step": 14780 }, { "epoch": 0.97, "learning_rate": 1.5895774733277468e-08, "logits/chosen": -1.8977062702178955, "logits/rejected": -1.8740453720092773, "logps/chosen": -610.1856689453125, "logps/rejected": -745.1798095703125, "loss": 0.642, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -3.373018741607666, "rewards/margins": 0.5555127263069153, "rewards/rejected": -3.9285316467285156, "step": 14790 }, { "epoch": 0.97, "learning_rate": 1.5259383665924e-08, "logits/chosen": -1.9771171808242798, "logits/rejected": -2.1021008491516113, "logps/chosen": -546.7236328125, "logps/rejected": -603.7360229492188, "loss": 0.4806, "rewards/accuracies": 0.75, "rewards/chosen": -2.3632724285125732, "rewards/margins": 0.8726188540458679, "rewards/rejected": -3.235891342163086, "step": 14800 }, { "epoch": 0.97, "learning_rate": 1.4635954222789461e-08, "logits/chosen": -1.9596999883651733, "logits/rejected": -2.053947687149048, "logps/chosen": -546.943359375, "logps/rejected": -663.4627075195312, "loss": 0.5462, "rewards/accuracies": 0.75, "rewards/chosen": -2.808772087097168, "rewards/margins": 0.8841426968574524, "rewards/rejected": -3.6929144859313965, "step": 14810 }, { "epoch": 0.97, "learning_rate": 1.402548965598688e-08, "logits/chosen": -2.0539464950561523, "logits/rejected": -2.116236925125122, "logps/chosen": -515.5634155273438, "logps/rejected": -638.9573364257812, "loss": 0.6564, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.3546359539031982, "rewards/margins": 0.9324806928634644, "rewards/rejected": -3.287116527557373, "step": 14820 }, { "epoch": 0.97, "learning_rate": 1.3427993149998375e-08, "logits/chosen": -1.9696085453033447, "logits/rejected": -1.6930809020996094, "logps/chosen": -572.9739990234375, "logps/rejected": -665.8734130859375, "loss": 0.4738, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.9606966972351074, "rewards/margins": 1.3416447639465332, "rewards/rejected": -4.302341938018799, "step": 14830 }, { "epoch": 0.97, "learning_rate": 1.2843467821658518e-08, "logits/chosen": -1.533319115638733, "logits/rejected": -1.516014814376831, "logps/chosen": -584.8552856445312, "logps/rejected": -655.9161376953125, "loss": 0.4087, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.864767074584961, "rewards/margins": 1.255389928817749, "rewards/rejected": -4.120157241821289, "step": 14840 }, { "epoch": 0.97, "learning_rate": 1.2271916720137666e-08, "logits/chosen": -2.2170162200927734, "logits/rejected": -2.1248416900634766, "logps/chosen": -568.857177734375, "logps/rejected": -635.1595458984375, "loss": 0.6052, "rewards/accuracies": 0.75, "rewards/chosen": -2.8181326389312744, "rewards/margins": 0.6834885478019714, "rewards/rejected": -3.5016212463378906, "step": 14850 }, { "epoch": 0.97, "learning_rate": 1.171334282692671e-08, "logits/chosen": -1.637953519821167, "logits/rejected": -2.0385165214538574, "logps/chosen": -564.2327270507812, "logps/rejected": -709.8792114257812, "loss": 0.3442, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.607111692428589, "rewards/margins": 1.3842123746871948, "rewards/rejected": -3.9913246631622314, "step": 14860 }, { "epoch": 0.97, "learning_rate": 1.116774905582041e-08, "logits/chosen": -1.9809516668319702, "logits/rejected": -1.9079357385635376, "logps/chosen": -535.7376708984375, "logps/rejected": -629.07568359375, "loss": 0.5655, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.8107266426086426, "rewards/margins": 1.4259259700775146, "rewards/rejected": -4.236652851104736, "step": 14870 }, { "epoch": 0.97, "learning_rate": 1.0635138252902966e-08, "logits/chosen": -1.839346170425415, "logits/rejected": -1.8110500574111938, "logps/chosen": -553.4337768554688, "logps/rejected": -578.0079956054688, "loss": 0.5583, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.765519618988037, "rewards/margins": 0.7037255764007568, "rewards/rejected": -3.469245195388794, "step": 14880 }, { "epoch": 0.97, "learning_rate": 1.0115513196533589e-08, "logits/chosen": -2.424407482147217, "logits/rejected": -1.7454407215118408, "logps/chosen": -595.8634033203125, "logps/rejected": -676.6961669921875, "loss": 0.4608, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.504723072052002, "rewards/margins": 1.2304394245147705, "rewards/rejected": -3.7351627349853516, "step": 14890 }, { "epoch": 0.97, "learning_rate": 9.608876597330952e-09, "logits/chosen": -2.0921707153320312, "logits/rejected": -1.8799470663070679, "logps/chosen": -651.5478515625, "logps/rejected": -791.4033203125, "loss": 0.5989, "rewards/accuracies": 0.75, "rewards/chosen": -2.843231678009033, "rewards/margins": 0.8656366467475891, "rewards/rejected": -3.7088685035705566, "step": 14900 }, { "epoch": 0.98, "learning_rate": 9.115231098159594e-09, "logits/chosen": -1.8645089864730835, "logits/rejected": -2.1675772666931152, "logps/chosen": -625.9075927734375, "logps/rejected": -554.9896850585938, "loss": 0.5518, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.273899793624878, "rewards/margins": 0.6048682928085327, "rewards/rejected": -2.8787682056427, "step": 14910 }, { "epoch": 0.98, "learning_rate": 8.634579274116317e-09, "logits/chosen": -2.1418542861938477, "logits/rejected": -2.051110029220581, "logps/chosen": -646.91162109375, "logps/rejected": -757.44189453125, "loss": 0.6099, "rewards/accuracies": 0.75, "rewards/chosen": -2.6040735244750977, "rewards/margins": 0.9848331212997437, "rewards/rejected": -3.5889065265655518, "step": 14920 }, { "epoch": 0.98, "learning_rate": 8.166923632516865e-09, "logits/chosen": -2.135305881500244, "logits/rejected": -2.3295464515686035, "logps/chosen": -550.7391967773438, "logps/rejected": -599.1780395507812, "loss": 0.5218, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.2799880504608154, "rewards/margins": 1.2160379886627197, "rewards/rejected": -3.4960262775421143, "step": 14930 }, { "epoch": 0.98, "learning_rate": 7.712266612881492e-09, "logits/chosen": -2.110206365585327, "logits/rejected": -2.0911717414855957, "logps/chosen": -646.8552856445312, "logps/rejected": -775.9037475585938, "loss": 0.6099, "rewards/accuracies": 0.75, "rewards/chosen": -2.8884596824645996, "rewards/margins": 1.1088417768478394, "rewards/rejected": -3.9973015785217285, "step": 14940 }, { "epoch": 0.98, "learning_rate": 7.270610586924687e-09, "logits/chosen": -1.9851608276367188, "logits/rejected": -1.7386525869369507, "logps/chosen": -532.4805908203125, "logps/rejected": -577.1221923828125, "loss": 0.4293, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.726097583770752, "rewards/margins": 1.0200824737548828, "rewards/rejected": -3.7461800575256348, "step": 14950 }, { "epoch": 0.98, "learning_rate": 6.841957858539916e-09, "logits/chosen": -2.039259433746338, "logits/rejected": -1.5415773391723633, "logps/chosen": -574.57080078125, "logps/rejected": -579.6693115234375, "loss": 0.544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.8482258319854736, "rewards/margins": 0.8674120903015137, "rewards/rejected": -3.7156379222869873, "step": 14960 }, { "epoch": 0.98, "learning_rate": 6.426310663790181e-09, "logits/chosen": -2.1447885036468506, "logits/rejected": -1.8553613424301147, "logps/chosen": -472.54345703125, "logps/rejected": -681.0835571289062, "loss": 0.4591, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.4107956886291504, "rewards/margins": 1.6803451776504517, "rewards/rejected": -4.091141223907471, "step": 14970 }, { "epoch": 0.98, "learning_rate": 6.023671170894696e-09, "logits/chosen": -2.3050427436828613, "logits/rejected": -2.0280723571777344, "logps/chosen": -523.9729614257812, "logps/rejected": -675.7442626953125, "loss": 0.3343, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -2.1255812644958496, "rewards/margins": 1.6824712753295898, "rewards/rejected": -3.8080525398254395, "step": 14980 }, { "epoch": 0.98, "learning_rate": 5.634041480218344e-09, "logits/chosen": -2.022458553314209, "logits/rejected": -1.8785483837127686, "logps/chosen": -538.9459228515625, "logps/rejected": -625.7891235351562, "loss": 0.5558, "rewards/accuracies": 0.75, "rewards/chosen": -2.573493480682373, "rewards/margins": 1.359083890914917, "rewards/rejected": -3.932577133178711, "step": 14990 }, { "epoch": 0.98, "learning_rate": 5.257423624260849e-09, "logits/chosen": -2.1214098930358887, "logits/rejected": -1.5633071660995483, "logps/chosen": -530.5240478515625, "logps/rejected": -629.9700317382812, "loss": 0.544, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -3.113564968109131, "rewards/margins": 1.1282984018325806, "rewards/rejected": -4.241864204406738, "step": 15000 }, { "epoch": 0.98, "eval_logits/chosen": -2.0103464126586914, "eval_logits/rejected": -1.8321548700332642, "eval_logps/chosen": -559.2841186523438, "eval_logps/rejected": -652.72314453125, "eval_loss": 0.5345566272735596, "eval_rewards/accuracies": 0.7450000047683716, "eval_rewards/chosen": -2.6383869647979736, "eval_rewards/margins": 1.1347852945327759, "eval_rewards/rejected": -3.773172616958618, "eval_runtime": 464.8123, "eval_samples_per_second": 4.303, "eval_steps_per_second": 2.151, "step": 15000 }, { "epoch": 0.98, "learning_rate": 4.893819567644564e-09, "logits/chosen": -2.589524030685425, "logits/rejected": -2.085636854171753, "logps/chosen": -627.2362060546875, "logps/rejected": -616.60009765625, "loss": 0.4448, "rewards/accuracies": 0.75, "rewards/chosen": -1.7485313415527344, "rewards/margins": 0.941628098487854, "rewards/rejected": -2.690159559249878, "step": 15010 }, { "epoch": 0.98, "learning_rate": 4.543231207107257e-09, "logits/chosen": -2.071953058242798, "logits/rejected": -1.9068734645843506, "logps/chosen": -564.134033203125, "logps/rejected": -653.0672607421875, "loss": 0.5597, "rewards/accuracies": 0.75, "rewards/chosen": -2.419016122817993, "rewards/margins": 1.1532005071640015, "rewards/rejected": -3.572216510772705, "step": 15020 }, { "epoch": 0.98, "learning_rate": 4.205660371488785e-09, "logits/chosen": -1.7492027282714844, "logits/rejected": -2.114201784133911, "logps/chosen": -523.1380004882812, "logps/rejected": -662.16650390625, "loss": 0.5591, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.3801138401031494, "rewards/margins": 0.8532201647758484, "rewards/rejected": -3.2333340644836426, "step": 15030 }, { "epoch": 0.98, "learning_rate": 3.88110882172471e-09, "logits/chosen": -2.2043449878692627, "logits/rejected": -1.5802637338638306, "logps/chosen": -593.7720336914062, "logps/rejected": -702.6158447265625, "loss": 0.4519, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.357976198196411, "rewards/margins": 1.9908418655395508, "rewards/rejected": -4.348818302154541, "step": 15040 }, { "epoch": 0.98, "learning_rate": 3.569578250834371e-09, "logits/chosen": -2.2800846099853516, "logits/rejected": -1.9660142660140991, "logps/chosen": -635.9804077148438, "logps/rejected": -608.4877319335938, "loss": 0.4264, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.367474317550659, "rewards/margins": 1.1003978252410889, "rewards/rejected": -3.467872142791748, "step": 15050 }, { "epoch": 0.99, "learning_rate": 3.2710702839139353e-09, "logits/chosen": -1.9524681568145752, "logits/rejected": -2.089827060699463, "logps/chosen": -536.96533203125, "logps/rejected": -729.6658935546875, "loss": 0.5196, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.171668529510498, "rewards/margins": 1.4687433242797852, "rewards/rejected": -3.640411853790283, "step": 15060 }, { "epoch": 0.99, "learning_rate": 2.9855864781272448e-09, "logits/chosen": -1.7301270961761475, "logits/rejected": -1.9880526065826416, "logps/chosen": -502.5127868652344, "logps/rejected": -669.14013671875, "loss": 0.4803, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.6482887268066406, "rewards/margins": 1.0087933540344238, "rewards/rejected": -3.6570820808410645, "step": 15070 }, { "epoch": 0.99, "learning_rate": 2.7131283226977665e-09, "logits/chosen": -1.7450988292694092, "logits/rejected": -1.6128599643707275, "logps/chosen": -495.0038146972656, "logps/rejected": -660.7210693359375, "loss": 0.5607, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.581587076187134, "rewards/margins": 1.8689552545547485, "rewards/rejected": -4.450542449951172, "step": 15080 }, { "epoch": 0.99, "learning_rate": 2.4536972389008205e-09, "logits/chosen": -1.9724218845367432, "logits/rejected": -1.832627296447754, "logps/chosen": -507.7687072753906, "logps/rejected": -597.588623046875, "loss": 0.4921, "rewards/accuracies": 0.75, "rewards/chosen": -2.8451650142669678, "rewards/margins": 0.994085431098938, "rewards/rejected": -3.8392510414123535, "step": 15090 }, { "epoch": 0.99, "learning_rate": 2.20729458005553e-09, "logits/chosen": -1.9598588943481445, "logits/rejected": -2.226276159286499, "logps/chosen": -603.4548950195312, "logps/rejected": -636.9799194335938, "loss": 0.6833, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.43165922164917, "rewards/margins": 0.9997150301933289, "rewards/rejected": -3.4313747882843018, "step": 15100 }, { "epoch": 0.99, "learning_rate": 1.9739216315192712e-09, "logits/chosen": -1.884840726852417, "logits/rejected": -1.8147767782211304, "logps/chosen": -549.3267822265625, "logps/rejected": -673.274658203125, "loss": 0.6796, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2264692783355713, "rewards/margins": 1.4674718379974365, "rewards/rejected": -3.693941593170166, "step": 15110 }, { "epoch": 0.99, "learning_rate": 1.7535796106796231e-09, "logits/chosen": -1.8648507595062256, "logits/rejected": -1.8699684143066406, "logps/chosen": -559.1756591796875, "logps/rejected": -737.6688842773438, "loss": 0.5379, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.814243793487549, "rewards/margins": 1.0435668230056763, "rewards/rejected": -3.8578104972839355, "step": 15120 }, { "epoch": 0.99, "learning_rate": 1.5462696669482636e-09, "logits/chosen": -1.8213493824005127, "logits/rejected": -1.9720951318740845, "logps/chosen": -475.69268798828125, "logps/rejected": -601.0528564453125, "loss": 0.4822, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.752937078475952, "rewards/margins": 1.1356090307235718, "rewards/rejected": -3.8885464668273926, "step": 15130 }, { "epoch": 0.99, "learning_rate": 1.3519928817556927e-09, "logits/chosen": -2.2426865100860596, "logits/rejected": -1.9893312454223633, "logps/chosen": -523.9650268554688, "logps/rejected": -693.4822998046875, "loss": 0.588, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.4684181213378906, "rewards/margins": 1.2350502014160156, "rewards/rejected": -3.7034687995910645, "step": 15140 }, { "epoch": 0.99, "learning_rate": 1.1707502685448512e-09, "logits/chosen": -1.8712642192840576, "logits/rejected": -1.9466218948364258, "logps/chosen": -568.0076904296875, "logps/rejected": -602.0161743164062, "loss": 0.6481, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.9134345054626465, "rewards/margins": 0.6558619737625122, "rewards/rejected": -3.569296360015869, "step": 15150 }, { "epoch": 0.99, "learning_rate": 1.002542772765569e-09, "logits/chosen": -2.0638976097106934, "logits/rejected": -1.9307587146759033, "logps/chosen": -560.0218505859375, "logps/rejected": -565.7151489257812, "loss": 0.7394, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.1229779720306396, "rewards/margins": 0.8032512664794922, "rewards/rejected": -3.926229476928711, "step": 15160 }, { "epoch": 0.99, "learning_rate": 8.473712718709559e-10, "logits/chosen": -2.003127098083496, "logits/rejected": -1.8916511535644531, "logps/chosen": -585.7120361328125, "logps/rejected": -612.2128295898438, "loss": 0.4223, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.2510201930999756, "rewards/margins": 1.000040888786316, "rewards/rejected": -3.251060962677002, "step": 15170 }, { "epoch": 0.99, "learning_rate": 7.052365753112966e-10, "logits/chosen": -1.907179832458496, "logits/rejected": -2.0109689235687256, "logps/chosen": -516.2862548828125, "logps/rejected": -582.3834228515625, "loss": 0.6324, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.6489791870117188, "rewards/margins": 0.7662774324417114, "rewards/rejected": -3.4152565002441406, "step": 15180 }, { "epoch": 0.99, "learning_rate": 5.761394245307195e-10, "logits/chosen": -1.6526172161102295, "logits/rejected": -1.8436660766601562, "logps/chosen": -484.718017578125, "logps/rejected": -684.2083740234375, "loss": 0.4828, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.846797466278076, "rewards/margins": 1.2969233989715576, "rewards/rejected": -4.143720626831055, "step": 15190 }, { "epoch": 0.99, "learning_rate": 4.6008049296358826e-10, "logits/chosen": -1.6694622039794922, "logits/rejected": -1.782187819480896, "logps/chosen": -574.0707397460938, "logps/rejected": -650.3125, "loss": 0.7595, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -2.969832181930542, "rewards/margins": 0.4895861744880676, "rewards/rejected": -3.4594180583953857, "step": 15200 }, { "epoch": 1.0, "learning_rate": 3.5706038603006146e-10, "logits/chosen": -1.8179279565811157, "logits/rejected": -1.777890920639038, "logps/chosen": -544.8775024414062, "logps/rejected": -649.1305541992188, "loss": 0.6576, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -2.877633571624756, "rewards/margins": 0.5929735898971558, "rewards/rejected": -3.470606565475464, "step": 15210 }, { "epoch": 1.0, "learning_rate": 2.670796411333165e-10, "logits/chosen": -2.3590896129608154, "logits/rejected": -2.0110230445861816, "logps/chosen": -643.3109130859375, "logps/rejected": -662.0802001953125, "loss": 0.4969, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -2.8800227642059326, "rewards/margins": 0.6459160447120667, "rewards/rejected": -3.5259385108947754, "step": 15220 }, { "epoch": 1.0, "learning_rate": 1.9013872765677455e-10, "logits/chosen": -1.6060059070587158, "logits/rejected": -1.6648750305175781, "logps/chosen": -541.9176635742188, "logps/rejected": -621.7283935546875, "loss": 0.4782, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -3.0482802391052246, "rewards/margins": 1.1852730512619019, "rewards/rejected": -4.233553886413574, "step": 15230 }, { "epoch": 1.0, "learning_rate": 1.262380469624347e-10, "logits/chosen": -1.9589645862579346, "logits/rejected": -1.5916931629180908, "logps/chosen": -624.8267822265625, "logps/rejected": -642.77490234375, "loss": 0.4946, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -2.87965726852417, "rewards/margins": 1.2809209823608398, "rewards/rejected": -4.16057825088501, "step": 15240 }, { "epoch": 1.0, "learning_rate": 7.53779323872661e-11, "logits/chosen": -2.010342597961426, "logits/rejected": -2.2257697582244873, "logps/chosen": -536.9607543945312, "logps/rejected": -731.04296875, "loss": 0.5095, "rewards/accuracies": 0.75, "rewards/chosen": -2.594208002090454, "rewards/margins": 1.1701253652572632, "rewards/rejected": -3.7643332481384277, "step": 15250 }, { "epoch": 1.0, "learning_rate": 3.7558649242652734e-11, "logits/chosen": -1.97809636592865, "logits/rejected": -1.9274822473526, "logps/chosen": -609.1251220703125, "logps/rejected": -653.0401611328125, "loss": 0.568, "rewards/accuracies": 0.75, "rewards/chosen": -3.0441153049468994, "rewards/margins": 1.080944299697876, "rewards/rejected": -4.125059604644775, "step": 15260 }, { "epoch": 1.0, "learning_rate": 1.2780394812450526e-11, "logits/chosen": -2.1693949699401855, "logits/rejected": -1.7180089950561523, "logps/chosen": -669.6077880859375, "logps/rejected": -679.9315185546875, "loss": 0.5132, "rewards/accuracies": 0.75, "rewards/chosen": -3.558014392852783, "rewards/margins": 0.8520815968513489, "rewards/rejected": -4.410096645355225, "step": 15270 }, { "epoch": 1.0, "learning_rate": 1.0432983521546646e-12, "logits/chosen": -2.0951781272888184, "logits/rejected": -1.8845336437225342, "logps/chosen": -670.9354248046875, "logps/rejected": -788.4154052734375, "loss": 0.4315, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9909508228302002, "rewards/margins": 1.453499436378479, "rewards/rejected": -3.4444503784179688, "step": 15280 }, { "epoch": 1.0, "step": 15284, "total_flos": 0.0, "train_loss": 0.5599543302822287, "train_runtime": 34322.3587, "train_samples_per_second": 1.781, "train_steps_per_second": 0.445 } ], "logging_steps": 10, "max_steps": 15284, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }