{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 13557, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4749262536873156e-09, "logits/chosen": -2.4788765907287598, "logits/rejected": -1.3292487859725952, "logps/chosen": -409.9538269042969, "logps/rejected": -238.84312438964844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.4749262536873155e-08, "logits/chosen": -1.7210251092910767, "logits/rejected": -1.4418513774871826, "logps/chosen": -164.326416015625, "logps/rejected": -148.9131317138672, "loss": 0.6932, "rewards/accuracies": 0.3611111044883728, "rewards/chosen": 6.20587752564461e-06, "rewards/margins": -2.018662962655071e-05, "rewards/rejected": 2.6392513973405585e-05, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.949852507374631e-08, "logits/chosen": -2.0204386711120605, "logits/rejected": -1.5768181085586548, "logps/chosen": -162.75619506835938, "logps/rejected": -124.29972839355469, "loss": 0.6932, "rewards/accuracies": 0.32499998807907104, "rewards/chosen": -8.318300388054922e-05, "rewards/margins": -0.00011903295671800151, "rewards/rejected": 3.58499564754311e-05, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.424778761061947e-08, "logits/chosen": -1.9532196521759033, "logits/rejected": -1.6114107370376587, "logps/chosen": -161.4169921875, "logps/rejected": -124.2623291015625, "loss": 0.6932, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.00011910348985111341, "rewards/margins": -3.172488504787907e-05, "rewards/rejected": -8.737859752727672e-05, "step": 30 }, { "epoch": 0.0, "learning_rate": 5.899705014749262e-08, "logits/chosen": -2.1480274200439453, "logits/rejected": -1.4792611598968506, "logps/chosen": -209.96633911132812, "logps/rejected": -144.23049926757812, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 2.9996541343280114e-05, "rewards/margins": 7.354726403718814e-05, "rewards/rejected": -4.3550731788855046e-05, "step": 40 }, { "epoch": 0.0, "learning_rate": 7.374631268436577e-08, "logits/chosen": -1.8642715215682983, "logits/rejected": -1.4792267084121704, "logps/chosen": -132.48971557617188, "logps/rejected": -95.5574951171875, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 3.863597157760523e-05, "rewards/margins": 6.495495472336188e-05, "rewards/rejected": -2.6318977688788436e-05, "step": 50 }, { "epoch": 0.0, "learning_rate": 8.849557522123894e-08, "logits/chosen": -1.9912035465240479, "logits/rejected": -1.4943995475769043, "logps/chosen": -144.76437377929688, "logps/rejected": -106.72172546386719, "loss": 0.6932, "rewards/accuracies": 0.25, "rewards/chosen": -6.486644997494295e-05, "rewards/margins": -0.00011546573659870774, "rewards/rejected": 5.059929026174359e-05, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.0324483775811209e-07, "logits/chosen": -1.805627465248108, "logits/rejected": -1.541938066482544, "logps/chosen": -110.87413024902344, "logps/rejected": -107.87353515625, "loss": 0.6932, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -6.68201973894611e-05, "rewards/margins": -9.235172183252871e-05, "rewards/rejected": 2.553153535700403e-05, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.1799410029498524e-07, "logits/chosen": -2.0564072132110596, "logits/rejected": -1.6781947612762451, "logps/chosen": -110.9719467163086, "logps/rejected": -92.81617736816406, "loss": 0.6931, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 8.624754991615191e-05, "rewards/margins": 9.29329835344106e-05, "rewards/rejected": -6.6854431679530535e-06, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.327433628318584e-07, "logits/chosen": -1.9488716125488281, "logits/rejected": -1.5129443407058716, "logps/chosen": -143.23350524902344, "logps/rejected": -112.37986755371094, "loss": 0.6932, "rewards/accuracies": 0.375, "rewards/chosen": -1.3049409972154535e-05, "rewards/margins": -5.60016451345291e-06, "rewards/rejected": -7.449248187185731e-06, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.4749262536873155e-07, "logits/chosen": -1.929771065711975, "logits/rejected": -1.682916283607483, "logps/chosen": -137.5438690185547, "logps/rejected": -114.3836441040039, "loss": 0.6931, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.00025407870998606086, "rewards/margins": 9.144912473857403e-05, "rewards/rejected": 0.0001626296143513173, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.622418879056047e-07, "logits/chosen": -1.7885568141937256, "logits/rejected": -1.4545568227767944, "logps/chosen": -125.30277252197266, "logps/rejected": -117.15525817871094, "loss": 0.6931, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 7.595721399411559e-05, "rewards/margins": 4.57885762443766e-05, "rewards/rejected": 3.016862683580257e-05, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.7699115044247788e-07, "logits/chosen": -1.8949912786483765, "logits/rejected": -1.4546430110931396, "logps/chosen": -174.32073974609375, "logps/rejected": -145.0150909423828, "loss": 0.6931, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.0002994768728967756, "rewards/margins": 0.00010045090311905369, "rewards/rejected": 0.0001990259625017643, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.9174041297935104e-07, "logits/chosen": -1.9836410284042358, "logits/rejected": -1.709429144859314, "logps/chosen": -141.1275634765625, "logps/rejected": -117.14542388916016, "loss": 0.6931, "rewards/accuracies": 0.625, "rewards/chosen": 0.00038248515920713544, "rewards/margins": 0.00019995639740955085, "rewards/rejected": 0.00018252880545333028, "step": 130 }, { "epoch": 0.01, "learning_rate": 2.0648967551622418e-07, "logits/chosen": -1.9752012491226196, "logits/rejected": -1.6743885278701782, "logps/chosen": -116.97663879394531, "logps/rejected": -130.48214721679688, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.00034134340239688754, "rewards/margins": 0.0002342404332011938, "rewards/rejected": 0.00010710298374760896, "step": 140 }, { "epoch": 0.01, "learning_rate": 2.2123893805309735e-07, "logits/chosen": -2.085521697998047, "logits/rejected": -1.8945324420928955, "logps/chosen": -163.8953857421875, "logps/rejected": -118.63118743896484, "loss": 0.6931, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0002981310826726258, "rewards/margins": 9.544082786305808e-06, "rewards/rejected": 0.0002885870053432882, "step": 150 }, { "epoch": 0.01, "learning_rate": 2.3598820058997048e-07, "logits/chosen": -2.0081753730773926, "logits/rejected": -1.5511795282363892, "logps/chosen": -139.1626739501953, "logps/rejected": -119.33290100097656, "loss": 0.6931, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0005023704143241048, "rewards/margins": 0.00026061449898406863, "rewards/rejected": 0.0002417558862362057, "step": 160 }, { "epoch": 0.01, "learning_rate": 2.5073746312684365e-07, "logits/chosen": -1.8279438018798828, "logits/rejected": -1.362281084060669, "logps/chosen": -190.3703155517578, "logps/rejected": -141.8903350830078, "loss": 0.6931, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0005454398924484849, "rewards/margins": 0.0003579551703296602, "rewards/rejected": 0.00018748478032648563, "step": 170 }, { "epoch": 0.01, "learning_rate": 2.654867256637168e-07, "logits/chosen": -2.153658628463745, "logits/rejected": -1.6468169689178467, "logps/chosen": -170.3775634765625, "logps/rejected": -138.7044219970703, "loss": 0.693, "rewards/accuracies": 0.625, "rewards/chosen": 0.0008149220375344157, "rewards/margins": 0.0007589702727273107, "rewards/rejected": 5.595170659944415e-05, "step": 180 }, { "epoch": 0.01, "learning_rate": 2.8023598820059e-07, "logits/chosen": -1.968446969985962, "logits/rejected": -1.6119251251220703, "logps/chosen": -125.25663757324219, "logps/rejected": -88.45571899414062, "loss": 0.6931, "rewards/accuracies": 0.5, "rewards/chosen": 0.00012582159251905978, "rewards/margins": -2.7612759367912076e-05, "rewards/rejected": 0.00015343436098191887, "step": 190 }, { "epoch": 0.01, "learning_rate": 2.949852507374631e-07, "logits/chosen": -2.017679452896118, "logits/rejected": -1.4837175607681274, "logps/chosen": -221.7025909423828, "logps/rejected": -145.14576721191406, "loss": 0.693, "rewards/accuracies": 0.5, "rewards/chosen": 0.0011342325014993548, "rewards/margins": 0.0010430633556097746, "rewards/rejected": 9.116921864915639e-05, "step": 200 }, { "epoch": 0.02, "learning_rate": 3.0973451327433626e-07, "logits/chosen": -2.0245468616485596, "logits/rejected": -1.6133474111557007, "logps/chosen": -174.06045532226562, "logps/rejected": -144.4504852294922, "loss": 0.6931, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.001081369468010962, "rewards/margins": 0.000796747743152082, "rewards/rejected": 0.00028462172485888004, "step": 210 }, { "epoch": 0.02, "learning_rate": 3.244837758112094e-07, "logits/chosen": -1.8675012588500977, "logits/rejected": -1.4732043743133545, "logps/chosen": -187.042236328125, "logps/rejected": -125.50852966308594, "loss": 0.693, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.0013512603472918272, "rewards/margins": 0.001024872763082385, "rewards/rejected": 0.00032638749689795077, "step": 220 }, { "epoch": 0.02, "learning_rate": 3.392330383480826e-07, "logits/chosen": -2.120283603668213, "logits/rejected": -1.7119636535644531, "logps/chosen": -146.96356201171875, "logps/rejected": -109.62583923339844, "loss": 0.6929, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0011240954045206308, "rewards/margins": 0.00131790095474571, "rewards/rejected": -0.00019380563753657043, "step": 230 }, { "epoch": 0.02, "learning_rate": 3.5398230088495575e-07, "logits/chosen": -1.8967628479003906, "logits/rejected": -1.433365821838379, "logps/chosen": -118.50655364990234, "logps/rejected": -108.62889099121094, "loss": 0.6929, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.00024254964955616742, "rewards/margins": 0.0021202126517891884, "rewards/rejected": -0.002362762112170458, "step": 240 }, { "epoch": 0.02, "learning_rate": 3.6873156342182887e-07, "logits/chosen": -2.0241072177886963, "logits/rejected": -1.4865143299102783, "logps/chosen": -181.15283203125, "logps/rejected": -157.55862426757812, "loss": 0.6925, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.000754393229726702, "rewards/margins": 0.003895942820236087, "rewards/rejected": -0.004650335758924484, "step": 250 }, { "epoch": 0.02, "learning_rate": 3.834808259587021e-07, "logits/chosen": -1.946054458618164, "logits/rejected": -1.6933704614639282, "logps/chosen": -175.9441375732422, "logps/rejected": -188.55960083007812, "loss": 0.6925, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0006642128573730588, "rewards/margins": 0.004433608613908291, "rewards/rejected": -0.0037693947087973356, "step": 260 }, { "epoch": 0.02, "learning_rate": 3.982300884955752e-07, "logits/chosen": -2.0795578956604004, "logits/rejected": -1.7738806009292603, "logps/chosen": -159.45887756347656, "logps/rejected": -150.00035095214844, "loss": 0.6921, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.002614680677652359, "rewards/margins": 0.007350636180490255, "rewards/rejected": -0.009965317323803902, "step": 270 }, { "epoch": 0.02, "learning_rate": 4.1297935103244836e-07, "logits/chosen": -1.8411970138549805, "logits/rejected": -1.4505590200424194, "logps/chosen": -174.99752807617188, "logps/rejected": -162.41087341308594, "loss": 0.6923, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.007584887556731701, "rewards/margins": 0.008292051963508129, "rewards/rejected": -0.01587693765759468, "step": 280 }, { "epoch": 0.02, "learning_rate": 4.2772861356932147e-07, "logits/chosen": -1.8788261413574219, "logits/rejected": -1.5402648448944092, "logps/chosen": -179.3714141845703, "logps/rejected": -184.04669189453125, "loss": 0.6902, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.015747912228107452, "rewards/margins": 0.024221275001764297, "rewards/rejected": -0.03996918722987175, "step": 290 }, { "epoch": 0.02, "learning_rate": 4.424778761061947e-07, "logits/chosen": -1.9063736200332642, "logits/rejected": -1.285650610923767, "logps/chosen": -193.56675720214844, "logps/rejected": -177.8426513671875, "loss": 0.6899, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03917139768600464, "rewards/margins": 0.03137218952178955, "rewards/rejected": -0.07054358720779419, "step": 300 }, { "epoch": 0.02, "learning_rate": 4.5722713864306786e-07, "logits/chosen": -1.880340576171875, "logits/rejected": -1.5672200918197632, "logps/chosen": -189.8310546875, "logps/rejected": -193.968017578125, "loss": 0.6894, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.051108866930007935, "rewards/margins": 0.03160933405160904, "rewards/rejected": -0.08271819353103638, "step": 310 }, { "epoch": 0.02, "learning_rate": 4.7197640117994097e-07, "logits/chosen": -1.9510247707366943, "logits/rejected": -1.391144037246704, "logps/chosen": -256.6185607910156, "logps/rejected": -268.2026672363281, "loss": 0.6869, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.09444020688533783, "rewards/margins": 0.05461013317108154, "rewards/rejected": -0.14905035495758057, "step": 320 }, { "epoch": 0.02, "learning_rate": 4.867256637168141e-07, "logits/chosen": -1.7435442209243774, "logits/rejected": -1.4472002983093262, "logps/chosen": -269.00677490234375, "logps/rejected": -251.75534057617188, "loss": 0.6916, "rewards/accuracies": 0.5, "rewards/chosen": -0.11010807752609253, "rewards/margins": 0.017304524779319763, "rewards/rejected": -0.1274126172065735, "step": 330 }, { "epoch": 0.03, "learning_rate": 5.014749262536873e-07, "logits/chosen": -1.7450830936431885, "logits/rejected": -1.26893949508667, "logps/chosen": -339.84503173828125, "logps/rejected": -366.1653747558594, "loss": 0.6884, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.17908045649528503, "rewards/margins": 0.045984722673892975, "rewards/rejected": -0.2250651866197586, "step": 340 }, { "epoch": 0.03, "learning_rate": 5.162241887905604e-07, "logits/chosen": -1.8211805820465088, "logits/rejected": -1.1877918243408203, "logps/chosen": -351.8857727050781, "logps/rejected": -368.1713562011719, "loss": 0.6864, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.17493045330047607, "rewards/margins": 0.06421728432178497, "rewards/rejected": -0.23914775252342224, "step": 350 }, { "epoch": 0.03, "learning_rate": 5.309734513274336e-07, "logits/chosen": -1.7131288051605225, "logits/rejected": -1.1882977485656738, "logps/chosen": -376.2377624511719, "logps/rejected": -398.4125671386719, "loss": 0.6852, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2058299481868744, "rewards/margins": 0.07462694495916367, "rewards/rejected": -0.28045687079429626, "step": 360 }, { "epoch": 0.03, "learning_rate": 5.457227138643067e-07, "logits/chosen": -1.7253786325454712, "logits/rejected": -1.4442790746688843, "logps/chosen": -352.2489929199219, "logps/rejected": -425.23529052734375, "loss": 0.6853, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.19627168774604797, "rewards/margins": 0.08702980726957321, "rewards/rejected": -0.2833015024662018, "step": 370 }, { "epoch": 0.03, "learning_rate": 5.6047197640118e-07, "logits/chosen": -2.0095455646514893, "logits/rejected": -1.6208763122558594, "logps/chosen": -399.4122619628906, "logps/rejected": -447.4737243652344, "loss": 0.6856, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22773921489715576, "rewards/margins": 0.0765281617641449, "rewards/rejected": -0.30426734685897827, "step": 380 }, { "epoch": 0.03, "learning_rate": 5.752212389380531e-07, "logits/chosen": -1.9783086776733398, "logits/rejected": -1.5394196510314941, "logps/chosen": -355.3728332519531, "logps/rejected": -399.5362548828125, "loss": 0.6844, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23425595462322235, "rewards/margins": 0.08753537386655807, "rewards/rejected": -0.3217913508415222, "step": 390 }, { "epoch": 0.03, "learning_rate": 5.899705014749262e-07, "logits/chosen": -1.8582544326782227, "logits/rejected": -1.2418386936187744, "logps/chosen": -412.0120544433594, "logps/rejected": -502.809814453125, "loss": 0.6819, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.256608247756958, "rewards/margins": 0.14715977013111115, "rewards/rejected": -0.40376797318458557, "step": 400 }, { "epoch": 0.03, "learning_rate": 6.047197640117994e-07, "logits/chosen": -1.8750104904174805, "logits/rejected": -1.553784728050232, "logps/chosen": -441.94659423828125, "logps/rejected": -481.3126525878906, "loss": 0.6862, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2845803201198578, "rewards/margins": 0.0506829209625721, "rewards/rejected": -0.335263192653656, "step": 410 }, { "epoch": 0.03, "learning_rate": 6.194690265486725e-07, "logits/chosen": -1.8538204431533813, "logits/rejected": -1.2214611768722534, "logps/chosen": -402.86114501953125, "logps/rejected": -490.844482421875, "loss": 0.6777, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2291877567768097, "rewards/margins": 0.15091672539710999, "rewards/rejected": -0.38010451197624207, "step": 420 }, { "epoch": 0.03, "learning_rate": 6.342182890855456e-07, "logits/chosen": -1.681492567062378, "logits/rejected": -1.2342557907104492, "logps/chosen": -446.1883239746094, "logps/rejected": -560.6768798828125, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31171560287475586, "rewards/margins": 0.14570967853069305, "rewards/rejected": -0.4574252963066101, "step": 430 }, { "epoch": 0.03, "learning_rate": 6.489675516224188e-07, "logits/chosen": -1.799670934677124, "logits/rejected": -1.3877151012420654, "logps/chosen": -501.8890686035156, "logps/rejected": -574.2443237304688, "loss": 0.683, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3564384877681732, "rewards/margins": 0.09395261853933334, "rewards/rejected": -0.45039114356040955, "step": 440 }, { "epoch": 0.03, "learning_rate": 6.637168141592921e-07, "logits/chosen": -1.6295913457870483, "logits/rejected": -1.58306884765625, "logps/chosen": -438.3116760253906, "logps/rejected": -467.53558349609375, "loss": 0.6912, "rewards/accuracies": 0.5, "rewards/chosen": -0.3185608983039856, "rewards/margins": 0.037758756428956985, "rewards/rejected": -0.3563196361064911, "step": 450 }, { "epoch": 0.03, "learning_rate": 6.784660766961652e-07, "logits/chosen": -2.0462522506713867, "logits/rejected": -1.3093647956848145, "logps/chosen": -445.26666259765625, "logps/rejected": -491.0096130371094, "loss": 0.6833, "rewards/accuracies": 0.75, "rewards/chosen": -0.26292890310287476, "rewards/margins": 0.12137428671121597, "rewards/rejected": -0.38430315256118774, "step": 460 }, { "epoch": 0.03, "learning_rate": 6.932153392330383e-07, "logits/chosen": -1.9706470966339111, "logits/rejected": -1.6422786712646484, "logps/chosen": -399.05523681640625, "logps/rejected": -410.2801208496094, "loss": 0.6898, "rewards/accuracies": 0.625, "rewards/chosen": -0.22623968124389648, "rewards/margins": 0.061574775725603104, "rewards/rejected": -0.2878144383430481, "step": 470 }, { "epoch": 0.04, "learning_rate": 7.079646017699115e-07, "logits/chosen": -1.9191665649414062, "logits/rejected": -1.687371850013733, "logps/chosen": -402.0966796875, "logps/rejected": -427.95654296875, "loss": 0.6917, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.26655441522598267, "rewards/margins": 0.033485203981399536, "rewards/rejected": -0.3000395894050598, "step": 480 }, { "epoch": 0.04, "learning_rate": 7.227138643067846e-07, "logits/chosen": -2.002131223678589, "logits/rejected": -1.603057622909546, "logps/chosen": -370.7928466796875, "logps/rejected": -447.0213928222656, "loss": 0.6872, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22369377315044403, "rewards/margins": 0.0841987356543541, "rewards/rejected": -0.3078925311565399, "step": 490 }, { "epoch": 0.04, "learning_rate": 7.374631268436577e-07, "logits/chosen": -1.8099396228790283, "logits/rejected": -1.4797179698944092, "logps/chosen": -352.0479736328125, "logps/rejected": -427.51220703125, "loss": 0.6829, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1869697868824005, "rewards/margins": 0.11212553083896637, "rewards/rejected": -0.2990953028202057, "step": 500 }, { "epoch": 0.04, "learning_rate": 7.522123893805308e-07, "logits/chosen": -1.9323205947875977, "logits/rejected": -1.4938558340072632, "logps/chosen": -432.98907470703125, "logps/rejected": -504.1278381347656, "loss": 0.6822, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2451755255460739, "rewards/margins": 0.11448284238576889, "rewards/rejected": -0.3596583902835846, "step": 510 }, { "epoch": 0.04, "learning_rate": 7.669616519174042e-07, "logits/chosen": -1.936714768409729, "logits/rejected": -1.418513536453247, "logps/chosen": -339.52630615234375, "logps/rejected": -459.5921936035156, "loss": 0.6777, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21147163212299347, "rewards/margins": 0.1384952962398529, "rewards/rejected": -0.34996694326400757, "step": 520 }, { "epoch": 0.04, "learning_rate": 7.817109144542773e-07, "logits/chosen": -1.908663034439087, "logits/rejected": -1.7417542934417725, "logps/chosen": -480.57177734375, "logps/rejected": -493.7294921875, "loss": 0.6866, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29239171743392944, "rewards/margins": 0.06508759409189224, "rewards/rejected": -0.3574792742729187, "step": 530 }, { "epoch": 0.04, "learning_rate": 7.964601769911504e-07, "logits/chosen": -1.7825477123260498, "logits/rejected": -1.2991164922714233, "logps/chosen": -494.84918212890625, "logps/rejected": -606.0709838867188, "loss": 0.6784, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3235834538936615, "rewards/margins": 0.1603105515241623, "rewards/rejected": -0.4838939607143402, "step": 540 }, { "epoch": 0.04, "learning_rate": 8.112094395280236e-07, "logits/chosen": -1.7996162176132202, "logits/rejected": -1.3353970050811768, "logps/chosen": -538.1213989257812, "logps/rejected": -592.5304565429688, "loss": 0.6852, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37833064794540405, "rewards/margins": 0.1070266142487526, "rewards/rejected": -0.4853571951389313, "step": 550 }, { "epoch": 0.04, "learning_rate": 8.259587020648967e-07, "logits/chosen": -1.6660349369049072, "logits/rejected": -1.4794137477874756, "logps/chosen": -459.8931579589844, "logps/rejected": -521.2290649414062, "loss": 0.6844, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2817347049713135, "rewards/margins": 0.09479828178882599, "rewards/rejected": -0.37653297185897827, "step": 560 }, { "epoch": 0.04, "learning_rate": 8.407079646017698e-07, "logits/chosen": -2.0589261054992676, "logits/rejected": -1.388694405555725, "logps/chosen": -428.72650146484375, "logps/rejected": -530.9616088867188, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -0.27430424094200134, "rewards/margins": 0.13126643002033234, "rewards/rejected": -0.4055706858634949, "step": 570 }, { "epoch": 0.04, "learning_rate": 8.554572271386429e-07, "logits/chosen": -1.7137943506240845, "logits/rejected": -1.4097938537597656, "logps/chosen": -506.9815368652344, "logps/rejected": -611.9991455078125, "loss": 0.6801, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3621966540813446, "rewards/margins": 0.13946011662483215, "rewards/rejected": -0.5016567707061768, "step": 580 }, { "epoch": 0.04, "learning_rate": 8.702064896755162e-07, "logits/chosen": -1.8211002349853516, "logits/rejected": -1.403028130531311, "logps/chosen": -419.3152770996094, "logps/rejected": -513.9227294921875, "loss": 0.6829, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22699742019176483, "rewards/margins": 0.12835416197776794, "rewards/rejected": -0.35535162687301636, "step": 590 }, { "epoch": 0.04, "learning_rate": 8.849557522123894e-07, "logits/chosen": -1.938796043395996, "logits/rejected": -1.4153218269348145, "logps/chosen": -435.1478576660156, "logps/rejected": -501.764892578125, "loss": 0.6849, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2674418091773987, "rewards/margins": 0.12226283550262451, "rewards/rejected": -0.3897046148777008, "step": 600 }, { "epoch": 0.04, "learning_rate": 8.997050147492625e-07, "logits/chosen": -1.8356802463531494, "logits/rejected": -1.524155855178833, "logps/chosen": -408.39874267578125, "logps/rejected": -464.541259765625, "loss": 0.6888, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2735441327095032, "rewards/margins": 0.08742935955524445, "rewards/rejected": -0.36097344756126404, "step": 610 }, { "epoch": 0.05, "learning_rate": 9.144542772861357e-07, "logits/chosen": -1.8748424053192139, "logits/rejected": -1.291590929031372, "logps/chosen": -377.0844421386719, "logps/rejected": -446.767578125, "loss": 0.6783, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21567480266094208, "rewards/margins": 0.14130783081054688, "rewards/rejected": -0.35698264837265015, "step": 620 }, { "epoch": 0.05, "learning_rate": 9.292035398230088e-07, "logits/chosen": -1.9215377569198608, "logits/rejected": -1.661214828491211, "logps/chosen": -353.2236022949219, "logps/rejected": -398.88690185546875, "loss": 0.6865, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.18977074325084686, "rewards/margins": 0.08765360713005066, "rewards/rejected": -0.2774243652820587, "step": 630 }, { "epoch": 0.05, "learning_rate": 9.439528023598819e-07, "logits/chosen": -2.074794292449951, "logits/rejected": -1.6269819736480713, "logps/chosen": -323.84912109375, "logps/rejected": -361.5697021484375, "loss": 0.6835, "rewards/accuracies": 0.625, "rewards/chosen": -0.16048943996429443, "rewards/margins": 0.08533263206481934, "rewards/rejected": -0.24582210183143616, "step": 640 }, { "epoch": 0.05, "learning_rate": 9.587020648967552e-07, "logits/chosen": -2.1747422218322754, "logits/rejected": -1.6227006912231445, "logps/chosen": -349.0054626464844, "logps/rejected": -427.7183532714844, "loss": 0.6823, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18785065412521362, "rewards/margins": 0.09891339391469955, "rewards/rejected": -0.2867640256881714, "step": 650 }, { "epoch": 0.05, "learning_rate": 9.734513274336282e-07, "logits/chosen": -2.0509836673736572, "logits/rejected": -1.539554476737976, "logps/chosen": -305.6836853027344, "logps/rejected": -407.91326904296875, "loss": 0.682, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1707884967327118, "rewards/margins": 0.11347116529941559, "rewards/rejected": -0.2842596769332886, "step": 660 }, { "epoch": 0.05, "learning_rate": 9.882005899705014e-07, "logits/chosen": -2.0536816120147705, "logits/rejected": -1.6710259914398193, "logps/chosen": -430.0809020996094, "logps/rejected": -473.7281188964844, "loss": 0.6827, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.27972978353500366, "rewards/margins": 0.08998468518257141, "rewards/rejected": -0.3697144687175751, "step": 670 }, { "epoch": 0.05, "learning_rate": 1.0029498525073746e-06, "logits/chosen": -1.9467166662216187, "logits/rejected": -1.4054864645004272, "logps/chosen": -455.39068603515625, "logps/rejected": -548.7138671875, "loss": 0.6823, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.30838245153427124, "rewards/margins": 0.12842623889446259, "rewards/rejected": -0.43680867552757263, "step": 680 }, { "epoch": 0.05, "learning_rate": 1.0176991150442478e-06, "logits/chosen": -2.18593168258667, "logits/rejected": -1.5764405727386475, "logps/chosen": -406.9405822753906, "logps/rejected": -523.674560546875, "loss": 0.6783, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.22604572772979736, "rewards/margins": 0.16819410026073456, "rewards/rejected": -0.3942398130893707, "step": 690 }, { "epoch": 0.05, "learning_rate": 1.0324483775811208e-06, "logits/chosen": -1.9470388889312744, "logits/rejected": -1.5580854415893555, "logps/chosen": -457.55682373046875, "logps/rejected": -527.3773803710938, "loss": 0.6845, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3271501958370209, "rewards/margins": 0.0997210294008255, "rewards/rejected": -0.42687124013900757, "step": 700 }, { "epoch": 0.05, "learning_rate": 1.047197640117994e-06, "logits/chosen": -1.7886316776275635, "logits/rejected": -1.6531985998153687, "logps/chosen": -471.591064453125, "logps/rejected": -576.0791015625, "loss": 0.6837, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33999642729759216, "rewards/margins": 0.11902002990245819, "rewards/rejected": -0.45901647210121155, "step": 710 }, { "epoch": 0.05, "learning_rate": 1.0619469026548673e-06, "logits/chosen": -1.95156729221344, "logits/rejected": -1.2655599117279053, "logps/chosen": -501.2447204589844, "logps/rejected": -632.8653564453125, "loss": 0.679, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3465609550476074, "rewards/margins": 0.18725420534610748, "rewards/rejected": -0.5338150858879089, "step": 720 }, { "epoch": 0.05, "learning_rate": 1.0766961651917403e-06, "logits/chosen": -2.0530593395233154, "logits/rejected": -1.8068015575408936, "logps/chosen": -426.04949951171875, "logps/rejected": -504.77301025390625, "loss": 0.6866, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24723461270332336, "rewards/margins": 0.1086445301771164, "rewards/rejected": -0.35587912797927856, "step": 730 }, { "epoch": 0.05, "learning_rate": 1.0914454277286135e-06, "logits/chosen": -2.163651943206787, "logits/rejected": -1.8072885274887085, "logps/chosen": -369.70928955078125, "logps/rejected": -433.8926696777344, "loss": 0.6845, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.19832053780555725, "rewards/margins": 0.0880778506398201, "rewards/rejected": -0.28639841079711914, "step": 740 }, { "epoch": 0.06, "learning_rate": 1.1061946902654867e-06, "logits/chosen": -1.8631846904754639, "logits/rejected": -1.4294825792312622, "logps/chosen": -397.5494689941406, "logps/rejected": -488.4278869628906, "loss": 0.6791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1732659488916397, "rewards/margins": 0.14968810975551605, "rewards/rejected": -0.3229540288448334, "step": 750 }, { "epoch": 0.06, "learning_rate": 1.12094395280236e-06, "logits/chosen": -1.9430814981460571, "logits/rejected": -1.4920926094055176, "logps/chosen": -493.79791259765625, "logps/rejected": -579.3615112304688, "loss": 0.6821, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.328820139169693, "rewards/margins": 0.13369175791740417, "rewards/rejected": -0.4625118672847748, "step": 760 }, { "epoch": 0.06, "learning_rate": 1.135693215339233e-06, "logits/chosen": -1.9381330013275146, "logits/rejected": -1.542937994003296, "logps/chosen": -514.8754272460938, "logps/rejected": -618.3922119140625, "loss": 0.6823, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3782934248447418, "rewards/margins": 0.15745186805725098, "rewards/rejected": -0.5357452630996704, "step": 770 }, { "epoch": 0.06, "learning_rate": 1.1504424778761061e-06, "logits/chosen": -2.047244071960449, "logits/rejected": -1.431876301765442, "logps/chosen": -429.2142639160156, "logps/rejected": -509.6145935058594, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.27867770195007324, "rewards/margins": 0.13149061799049377, "rewards/rejected": -0.41016826033592224, "step": 780 }, { "epoch": 0.06, "learning_rate": 1.1651917404129794e-06, "logits/chosen": -2.1001222133636475, "logits/rejected": -1.8800718784332275, "logps/chosen": -351.47137451171875, "logps/rejected": -442.72918701171875, "loss": 0.6833, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.22439010441303253, "rewards/margins": 0.1098000556230545, "rewards/rejected": -0.33419016003608704, "step": 790 }, { "epoch": 0.06, "learning_rate": 1.1799410029498524e-06, "logits/chosen": -1.8464915752410889, "logits/rejected": -1.4037649631500244, "logps/chosen": -431.52947998046875, "logps/rejected": -476.43475341796875, "loss": 0.6853, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.27375873923301697, "rewards/margins": 0.0840066522359848, "rewards/rejected": -0.35776540637016296, "step": 800 }, { "epoch": 0.06, "learning_rate": 1.1946902654867256e-06, "logits/chosen": -2.0362536907196045, "logits/rejected": -1.766473412513733, "logps/chosen": -397.8314208984375, "logps/rejected": -490.51934814453125, "loss": 0.6851, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23283085227012634, "rewards/margins": 0.10074906051158905, "rewards/rejected": -0.3335798680782318, "step": 810 }, { "epoch": 0.06, "learning_rate": 1.2094395280235988e-06, "logits/chosen": -2.0025668144226074, "logits/rejected": -1.7058022022247314, "logps/chosen": -402.10357666015625, "logps/rejected": -466.14178466796875, "loss": 0.6825, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2635675072669983, "rewards/margins": 0.10235609114170074, "rewards/rejected": -0.36592361330986023, "step": 820 }, { "epoch": 0.06, "learning_rate": 1.224188790560472e-06, "logits/chosen": -1.8466784954071045, "logits/rejected": -1.5103213787078857, "logps/chosen": -530.7879638671875, "logps/rejected": -664.9498291015625, "loss": 0.6821, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3944378197193146, "rewards/margins": 0.10908160358667374, "rewards/rejected": -0.5035194158554077, "step": 830 }, { "epoch": 0.06, "learning_rate": 1.238938053097345e-06, "logits/chosen": -2.017857074737549, "logits/rejected": -1.5236464738845825, "logps/chosen": -605.5632934570312, "logps/rejected": -653.9263916015625, "loss": 0.6855, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4302627444267273, "rewards/margins": 0.10957585275173187, "rewards/rejected": -0.5398386120796204, "step": 840 }, { "epoch": 0.06, "learning_rate": 1.2536873156342182e-06, "logits/chosen": -2.0231575965881348, "logits/rejected": -1.601133942604065, "logps/chosen": -628.6102294921875, "logps/rejected": -704.9871826171875, "loss": 0.6865, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4486079216003418, "rewards/margins": 0.09623664617538452, "rewards/rejected": -0.5448445081710815, "step": 850 }, { "epoch": 0.06, "learning_rate": 1.2684365781710913e-06, "logits/chosen": -1.998510718345642, "logits/rejected": -1.627704381942749, "logps/chosen": -579.5115966796875, "logps/rejected": -640.9594116210938, "loss": 0.6872, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4033943712711334, "rewards/margins": 0.09413693100214005, "rewards/rejected": -0.49753132462501526, "step": 860 }, { "epoch": 0.06, "learning_rate": 1.2831858407079645e-06, "logits/chosen": -2.0589041709899902, "logits/rejected": -1.75360906124115, "logps/chosen": -527.2274780273438, "logps/rejected": -574.2579345703125, "loss": 0.6875, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3569214940071106, "rewards/margins": 0.055931419134140015, "rewards/rejected": -0.412852942943573, "step": 870 }, { "epoch": 0.06, "learning_rate": 1.2979351032448377e-06, "logits/chosen": -1.9059603214263916, "logits/rejected": -1.6478197574615479, "logps/chosen": -505.27520751953125, "logps/rejected": -593.4174194335938, "loss": 0.6845, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.363817036151886, "rewards/margins": 0.09282372891902924, "rewards/rejected": -0.45664072036743164, "step": 880 }, { "epoch": 0.07, "learning_rate": 1.312684365781711e-06, "logits/chosen": -2.1370632648468018, "logits/rejected": -1.5720163583755493, "logps/chosen": -458.6434020996094, "logps/rejected": -589.1796875, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.28893446922302246, "rewards/margins": 0.16648206114768982, "rewards/rejected": -0.4554165005683899, "step": 890 }, { "epoch": 0.07, "learning_rate": 1.3274336283185841e-06, "logits/chosen": -2.1695172786712646, "logits/rejected": -1.760046362876892, "logps/chosen": -451.30517578125, "logps/rejected": -537.4163818359375, "loss": 0.6813, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27182963490486145, "rewards/margins": 0.11412505805492401, "rewards/rejected": -0.38595470786094666, "step": 900 }, { "epoch": 0.07, "learning_rate": 1.3421828908554571e-06, "logits/chosen": -1.9909499883651733, "logits/rejected": -1.4555021524429321, "logps/chosen": -478.3407287597656, "logps/rejected": -610.8736572265625, "loss": 0.678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32013702392578125, "rewards/margins": 0.16912724077701569, "rewards/rejected": -0.4892643094062805, "step": 910 }, { "epoch": 0.07, "learning_rate": 1.3569321533923304e-06, "logits/chosen": -2.095519542694092, "logits/rejected": -1.5021215677261353, "logps/chosen": -559.5098266601562, "logps/rejected": -661.9122314453125, "loss": 0.6818, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3872610926628113, "rewards/margins": 0.13750417530536652, "rewards/rejected": -0.5247652530670166, "step": 920 }, { "epoch": 0.07, "learning_rate": 1.3716814159292034e-06, "logits/chosen": -2.0601422786712646, "logits/rejected": -1.6056276559829712, "logps/chosen": -518.3754272460938, "logps/rejected": -585.2120361328125, "loss": 0.6843, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35217779874801636, "rewards/margins": 0.10391063988208771, "rewards/rejected": -0.4560883939266205, "step": 930 }, { "epoch": 0.07, "learning_rate": 1.3864306784660766e-06, "logits/chosen": -1.9960517883300781, "logits/rejected": -1.5679510831832886, "logps/chosen": -422.38421630859375, "logps/rejected": -517.7061157226562, "loss": 0.6825, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24933883547782898, "rewards/margins": 0.12334072589874268, "rewards/rejected": -0.37267953157424927, "step": 940 }, { "epoch": 0.07, "learning_rate": 1.4011799410029498e-06, "logits/chosen": -2.2044031620025635, "logits/rejected": -1.4114129543304443, "logps/chosen": -490.9397888183594, "logps/rejected": -625.98193359375, "loss": 0.6823, "rewards/accuracies": 0.75, "rewards/chosen": -0.32792535424232483, "rewards/margins": 0.1850518137216568, "rewards/rejected": -0.512977123260498, "step": 950 }, { "epoch": 0.07, "learning_rate": 1.415929203539823e-06, "logits/chosen": -2.088587522506714, "logits/rejected": -1.5615050792694092, "logps/chosen": -431.64874267578125, "logps/rejected": -586.7955932617188, "loss": 0.6792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2757507860660553, "rewards/margins": 0.18378469347953796, "rewards/rejected": -0.4595354497432709, "step": 960 }, { "epoch": 0.07, "learning_rate": 1.4306784660766962e-06, "logits/chosen": -2.3048384189605713, "logits/rejected": -1.8547544479370117, "logps/chosen": -379.9644775390625, "logps/rejected": -442.5160217285156, "loss": 0.6825, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.20097026228904724, "rewards/margins": 0.10036973655223846, "rewards/rejected": -0.3013400435447693, "step": 970 }, { "epoch": 0.07, "learning_rate": 1.4454277286135692e-06, "logits/chosen": -2.076782464981079, "logits/rejected": -1.7366771697998047, "logps/chosen": -347.0558776855469, "logps/rejected": -385.65081787109375, "loss": 0.6863, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.16413168609142303, "rewards/margins": 0.09623827785253525, "rewards/rejected": -0.2603699564933777, "step": 980 }, { "epoch": 0.07, "learning_rate": 1.4601769911504425e-06, "logits/chosen": -2.223386526107788, "logits/rejected": -1.7704728841781616, "logps/chosen": -342.82135009765625, "logps/rejected": -420.33184814453125, "loss": 0.6839, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.19803206622600555, "rewards/margins": 0.1105206161737442, "rewards/rejected": -0.30855265259742737, "step": 990 }, { "epoch": 0.07, "learning_rate": 1.4749262536873155e-06, "logits/chosen": -2.352505683898926, "logits/rejected": -1.7893301248550415, "logps/chosen": -321.72674560546875, "logps/rejected": -470.55059814453125, "loss": 0.6786, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.17554494738578796, "rewards/margins": 0.19477452337741852, "rewards/rejected": -0.37031951546669006, "step": 1000 }, { "epoch": 0.07, "learning_rate": 1.4896755162241887e-06, "logits/chosen": -1.9986333847045898, "logits/rejected": -1.5083431005477905, "logps/chosen": -409.71478271484375, "logps/rejected": -539.4691162109375, "loss": 0.6805, "rewards/accuracies": 0.75, "rewards/chosen": -0.27818092703819275, "rewards/margins": 0.14660140872001648, "rewards/rejected": -0.42478233575820923, "step": 1010 }, { "epoch": 0.08, "learning_rate": 1.5044247787610617e-06, "logits/chosen": -2.0591859817504883, "logits/rejected": -1.4957928657531738, "logps/chosen": -592.274169921875, "logps/rejected": -701.61865234375, "loss": 0.6836, "rewards/accuracies": 0.75, "rewards/chosen": -0.3902294933795929, "rewards/margins": 0.15593229234218597, "rewards/rejected": -0.5461617708206177, "step": 1020 }, { "epoch": 0.08, "learning_rate": 1.5191740412979351e-06, "logits/chosen": -1.979495644569397, "logits/rejected": -1.7574342489242554, "logps/chosen": -464.3360290527344, "logps/rejected": -573.986083984375, "loss": 0.6823, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3196040689945221, "rewards/margins": 0.12114562839269638, "rewards/rejected": -0.44074973464012146, "step": 1030 }, { "epoch": 0.08, "learning_rate": 1.5339233038348083e-06, "logits/chosen": -1.9681415557861328, "logits/rejected": -1.350480079650879, "logps/chosen": -463.69622802734375, "logps/rejected": -601.4488525390625, "loss": 0.6824, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31228235363960266, "rewards/margins": 0.1724194586277008, "rewards/rejected": -0.48470181226730347, "step": 1040 }, { "epoch": 0.08, "learning_rate": 1.5486725663716813e-06, "logits/chosen": -2.2591731548309326, "logits/rejected": -1.7553138732910156, "logps/chosen": -392.440185546875, "logps/rejected": -486.1963806152344, "loss": 0.681, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23498043417930603, "rewards/margins": 0.13875766098499298, "rewards/rejected": -0.3737381100654602, "step": 1050 }, { "epoch": 0.08, "learning_rate": 1.5634218289085546e-06, "logits/chosen": -2.031299352645874, "logits/rejected": -1.3939927816390991, "logps/chosen": -435.9671936035156, "logps/rejected": -533.9425048828125, "loss": 0.6819, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22676675021648407, "rewards/margins": 0.18411658704280853, "rewards/rejected": -0.4108833372592926, "step": 1060 }, { "epoch": 0.08, "learning_rate": 1.5781710914454276e-06, "logits/chosen": -1.8945518732070923, "logits/rejected": -1.4091362953186035, "logps/chosen": -539.440673828125, "logps/rejected": -612.0310668945312, "loss": 0.6867, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.395731657743454, "rewards/margins": 0.12188084423542023, "rewards/rejected": -0.5176124572753906, "step": 1070 }, { "epoch": 0.08, "learning_rate": 1.5929203539823008e-06, "logits/chosen": -2.253856658935547, "logits/rejected": -1.6569303274154663, "logps/chosen": -394.5130920410156, "logps/rejected": -530.5807495117188, "loss": 0.6794, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22903656959533691, "rewards/margins": 0.17829763889312744, "rewards/rejected": -0.40733417868614197, "step": 1080 }, { "epoch": 0.08, "learning_rate": 1.6076696165191738e-06, "logits/chosen": -2.1263439655303955, "logits/rejected": -1.598833441734314, "logps/chosen": -667.5548095703125, "logps/rejected": -696.6605834960938, "loss": 0.6879, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4709213674068451, "rewards/margins": 0.12012644112110138, "rewards/rejected": -0.5910478830337524, "step": 1090 }, { "epoch": 0.08, "learning_rate": 1.6224188790560472e-06, "logits/chosen": -2.1289896965026855, "logits/rejected": -1.3893194198608398, "logps/chosen": -594.585205078125, "logps/rejected": -716.3053588867188, "loss": 0.6787, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3927874267101288, "rewards/margins": 0.19674177467823029, "rewards/rejected": -0.5895292162895203, "step": 1100 }, { "epoch": 0.08, "learning_rate": 1.6371681415929204e-06, "logits/chosen": -1.8561840057373047, "logits/rejected": -1.3663225173950195, "logps/chosen": -669.5776977539062, "logps/rejected": -752.024658203125, "loss": 0.6889, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.46565762162208557, "rewards/margins": 0.11932416260242462, "rewards/rejected": -0.5849817395210266, "step": 1110 }, { "epoch": 0.08, "learning_rate": 1.6519174041297934e-06, "logits/chosen": -2.0483651161193848, "logits/rejected": -1.426218032836914, "logps/chosen": -516.1016845703125, "logps/rejected": -638.51171875, "loss": 0.6787, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3440650403499603, "rewards/margins": 0.17543914914131165, "rewards/rejected": -0.5195042490959167, "step": 1120 }, { "epoch": 0.08, "learning_rate": 1.6666666666666667e-06, "logits/chosen": -2.0667457580566406, "logits/rejected": -1.5793195962905884, "logps/chosen": -539.2823486328125, "logps/rejected": -640.5115966796875, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37464168667793274, "rewards/margins": 0.14029303193092346, "rewards/rejected": -0.5149346590042114, "step": 1130 }, { "epoch": 0.08, "learning_rate": 1.6814159292035397e-06, "logits/chosen": -2.0009560585021973, "logits/rejected": -1.537710428237915, "logps/chosen": -436.174072265625, "logps/rejected": -524.16162109375, "loss": 0.6824, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2712467312812805, "rewards/margins": 0.1515611708164215, "rewards/rejected": -0.42280787229537964, "step": 1140 }, { "epoch": 0.08, "learning_rate": 1.6961651917404129e-06, "logits/chosen": -2.1933929920196533, "logits/rejected": -1.6670795679092407, "logps/chosen": -348.17242431640625, "logps/rejected": -427.4918518066406, "loss": 0.6815, "rewards/accuracies": 0.75, "rewards/chosen": -0.18797625601291656, "rewards/margins": 0.14571046829223633, "rewards/rejected": -0.3336867094039917, "step": 1150 }, { "epoch": 0.09, "learning_rate": 1.7109144542772859e-06, "logits/chosen": -1.9461838006973267, "logits/rejected": -1.5537418127059937, "logps/chosen": -355.2071838378906, "logps/rejected": -443.58001708984375, "loss": 0.6819, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20610089600086212, "rewards/margins": 0.11573970317840576, "rewards/rejected": -0.3218405842781067, "step": 1160 }, { "epoch": 0.09, "learning_rate": 1.7256637168141593e-06, "logits/chosen": -2.252216339111328, "logits/rejected": -1.6497509479522705, "logps/chosen": -494.3583984375, "logps/rejected": -589.7620849609375, "loss": 0.6814, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29563161730766296, "rewards/margins": 0.1543828547000885, "rewards/rejected": -0.4500144422054291, "step": 1170 }, { "epoch": 0.09, "learning_rate": 1.7404129793510323e-06, "logits/chosen": -2.1780076026916504, "logits/rejected": -1.790736436843872, "logps/chosen": -514.9185180664062, "logps/rejected": -605.7840576171875, "loss": 0.6829, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.32291343808174133, "rewards/margins": 0.11645036935806274, "rewards/rejected": -0.4393637776374817, "step": 1180 }, { "epoch": 0.09, "learning_rate": 1.7551622418879055e-06, "logits/chosen": -2.0907061100006104, "logits/rejected": -1.4407957792282104, "logps/chosen": -466.4165954589844, "logps/rejected": -641.7086791992188, "loss": 0.6802, "rewards/accuracies": 0.875, "rewards/chosen": -0.28980547189712524, "rewards/margins": 0.19592610001564026, "rewards/rejected": -0.4857315421104431, "step": 1190 }, { "epoch": 0.09, "learning_rate": 1.7699115044247788e-06, "logits/chosen": -2.275804281234741, "logits/rejected": -1.7585642337799072, "logps/chosen": -346.27783203125, "logps/rejected": -509.17401123046875, "loss": 0.6761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.16943533718585968, "rewards/margins": 0.1816909909248352, "rewards/rejected": -0.3511263430118561, "step": 1200 }, { "epoch": 0.09, "learning_rate": 1.7846607669616518e-06, "logits/chosen": -2.1075799465179443, "logits/rejected": -1.5012038946151733, "logps/chosen": -297.3506164550781, "logps/rejected": -406.990478515625, "loss": 0.6838, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10909414291381836, "rewards/margins": 0.17410647869110107, "rewards/rejected": -0.28320062160491943, "step": 1210 }, { "epoch": 0.09, "learning_rate": 1.799410029498525e-06, "logits/chosen": -2.1270291805267334, "logits/rejected": -1.643423318862915, "logps/chosen": -387.9724426269531, "logps/rejected": -435.3370056152344, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1793821156024933, "rewards/margins": 0.11739878356456757, "rewards/rejected": -0.29678088426589966, "step": 1220 }, { "epoch": 0.09, "learning_rate": 1.814159292035398e-06, "logits/chosen": -2.2574245929718018, "logits/rejected": -1.7427527904510498, "logps/chosen": -454.8583984375, "logps/rejected": -611.4116821289062, "loss": 0.6797, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.255622923374176, "rewards/margins": 0.1822855919599533, "rewards/rejected": -0.43790850043296814, "step": 1230 }, { "epoch": 0.09, "learning_rate": 1.8289085545722714e-06, "logits/chosen": -2.194693088531494, "logits/rejected": -1.7533257007598877, "logps/chosen": -645.2263793945312, "logps/rejected": -723.169921875, "loss": 0.6872, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4186306595802307, "rewards/margins": 0.10327009111642838, "rewards/rejected": -0.5219007730484009, "step": 1240 }, { "epoch": 0.09, "learning_rate": 1.8436578171091444e-06, "logits/chosen": -1.981554388999939, "logits/rejected": -1.625763177871704, "logps/chosen": -474.239501953125, "logps/rejected": -576.1403198242188, "loss": 0.6827, "rewards/accuracies": 0.625, "rewards/chosen": -0.3345000743865967, "rewards/margins": 0.12988083064556122, "rewards/rejected": -0.4643809199333191, "step": 1250 }, { "epoch": 0.09, "learning_rate": 1.8584070796460177e-06, "logits/chosen": -1.932936429977417, "logits/rejected": -1.690974473953247, "logps/chosen": -493.59613037109375, "logps/rejected": -580.0507202148438, "loss": 0.6854, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.36805400252342224, "rewards/margins": 0.09922134131193161, "rewards/rejected": -0.46727538108825684, "step": 1260 }, { "epoch": 0.09, "learning_rate": 1.8731563421828909e-06, "logits/chosen": -1.9871766567230225, "logits/rejected": -1.3164575099945068, "logps/chosen": -444.3135681152344, "logps/rejected": -581.8632202148438, "loss": 0.679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2506588101387024, "rewards/margins": 0.16451381146907806, "rewards/rejected": -0.41517263650894165, "step": 1270 }, { "epoch": 0.09, "learning_rate": 1.8879056047197639e-06, "logits/chosen": -1.9025882482528687, "logits/rejected": -1.4781690835952759, "logps/chosen": -433.8937072753906, "logps/rejected": -526.9969482421875, "loss": 0.6856, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26545897126197815, "rewards/margins": 0.12487940490245819, "rewards/rejected": -0.39033836126327515, "step": 1280 }, { "epoch": 0.1, "learning_rate": 1.902654867256637e-06, "logits/chosen": -2.282041072845459, "logits/rejected": -1.714155912399292, "logps/chosen": -430.82379150390625, "logps/rejected": -596.111572265625, "loss": 0.6775, "rewards/accuracies": 0.875, "rewards/chosen": -0.27087658643722534, "rewards/margins": 0.19729985296726227, "rewards/rejected": -0.4681764245033264, "step": 1290 }, { "epoch": 0.1, "learning_rate": 1.9174041297935103e-06, "logits/chosen": -1.8444983959197998, "logits/rejected": -1.3272278308868408, "logps/chosen": -561.034423828125, "logps/rejected": -709.1934204101562, "loss": 0.6768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4263208508491516, "rewards/margins": 0.17058885097503662, "rewards/rejected": -0.5969096422195435, "step": 1300 }, { "epoch": 0.1, "learning_rate": 1.9321533923303833e-06, "logits/chosen": -2.2317490577697754, "logits/rejected": -1.9343578815460205, "logps/chosen": -516.4401245117188, "logps/rejected": -600.7124633789062, "loss": 0.6822, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3139973282814026, "rewards/margins": 0.1263313740491867, "rewards/rejected": -0.4403286576271057, "step": 1310 }, { "epoch": 0.1, "learning_rate": 1.9469026548672563e-06, "logits/chosen": -1.789336919784546, "logits/rejected": -1.358984351158142, "logps/chosen": -596.3206176757812, "logps/rejected": -721.9378662109375, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46541228890419006, "rewards/margins": 0.1389506608247757, "rewards/rejected": -0.604362964630127, "step": 1320 }, { "epoch": 0.1, "learning_rate": 1.9616519174041298e-06, "logits/chosen": -2.2022478580474854, "logits/rejected": -1.5103787183761597, "logps/chosen": -500.68731689453125, "logps/rejected": -660.9244995117188, "loss": 0.676, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36251771450042725, "rewards/margins": 0.19239473342895508, "rewards/rejected": -0.5549124479293823, "step": 1330 }, { "epoch": 0.1, "learning_rate": 1.9764011799410028e-06, "logits/chosen": -2.214374303817749, "logits/rejected": -1.6918233633041382, "logps/chosen": -496.39324951171875, "logps/rejected": -643.2579956054688, "loss": 0.6792, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3670996427536011, "rewards/margins": 0.15187014639377594, "rewards/rejected": -0.5189697742462158, "step": 1340 }, { "epoch": 0.1, "learning_rate": 1.991150442477876e-06, "logits/chosen": -1.8122888803482056, "logits/rejected": -1.3947474956512451, "logps/chosen": -694.30712890625, "logps/rejected": -767.5508422851562, "loss": 0.6846, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5380566716194153, "rewards/margins": 0.11284542083740234, "rewards/rejected": -0.6509021520614624, "step": 1350 }, { "epoch": 0.1, "learning_rate": 1.9999994696057103e-06, "logits/chosen": -1.9325687885284424, "logits/rejected": -1.3985236883163452, "logps/chosen": -703.7893676757812, "logps/rejected": -823.6638793945312, "loss": 0.6821, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5505725145339966, "rewards/margins": 0.1536271870136261, "rewards/rejected": -0.7041997313499451, "step": 1360 }, { "epoch": 0.1, "learning_rate": 1.999993502676413e-06, "logits/chosen": -1.8051211833953857, "logits/rejected": -1.4404356479644775, "logps/chosen": -608.4217529296875, "logps/rejected": -709.807373046875, "loss": 0.6859, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4883767068386078, "rewards/margins": 0.09106218814849854, "rewards/rejected": -0.5794388651847839, "step": 1370 }, { "epoch": 0.1, "learning_rate": 1.999980905864648e-06, "logits/chosen": -2.163144826889038, "logits/rejected": -1.6438839435577393, "logps/chosen": -533.7173461914062, "logps/rejected": -607.3411865234375, "loss": 0.6807, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3377875089645386, "rewards/margins": 0.15285398066043854, "rewards/rejected": -0.4906415045261383, "step": 1380 }, { "epoch": 0.1, "learning_rate": 1.999961679253931e-06, "logits/chosen": -2.1403937339782715, "logits/rejected": -1.7530654668807983, "logps/chosen": -519.6219482421875, "logps/rejected": -657.9866943359375, "loss": 0.6797, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34226375818252563, "rewards/margins": 0.12511073052883148, "rewards/rejected": -0.4673744738101959, "step": 1390 }, { "epoch": 0.1, "learning_rate": 1.999935822971734e-06, "logits/chosen": -1.9589500427246094, "logits/rejected": -1.63176691532135, "logps/chosen": -551.3702392578125, "logps/rejected": -648.7730102539062, "loss": 0.6853, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.44185614585876465, "rewards/margins": 0.10365728288888931, "rewards/rejected": -0.5455134510993958, "step": 1400 }, { "epoch": 0.1, "learning_rate": 1.9999033371894815e-06, "logits/chosen": -2.215045690536499, "logits/rejected": -1.4908822774887085, "logps/chosen": -535.8220825195312, "logps/rejected": -718.9690551757812, "loss": 0.6818, "rewards/accuracies": 0.75, "rewards/chosen": -0.3644847273826599, "rewards/margins": 0.21920037269592285, "rewards/rejected": -0.583685040473938, "step": 1410 }, { "epoch": 0.1, "learning_rate": 1.9998642221225523e-06, "logits/chosen": -2.1449320316314697, "logits/rejected": -1.481069803237915, "logps/chosen": -448.8369140625, "logps/rejected": -547.2723388671875, "loss": 0.6775, "rewards/accuracies": 0.75, "rewards/chosen": -0.30630773305892944, "rewards/margins": 0.15731574594974518, "rewards/rejected": -0.4636234641075134, "step": 1420 }, { "epoch": 0.11, "learning_rate": 1.9998184780302757e-06, "logits/chosen": -2.166686534881592, "logits/rejected": -1.7364368438720703, "logps/chosen": -405.36309814453125, "logps/rejected": -519.4251098632812, "loss": 0.6785, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2770271301269531, "rewards/margins": 0.13991717994213104, "rewards/rejected": -0.4169442653656006, "step": 1430 }, { "epoch": 0.11, "learning_rate": 1.999766105215932e-06, "logits/chosen": -2.1755428314208984, "logits/rejected": -1.6912771463394165, "logps/chosen": -318.1728210449219, "logps/rejected": -395.6455993652344, "loss": 0.6791, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.16724170744419098, "rewards/margins": 0.14642342925071716, "rewards/rejected": -0.31366512179374695, "step": 1440 }, { "epoch": 0.11, "learning_rate": 1.9997071040267493e-06, "logits/chosen": -2.2681000232696533, "logits/rejected": -1.6288955211639404, "logps/chosen": -430.5436096191406, "logps/rejected": -497.3349609375, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.20805959403514862, "rewards/margins": 0.1304154098033905, "rewards/rejected": -0.3384750485420227, "step": 1450 }, { "epoch": 0.11, "learning_rate": 1.9996414748539005e-06, "logits/chosen": -2.1371142864227295, "logits/rejected": -1.6711323261260986, "logps/chosen": -509.31097412109375, "logps/rejected": -652.7835083007812, "loss": 0.6819, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4125543236732483, "rewards/margins": 0.14607742428779602, "rewards/rejected": -0.5586317777633667, "step": 1460 }, { "epoch": 0.11, "learning_rate": 1.999569218132503e-06, "logits/chosen": -1.9511295557022095, "logits/rejected": -1.4729387760162354, "logps/chosen": -632.75537109375, "logps/rejected": -757.8477783203125, "loss": 0.6798, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4689842164516449, "rewards/margins": 0.16114775836467743, "rewards/rejected": -0.6301319599151611, "step": 1470 }, { "epoch": 0.11, "learning_rate": 1.999490334341612e-06, "logits/chosen": -2.0273563861846924, "logits/rejected": -1.4449615478515625, "logps/chosen": -583.7864379882812, "logps/rejected": -733.46240234375, "loss": 0.6784, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43196600675582886, "rewards/margins": 0.19194307923316956, "rewards/rejected": -0.6239091157913208, "step": 1480 }, { "epoch": 0.11, "learning_rate": 1.9994048240042233e-06, "logits/chosen": -2.2775654792785645, "logits/rejected": -1.7384496927261353, "logps/chosen": -412.26251220703125, "logps/rejected": -515.9073486328125, "loss": 0.6869, "rewards/accuracies": 0.625, "rewards/chosen": -0.27084943652153015, "rewards/margins": 0.1301964819431305, "rewards/rejected": -0.40104585886001587, "step": 1490 }, { "epoch": 0.11, "learning_rate": 1.9993126876872622e-06, "logits/chosen": -2.173295021057129, "logits/rejected": -1.6451059579849243, "logps/chosen": -412.520263671875, "logps/rejected": -487.0702209472656, "loss": 0.6826, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20431682467460632, "rewards/margins": 0.13868796825408936, "rewards/rejected": -0.3430047631263733, "step": 1500 }, { "epoch": 0.11, "learning_rate": 1.9992139260015867e-06, "logits/chosen": -1.971475601196289, "logits/rejected": -1.8406391143798828, "logps/chosen": -403.51373291015625, "logps/rejected": -487.52435302734375, "loss": 0.6872, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2509104609489441, "rewards/margins": 0.06281408667564392, "rewards/rejected": -0.3137245178222656, "step": 1510 }, { "epoch": 0.11, "learning_rate": 1.9991085396019785e-06, "logits/chosen": -1.9074268341064453, "logits/rejected": -1.6915900707244873, "logps/chosen": -546.7852783203125, "logps/rejected": -630.8258056640625, "loss": 0.6872, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3934883773326874, "rewards/margins": 0.08878784626722336, "rewards/rejected": -0.4822762608528137, "step": 1520 }, { "epoch": 0.11, "learning_rate": 1.9989965291871425e-06, "logits/chosen": -2.318209171295166, "logits/rejected": -1.7701486349105835, "logps/chosen": -446.51031494140625, "logps/rejected": -580.0472412109375, "loss": 0.6817, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3165345788002014, "rewards/margins": 0.14032648503780365, "rewards/rejected": -0.4568610191345215, "step": 1530 }, { "epoch": 0.11, "learning_rate": 1.998877895499699e-06, "logits/chosen": -1.9554054737091064, "logits/rejected": -1.6552547216415405, "logps/chosen": -633.8460693359375, "logps/rejected": -723.4652099609375, "loss": 0.684, "rewards/accuracies": 0.625, "rewards/chosen": -0.475745290517807, "rewards/margins": 0.11988280713558197, "rewards/rejected": -0.5956281423568726, "step": 1540 }, { "epoch": 0.11, "learning_rate": 1.9987526393261806e-06, "logits/chosen": -2.227120876312256, "logits/rejected": -1.2914965152740479, "logps/chosen": -646.6467895507812, "logps/rejected": -782.1441650390625, "loss": 0.6773, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.45034775137901306, "rewards/margins": 0.22521519660949707, "rewards/rejected": -0.6755629777908325, "step": 1550 }, { "epoch": 0.12, "learning_rate": 1.9986207614970263e-06, "logits/chosen": -2.1799514293670654, "logits/rejected": -1.455152988433838, "logps/chosen": -554.6602172851562, "logps/rejected": -653.5736083984375, "loss": 0.6809, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3758121132850647, "rewards/margins": 0.16006751358509064, "rewards/rejected": -0.5358796715736389, "step": 1560 }, { "epoch": 0.12, "learning_rate": 1.998482262886576e-06, "logits/chosen": -2.0476324558258057, "logits/rejected": -1.80020272731781, "logps/chosen": -509.1025390625, "logps/rejected": -597.7630615234375, "loss": 0.6814, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3788357079029083, "rewards/margins": 0.0950925201177597, "rewards/rejected": -0.4739282727241516, "step": 1570 }, { "epoch": 0.12, "learning_rate": 1.998337144413066e-06, "logits/chosen": -2.563739061355591, "logits/rejected": -2.1014506816864014, "logps/chosen": -460.3665466308594, "logps/rejected": -547.9821166992188, "loss": 0.681, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2672463357448578, "rewards/margins": 0.13321053981781006, "rewards/rejected": -0.40045690536499023, "step": 1580 }, { "epoch": 0.12, "learning_rate": 1.99818540703862e-06, "logits/chosen": -2.1856913566589355, "logits/rejected": -1.879394769668579, "logps/chosen": -490.0625915527344, "logps/rejected": -637.53662109375, "loss": 0.6839, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.319746196269989, "rewards/margins": 0.16988806426525116, "rewards/rejected": -0.48963427543640137, "step": 1590 }, { "epoch": 0.12, "learning_rate": 1.9980270517692463e-06, "logits/chosen": -2.1974167823791504, "logits/rejected": -1.8297550678253174, "logps/chosen": -392.4290771484375, "logps/rejected": -544.1526489257812, "loss": 0.6799, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2363094836473465, "rewards/margins": 0.18921510875225067, "rewards/rejected": -0.42552462220191956, "step": 1600 }, { "epoch": 0.12, "learning_rate": 1.997862079654828e-06, "logits/chosen": -2.1048672199249268, "logits/rejected": -1.4488255977630615, "logps/chosen": -385.8687744140625, "logps/rejected": -527.2644653320312, "loss": 0.6814, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2404567301273346, "rewards/margins": 0.16764281690120697, "rewards/rejected": -0.40809956192970276, "step": 1610 }, { "epoch": 0.12, "learning_rate": 1.997690491789118e-06, "logits/chosen": -2.2880477905273438, "logits/rejected": -1.533829689025879, "logps/chosen": -356.5924377441406, "logps/rejected": -487.3312072753906, "loss": 0.6778, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21497996151447296, "rewards/margins": 0.19192147254943848, "rewards/rejected": -0.40690141916275024, "step": 1620 }, { "epoch": 0.12, "learning_rate": 1.997512289309732e-06, "logits/chosen": -2.21211838722229, "logits/rejected": -1.6809186935424805, "logps/chosen": -437.10589599609375, "logps/rejected": -568.8510131835938, "loss": 0.6773, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2840419411659241, "rewards/margins": 0.1726645529270172, "rewards/rejected": -0.4567064642906189, "step": 1630 }, { "epoch": 0.12, "learning_rate": 1.9973274733981376e-06, "logits/chosen": -2.021399974822998, "logits/rejected": -1.578420877456665, "logps/chosen": -359.6808166503906, "logps/rejected": -497.19671630859375, "loss": 0.6843, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2049027383327484, "rewards/margins": 0.16140273213386536, "rewards/rejected": -0.3663054406642914, "step": 1640 }, { "epoch": 0.12, "learning_rate": 1.997136045279652e-06, "logits/chosen": -2.1971583366394043, "logits/rejected": -1.3694688081741333, "logps/chosen": -378.8840026855469, "logps/rejected": -468.3028869628906, "loss": 0.6784, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.200626403093338, "rewards/margins": 0.1378859430551529, "rewards/rejected": -0.3385123312473297, "step": 1650 }, { "epoch": 0.12, "learning_rate": 1.9969380062234286e-06, "logits/chosen": -2.414825439453125, "logits/rejected": -1.9209610223770142, "logps/chosen": -354.9041442871094, "logps/rejected": -477.86810302734375, "loss": 0.6827, "rewards/accuracies": 0.75, "rewards/chosen": -0.19583168625831604, "rewards/margins": 0.12506791949272156, "rewards/rejected": -0.3208996057510376, "step": 1660 }, { "epoch": 0.12, "learning_rate": 1.9967333575424514e-06, "logits/chosen": -2.3396060466766357, "logits/rejected": -1.7946226596832275, "logps/chosen": -410.3387756347656, "logps/rejected": -455.72576904296875, "loss": 0.6846, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2290501594543457, "rewards/margins": 0.10495376586914062, "rewards/rejected": -0.3340039551258087, "step": 1670 }, { "epoch": 0.12, "learning_rate": 1.996522100593526e-06, "logits/chosen": -2.0134437084198, "logits/rejected": -1.5269577503204346, "logps/chosen": -372.8021545410156, "logps/rejected": -483.17193603515625, "loss": 0.676, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2021164447069168, "rewards/margins": 0.14655785262584686, "rewards/rejected": -0.34867429733276367, "step": 1680 }, { "epoch": 0.12, "learning_rate": 1.996304236777271e-06, "logits/chosen": -2.1244611740112305, "logits/rejected": -1.8585565090179443, "logps/chosen": -331.31695556640625, "logps/rejected": -511.27813720703125, "loss": 0.6762, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18127967417240143, "rewards/margins": 0.15818683803081512, "rewards/rejected": -0.33946651220321655, "step": 1690 }, { "epoch": 0.13, "learning_rate": 1.9960797675381063e-06, "logits/chosen": -1.9992763996124268, "logits/rejected": -1.549080729484558, "logps/chosen": -424.00653076171875, "logps/rejected": -504.54058837890625, "loss": 0.6821, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2610388994216919, "rewards/margins": 0.13908721506595612, "rewards/rejected": -0.40012606978416443, "step": 1700 }, { "epoch": 0.13, "learning_rate": 1.995848694364247e-06, "logits/chosen": -2.1716229915618896, "logits/rejected": -1.561492681503296, "logps/chosen": -404.39385986328125, "logps/rejected": -504.2957458496094, "loss": 0.6852, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27615439891815186, "rewards/margins": 0.13192516565322876, "rewards/rejected": -0.4080795347690582, "step": 1710 }, { "epoch": 0.13, "learning_rate": 1.9956110187876906e-06, "logits/chosen": -1.8921140432357788, "logits/rejected": -1.7192795276641846, "logps/chosen": -407.2046813964844, "logps/rejected": -506.37921142578125, "loss": 0.684, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.29606521129608154, "rewards/margins": 0.0948551595211029, "rewards/rejected": -0.39092037081718445, "step": 1720 }, { "epoch": 0.13, "learning_rate": 1.995366742384209e-06, "logits/chosen": -1.955749750137329, "logits/rejected": -1.7744699716567993, "logps/chosen": -520.3878173828125, "logps/rejected": -679.7459716796875, "loss": 0.6821, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35224515199661255, "rewards/margins": 0.13809241354465485, "rewards/rejected": -0.4903375506401062, "step": 1730 }, { "epoch": 0.13, "learning_rate": 1.9951158667733355e-06, "logits/chosen": -1.8754088878631592, "logits/rejected": -1.5112924575805664, "logps/chosen": -529.1380004882812, "logps/rejected": -714.394775390625, "loss": 0.6805, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3480129539966583, "rewards/margins": 0.1700725257396698, "rewards/rejected": -0.5180854201316833, "step": 1740 }, { "epoch": 0.13, "learning_rate": 1.9948583936183577e-06, "logits/chosen": -2.362107038497925, "logits/rejected": -1.6589584350585938, "logps/chosen": -426.4891662597656, "logps/rejected": -487.17205810546875, "loss": 0.6848, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22277331352233887, "rewards/margins": 0.12259776890277863, "rewards/rejected": -0.3453710675239563, "step": 1750 }, { "epoch": 0.13, "learning_rate": 1.9945943246263035e-06, "logits/chosen": -2.134082078933716, "logits/rejected": -1.558114767074585, "logps/chosen": -436.6587829589844, "logps/rejected": -546.7854614257812, "loss": 0.6804, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27406829595565796, "rewards/margins": 0.16911070048809052, "rewards/rejected": -0.44317904114723206, "step": 1760 }, { "epoch": 0.13, "learning_rate": 1.99432366154793e-06, "logits/chosen": -2.0211758613586426, "logits/rejected": -1.394614577293396, "logps/chosen": -403.2091064453125, "logps/rejected": -604.1204833984375, "loss": 0.6704, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.26712530851364136, "rewards/margins": 0.23116913437843323, "rewards/rejected": -0.49829450249671936, "step": 1770 }, { "epoch": 0.13, "learning_rate": 1.9940464061777133e-06, "logits/chosen": -2.127425193786621, "logits/rejected": -1.556174397468567, "logps/chosen": -439.883544921875, "logps/rejected": -613.7667236328125, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2673827111721039, "rewards/margins": 0.2295324057340622, "rewards/rejected": -0.49691519141197205, "step": 1780 }, { "epoch": 0.13, "learning_rate": 1.9937625603538366e-06, "logits/chosen": -2.1981849670410156, "logits/rejected": -1.7439749240875244, "logps/chosen": -516.1121826171875, "logps/rejected": -600.1390380859375, "loss": 0.6766, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32249101996421814, "rewards/margins": 0.14593812823295593, "rewards/rejected": -0.46842917799949646, "step": 1790 }, { "epoch": 0.13, "learning_rate": 1.9934721259581756e-06, "logits/chosen": -2.0773940086364746, "logits/rejected": -1.364639163017273, "logps/chosen": -516.6227416992188, "logps/rejected": -682.7853393554688, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3710174262523651, "rewards/margins": 0.2085229605436325, "rewards/rejected": -0.5795403718948364, "step": 1800 }, { "epoch": 0.13, "learning_rate": 1.9931751049162895e-06, "logits/chosen": -2.057982921600342, "logits/rejected": -1.7405674457550049, "logps/chosen": -523.1428833007812, "logps/rejected": -691.8033447265625, "loss": 0.677, "rewards/accuracies": 0.625, "rewards/chosen": -0.39121729135513306, "rewards/margins": 0.16593009233474731, "rewards/rejected": -0.5571473240852356, "step": 1810 }, { "epoch": 0.13, "learning_rate": 1.9928714991974053e-06, "logits/chosen": -1.7760746479034424, "logits/rejected": -1.6465364694595337, "logps/chosen": -626.7222900390625, "logps/rejected": -736.0545654296875, "loss": 0.6814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4608880579471588, "rewards/margins": 0.11436257511377335, "rewards/rejected": -0.5752506256103516, "step": 1820 }, { "epoch": 0.13, "learning_rate": 1.9925613108144063e-06, "logits/chosen": -1.9623081684112549, "logits/rejected": -1.7467591762542725, "logps/chosen": -507.3812561035156, "logps/rejected": -615.3094482421875, "loss": 0.6839, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37416714429855347, "rewards/margins": 0.11307764053344727, "rewards/rejected": -0.48724475502967834, "step": 1830 }, { "epoch": 0.14, "learning_rate": 1.9922445418238186e-06, "logits/chosen": -2.107588291168213, "logits/rejected": -1.5955352783203125, "logps/chosen": -598.5543212890625, "logps/rejected": -692.2508544921875, "loss": 0.6836, "rewards/accuracies": 0.75, "rewards/chosen": -0.3920690417289734, "rewards/margins": 0.1306825876235962, "rewards/rejected": -0.5227516889572144, "step": 1840 }, { "epoch": 0.14, "learning_rate": 1.9919211943257968e-06, "logits/chosen": -2.0120301246643066, "logits/rejected": -1.3793399333953857, "logps/chosen": -437.8439025878906, "logps/rejected": -598.2174682617188, "loss": 0.6771, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25381165742874146, "rewards/margins": 0.19378872215747833, "rewards/rejected": -0.4476003050804138, "step": 1850 }, { "epoch": 0.14, "learning_rate": 1.991591270464111e-06, "logits/chosen": -2.064737319946289, "logits/rejected": -1.5516713857650757, "logps/chosen": -571.68017578125, "logps/rejected": -719.4283447265625, "loss": 0.6799, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4267357289791107, "rewards/margins": 0.18366481363773346, "rewards/rejected": -0.6104005575180054, "step": 1860 }, { "epoch": 0.14, "learning_rate": 1.991254772426132e-06, "logits/chosen": -1.9879413843154907, "logits/rejected": -1.4912253618240356, "logps/chosen": -416.9712829589844, "logps/rejected": -534.0332641601562, "loss": 0.6796, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2814635634422302, "rewards/margins": 0.13361966609954834, "rewards/rejected": -0.41508325934410095, "step": 1870 }, { "epoch": 0.14, "learning_rate": 1.990911702442816e-06, "logits/chosen": -2.1051621437072754, "logits/rejected": -1.6577152013778687, "logps/chosen": -376.0788269042969, "logps/rejected": -516.6329345703125, "loss": 0.6786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27118977904319763, "rewards/margins": 0.1492997258901596, "rewards/rejected": -0.42048949003219604, "step": 1880 }, { "epoch": 0.14, "learning_rate": 1.990562062788692e-06, "logits/chosen": -2.217991352081299, "logits/rejected": -1.6552913188934326, "logps/chosen": -403.11077880859375, "logps/rejected": -585.6751098632812, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.23589582741260529, "rewards/margins": 0.22134093940258026, "rewards/rejected": -0.45723676681518555, "step": 1890 }, { "epoch": 0.14, "learning_rate": 1.990205855781845e-06, "logits/chosen": -2.229877233505249, "logits/rejected": -1.9553369283676147, "logps/chosen": -461.4639587402344, "logps/rejected": -540.4766845703125, "loss": 0.6848, "rewards/accuracies": 0.625, "rewards/chosen": -0.2340126782655716, "rewards/margins": 0.14036735892295837, "rewards/rejected": -0.3743800222873688, "step": 1900 }, { "epoch": 0.14, "learning_rate": 1.9898430837839003e-06, "logits/chosen": -2.375298023223877, "logits/rejected": -1.7536661624908447, "logps/chosen": -418.77813720703125, "logps/rejected": -559.9016723632812, "loss": 0.6832, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.21028265357017517, "rewards/margins": 0.19404336810112, "rewards/rejected": -0.4043259620666504, "step": 1910 }, { "epoch": 0.14, "learning_rate": 1.9894737492000096e-06, "logits/chosen": -2.2256250381469727, "logits/rejected": -1.7377650737762451, "logps/chosen": -412.86083984375, "logps/rejected": -498.0398864746094, "loss": 0.6847, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24303090572357178, "rewards/margins": 0.10196753591299057, "rewards/rejected": -0.34499844908714294, "step": 1920 }, { "epoch": 0.14, "learning_rate": 1.989097854478834e-06, "logits/chosen": -2.0625693798065186, "logits/rejected": -1.69179368019104, "logps/chosen": -417.42401123046875, "logps/rejected": -542.69677734375, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27093639969825745, "rewards/margins": 0.1289728730916977, "rewards/rejected": -0.39990925788879395, "step": 1930 }, { "epoch": 0.14, "learning_rate": 1.9887154021125274e-06, "logits/chosen": -2.1333374977111816, "logits/rejected": -1.6996504068374634, "logps/chosen": -508.44964599609375, "logps/rejected": -597.1027221679688, "loss": 0.6857, "rewards/accuracies": 0.75, "rewards/chosen": -0.2977626919746399, "rewards/margins": 0.11212259531021118, "rewards/rejected": -0.4098852276802063, "step": 1940 }, { "epoch": 0.14, "learning_rate": 1.98832639463672e-06, "logits/chosen": -2.265817165374756, "logits/rejected": -1.724732756614685, "logps/chosen": -456.67254638671875, "logps/rejected": -578.61767578125, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29322826862335205, "rewards/margins": 0.16853314638137817, "rewards/rejected": -0.46176138520240784, "step": 1950 }, { "epoch": 0.14, "learning_rate": 1.9879308346305024e-06, "logits/chosen": -2.156749725341797, "logits/rejected": -1.7813732624053955, "logps/chosen": -363.7722473144531, "logps/rejected": -398.2575378417969, "loss": 0.689, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.15288913249969482, "rewards/margins": 0.08133377134799957, "rewards/rejected": -0.2342229187488556, "step": 1960 }, { "epoch": 0.15, "learning_rate": 1.9875287247164086e-06, "logits/chosen": -2.113213300704956, "logits/rejected": -1.7418320178985596, "logps/chosen": -365.4167785644531, "logps/rejected": -477.7752990722656, "loss": 0.6828, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23300385475158691, "rewards/margins": 0.11913783848285675, "rewards/rejected": -0.35214167833328247, "step": 1970 }, { "epoch": 0.15, "learning_rate": 1.9871200675603975e-06, "logits/chosen": -2.0890650749206543, "logits/rejected": -1.6295700073242188, "logps/chosen": -363.0270080566406, "logps/rejected": -474.58221435546875, "loss": 0.6836, "rewards/accuracies": 0.625, "rewards/chosen": -0.227474644780159, "rewards/margins": 0.12380275875329971, "rewards/rejected": -0.3512773811817169, "step": 1980 }, { "epoch": 0.15, "learning_rate": 1.986704865871835e-06, "logits/chosen": -2.1811935901641846, "logits/rejected": -1.477246880531311, "logps/chosen": -367.9621887207031, "logps/rejected": -556.4381713867188, "loss": 0.6757, "rewards/accuracies": 0.75, "rewards/chosen": -0.21954922378063202, "rewards/margins": 0.21322636306285858, "rewards/rejected": -0.4327755868434906, "step": 1990 }, { "epoch": 0.15, "learning_rate": 1.9862831224034777e-06, "logits/chosen": -2.1246094703674316, "logits/rejected": -1.8382002115249634, "logps/chosen": -480.79534912109375, "logps/rejected": -585.4622802734375, "loss": 0.683, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3411432206630707, "rewards/margins": 0.12348712980747223, "rewards/rejected": -0.4646304249763489, "step": 2000 }, { "epoch": 0.15, "learning_rate": 1.985854839951453e-06, "logits/chosen": -2.4246280193328857, "logits/rejected": -1.6570818424224854, "logps/chosen": -542.4102172851562, "logps/rejected": -663.9178466796875, "loss": 0.6809, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3478885889053345, "rewards/margins": 0.1833447962999344, "rewards/rejected": -0.5312334299087524, "step": 2010 }, { "epoch": 0.15, "learning_rate": 1.9854200213552424e-06, "logits/chosen": -2.2673583030700684, "logits/rejected": -1.6084970235824585, "logps/chosen": -459.7041015625, "logps/rejected": -615.490478515625, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30520546436309814, "rewards/margins": 0.20309552550315857, "rewards/rejected": -0.5083010196685791, "step": 2020 }, { "epoch": 0.15, "learning_rate": 1.9849786694976603e-06, "logits/chosen": -2.2747583389282227, "logits/rejected": -1.7293875217437744, "logps/chosen": -491.15997314453125, "logps/rejected": -651.22021484375, "loss": 0.6786, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29279032349586487, "rewards/margins": 0.18734855949878693, "rewards/rejected": -0.4801388680934906, "step": 2030 }, { "epoch": 0.15, "learning_rate": 1.9845307873048368e-06, "logits/chosen": -2.182408571243286, "logits/rejected": -1.6915943622589111, "logps/chosen": -451.22802734375, "logps/rejected": -570.6722412109375, "loss": 0.6836, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2847336530685425, "rewards/margins": 0.14361009001731873, "rewards/rejected": -0.4283437728881836, "step": 2040 }, { "epoch": 0.15, "learning_rate": 1.9840763777461974e-06, "logits/chosen": -2.3452675342559814, "logits/rejected": -1.6239277124404907, "logps/chosen": -371.0315246582031, "logps/rejected": -525.3397216796875, "loss": 0.6817, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23248565196990967, "rewards/margins": 0.1838010549545288, "rewards/rejected": -0.4162867069244385, "step": 2050 }, { "epoch": 0.15, "learning_rate": 1.9836154438344437e-06, "logits/chosen": -2.111175775527954, "logits/rejected": -1.6222105026245117, "logps/chosen": -383.6284484863281, "logps/rejected": -546.2367553710938, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22101931273937225, "rewards/margins": 0.1963001936674118, "rewards/rejected": -0.41731953620910645, "step": 2060 }, { "epoch": 0.15, "learning_rate": 1.9831479886255327e-06, "logits/chosen": -2.271444797515869, "logits/rejected": -1.78238046169281, "logps/chosen": -464.9911193847656, "logps/rejected": -571.6522216796875, "loss": 0.6791, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2932819724082947, "rewards/margins": 0.157526895403862, "rewards/rejected": -0.4508088529109955, "step": 2070 }, { "epoch": 0.15, "learning_rate": 1.9826740152186585e-06, "logits/chosen": -2.031881332397461, "logits/rejected": -1.8723499774932861, "logps/chosen": -482.19744873046875, "logps/rejected": -583.9158935546875, "loss": 0.6843, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3403838872909546, "rewards/margins": 0.08817645162343979, "rewards/rejected": -0.428560346364975, "step": 2080 }, { "epoch": 0.15, "learning_rate": 1.982193526756229e-06, "logits/chosen": -2.205690383911133, "logits/rejected": -1.8983230590820312, "logps/chosen": -521.61328125, "logps/rejected": -603.3867797851562, "loss": 0.6834, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3288525938987732, "rewards/margins": 0.10827143490314484, "rewards/rejected": -0.4371240735054016, "step": 2090 }, { "epoch": 0.15, "learning_rate": 1.9817065264238474e-06, "logits/chosen": -2.3094544410705566, "logits/rejected": -2.005171775817871, "logps/chosen": -392.32415771484375, "logps/rejected": -464.47528076171875, "loss": 0.6861, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24556884169578552, "rewards/margins": 0.10507579147815704, "rewards/rejected": -0.35064464807510376, "step": 2100 }, { "epoch": 0.16, "learning_rate": 1.9812130174502894e-06, "logits/chosen": -2.123615264892578, "logits/rejected": -1.6644569635391235, "logps/chosen": -492.80767822265625, "logps/rejected": -604.5327758789062, "loss": 0.6834, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31881827116012573, "rewards/margins": 0.13975997269153595, "rewards/rejected": -0.45857828855514526, "step": 2110 }, { "epoch": 0.16, "learning_rate": 1.9807130031074823e-06, "logits/chosen": -1.7924562692642212, "logits/rejected": -1.4658875465393066, "logps/chosen": -462.9961853027344, "logps/rejected": -590.7240600585938, "loss": 0.679, "rewards/accuracies": 0.625, "rewards/chosen": -0.2967238426208496, "rewards/margins": 0.15920817852020264, "rewards/rejected": -0.45593199133872986, "step": 2120 }, { "epoch": 0.16, "learning_rate": 1.9802064867104846e-06, "logits/chosen": -2.1177046298980713, "logits/rejected": -1.6453683376312256, "logps/chosen": -512.650390625, "logps/rejected": -599.59130859375, "loss": 0.6795, "rewards/accuracies": 0.75, "rewards/chosen": -0.3396984934806824, "rewards/margins": 0.13592877984046936, "rewards/rejected": -0.47562727332115173, "step": 2130 }, { "epoch": 0.16, "learning_rate": 1.9796934716174616e-06, "logits/chosen": -2.057852029800415, "logits/rejected": -1.4254345893859863, "logps/chosen": -462.8641052246094, "logps/rejected": -545.9205932617188, "loss": 0.679, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2823829650878906, "rewards/margins": 0.15673565864562988, "rewards/rejected": -0.4391186237335205, "step": 2140 }, { "epoch": 0.16, "learning_rate": 1.979173961229666e-06, "logits/chosen": -2.0168380737304688, "logits/rejected": -1.5286903381347656, "logps/chosen": -445.228515625, "logps/rejected": -527.3106079101562, "loss": 0.6841, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2762971818447113, "rewards/margins": 0.12248468399047852, "rewards/rejected": -0.3987818658351898, "step": 2150 }, { "epoch": 0.16, "learning_rate": 1.9786479589914124e-06, "logits/chosen": -2.084642171859741, "logits/rejected": -1.5525319576263428, "logps/chosen": -500.0741271972656, "logps/rejected": -674.259765625, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3186063766479492, "rewards/margins": 0.18435920774936676, "rewards/rejected": -0.5029655694961548, "step": 2160 }, { "epoch": 0.16, "learning_rate": 1.9781154683900565e-06, "logits/chosen": -2.159834384918213, "logits/rejected": -1.626802682876587, "logps/chosen": -480.7779846191406, "logps/rejected": -619.9588623046875, "loss": 0.6798, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3393288254737854, "rewards/margins": 0.14126846194267273, "rewards/rejected": -0.48059725761413574, "step": 2170 }, { "epoch": 0.16, "learning_rate": 1.9775764929559717e-06, "logits/chosen": -2.224571466445923, "logits/rejected": -1.8512550592422485, "logps/chosen": -466.88018798828125, "logps/rejected": -599.4132080078125, "loss": 0.6821, "rewards/accuracies": 0.75, "rewards/chosen": -0.3125208020210266, "rewards/margins": 0.1303352415561676, "rewards/rejected": -0.4428560137748718, "step": 2180 }, { "epoch": 0.16, "learning_rate": 1.977031036262525e-06, "logits/chosen": -1.9518458843231201, "logits/rejected": -1.4746030569076538, "logps/chosen": -404.6263732910156, "logps/rejected": -550.0557861328125, "loss": 0.6866, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.25938206911087036, "rewards/margins": 0.17425484955310822, "rewards/rejected": -0.43363696336746216, "step": 2190 }, { "epoch": 0.16, "learning_rate": 1.976479101926053e-06, "logits/chosen": -2.216073989868164, "logits/rejected": -1.77593195438385, "logps/chosen": -425.962646484375, "logps/rejected": -564.1510620117188, "loss": 0.6815, "rewards/accuracies": 0.75, "rewards/chosen": -0.28561583161354065, "rewards/margins": 0.18433088064193726, "rewards/rejected": -0.4699467122554779, "step": 2200 }, { "epoch": 0.16, "learning_rate": 1.9759206936058403e-06, "logits/chosen": -1.9891319274902344, "logits/rejected": -1.8986231088638306, "logps/chosen": -473.3721618652344, "logps/rejected": -604.6970825195312, "loss": 0.6829, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31628313660621643, "rewards/margins": 0.13196630775928497, "rewards/rejected": -0.4482494294643402, "step": 2210 }, { "epoch": 0.16, "learning_rate": 1.9753558150040913e-06, "logits/chosen": -2.1194186210632324, "logits/rejected": -1.5483397245407104, "logps/chosen": -475.1519470214844, "logps/rejected": -582.8697509765625, "loss": 0.6825, "rewards/accuracies": 0.625, "rewards/chosen": -0.3011470437049866, "rewards/margins": 0.14656169712543488, "rewards/rejected": -0.4477086663246155, "step": 2220 }, { "epoch": 0.16, "learning_rate": 1.9747844698659104e-06, "logits/chosen": -2.5058562755584717, "logits/rejected": -1.905408501625061, "logps/chosen": -365.6783142089844, "logps/rejected": -447.09210205078125, "loss": 0.6843, "rewards/accuracies": 0.625, "rewards/chosen": -0.1576617807149887, "rewards/margins": 0.12236142158508301, "rewards/rejected": -0.2800232172012329, "step": 2230 }, { "epoch": 0.17, "learning_rate": 1.9742066619792717e-06, "logits/chosen": -2.12882399559021, "logits/rejected": -1.6879276037216187, "logps/chosen": -450.955078125, "logps/rejected": -582.2972412109375, "loss": 0.679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3059519827365875, "rewards/margins": 0.1596696525812149, "rewards/rejected": -0.4656216502189636, "step": 2240 }, { "epoch": 0.17, "learning_rate": 1.973622395175e-06, "logits/chosen": -2.171835422515869, "logits/rejected": -1.8018054962158203, "logps/chosen": -434.1897888183594, "logps/rejected": -542.2033081054688, "loss": 0.6835, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3012549877166748, "rewards/margins": 0.1230471134185791, "rewards/rejected": -0.4243021011352539, "step": 2250 }, { "epoch": 0.17, "learning_rate": 1.97303167332674e-06, "logits/chosen": -2.097998857498169, "logits/rejected": -1.9425437450408936, "logps/chosen": -473.98797607421875, "logps/rejected": -614.0018310546875, "loss": 0.6805, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.340290367603302, "rewards/margins": 0.1450144201517105, "rewards/rejected": -0.4853048324584961, "step": 2260 }, { "epoch": 0.17, "learning_rate": 1.972434500350933e-06, "logits/chosen": -2.4390273094177246, "logits/rejected": -1.942826509475708, "logps/chosen": -435.69512939453125, "logps/rejected": -485.67266845703125, "loss": 0.6842, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2675899267196655, "rewards/margins": 0.11649657785892487, "rewards/rejected": -0.3840864896774292, "step": 2270 }, { "epoch": 0.17, "learning_rate": 1.971830880206793e-06, "logits/chosen": -2.3053627014160156, "logits/rejected": -1.9114669561386108, "logps/chosen": -426.8675842285156, "logps/rejected": -555.3189697265625, "loss": 0.6803, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2829509675502777, "rewards/margins": 0.11832394450902939, "rewards/rejected": -0.4012749195098877, "step": 2280 }, { "epoch": 0.17, "learning_rate": 1.9712208168962748e-06, "logits/chosen": -2.243980646133423, "logits/rejected": -1.7068296670913696, "logps/chosen": -445.9646911621094, "logps/rejected": -621.0426025390625, "loss": 0.6791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.308194100856781, "rewards/margins": 0.19002577662467957, "rewards/rejected": -0.49821990728378296, "step": 2290 }, { "epoch": 0.17, "learning_rate": 1.970604314464054e-06, "logits/chosen": -2.3639848232269287, "logits/rejected": -1.80299973487854, "logps/chosen": -440.9212341308594, "logps/rejected": -587.9340209960938, "loss": 0.6817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2849770188331604, "rewards/margins": 0.1762705296278, "rewards/rejected": -0.46124759316444397, "step": 2300 }, { "epoch": 0.17, "learning_rate": 1.969981376997495e-06, "logits/chosen": -2.011101245880127, "logits/rejected": -1.4220387935638428, "logps/chosen": -472.9423828125, "logps/rejected": -609.1708374023438, "loss": 0.6806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3158239722251892, "rewards/margins": 0.18666964769363403, "rewards/rejected": -0.5024936199188232, "step": 2310 }, { "epoch": 0.17, "learning_rate": 1.969352008626628e-06, "logits/chosen": -2.2058448791503906, "logits/rejected": -1.593542218208313, "logps/chosen": -457.04559326171875, "logps/rejected": -593.0643310546875, "loss": 0.6803, "rewards/accuracies": 0.75, "rewards/chosen": -0.278074711561203, "rewards/margins": 0.16135917603969574, "rewards/rejected": -0.43943387269973755, "step": 2320 }, { "epoch": 0.17, "learning_rate": 1.968716213524117e-06, "logits/chosen": -2.0567851066589355, "logits/rejected": -1.619385004043579, "logps/chosen": -574.7120361328125, "logps/rejected": -707.8516235351562, "loss": 0.6832, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4373759329319, "rewards/margins": 0.13795319199562073, "rewards/rejected": -0.5753291249275208, "step": 2330 }, { "epoch": 0.17, "learning_rate": 1.968073995905237e-06, "logits/chosen": -2.0958778858184814, "logits/rejected": -1.35481858253479, "logps/chosen": -563.2327880859375, "logps/rejected": -758.2282104492188, "loss": 0.6743, "rewards/accuracies": 0.875, "rewards/chosen": -0.3958517909049988, "rewards/margins": 0.24637925624847412, "rewards/rejected": -0.6422311067581177, "step": 2340 }, { "epoch": 0.17, "learning_rate": 1.9674253600278425e-06, "logits/chosen": -2.156848907470703, "logits/rejected": -1.7237293720245361, "logps/chosen": -534.9508056640625, "logps/rejected": -636.97119140625, "loss": 0.6866, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3888207674026489, "rewards/margins": 0.12209241092205048, "rewards/rejected": -0.5109131932258606, "step": 2350 }, { "epoch": 0.17, "learning_rate": 1.9667703101923417e-06, "logits/chosen": -2.1130833625793457, "logits/rejected": -1.4509655237197876, "logps/chosen": -507.66845703125, "logps/rejected": -685.2469482421875, "loss": 0.6759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36636030673980713, "rewards/margins": 0.20439592003822327, "rewards/rejected": -0.5707562565803528, "step": 2360 }, { "epoch": 0.17, "learning_rate": 1.966108850741666e-06, "logits/chosen": -2.0595879554748535, "logits/rejected": -1.4561467170715332, "logps/chosen": -524.9714965820312, "logps/rejected": -697.8748779296875, "loss": 0.6792, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39158666133880615, "rewards/margins": 0.21709027886390686, "rewards/rejected": -0.6086769104003906, "step": 2370 }, { "epoch": 0.18, "learning_rate": 1.965440986061242e-06, "logits/chosen": -2.1323249340057373, "logits/rejected": -1.6522941589355469, "logps/chosen": -439.0110778808594, "logps/rejected": -566.6097412109375, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26706576347351074, "rewards/margins": 0.15532997250556946, "rewards/rejected": -0.4223957657814026, "step": 2380 }, { "epoch": 0.18, "learning_rate": 1.9647667205789633e-06, "logits/chosen": -2.0582847595214844, "logits/rejected": -1.634580373764038, "logps/chosen": -406.7088317871094, "logps/rejected": -611.8225708007812, "loss": 0.6779, "rewards/accuracies": 0.75, "rewards/chosen": -0.24526092410087585, "rewards/margins": 0.2408144772052765, "rewards/rejected": -0.48607540130615234, "step": 2390 }, { "epoch": 0.18, "learning_rate": 1.9640860587651596e-06, "logits/chosen": -2.160285472869873, "logits/rejected": -1.7338612079620361, "logps/chosen": -357.09588623046875, "logps/rejected": -479.0682678222656, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20668864250183105, "rewards/margins": 0.1845724880695343, "rewards/rejected": -0.39126116037368774, "step": 2400 }, { "epoch": 0.18, "learning_rate": 1.963399005132568e-06, "logits/chosen": -2.284696578979492, "logits/rejected": -1.7340952157974243, "logps/chosen": -469.50579833984375, "logps/rejected": -572.64208984375, "loss": 0.68, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3007621169090271, "rewards/margins": 0.1408412754535675, "rewards/rejected": -0.441603422164917, "step": 2410 }, { "epoch": 0.18, "learning_rate": 1.9627055642363024e-06, "logits/chosen": -2.282900810241699, "logits/rejected": -1.688323974609375, "logps/chosen": -524.0994873046875, "logps/rejected": -654.2434692382812, "loss": 0.6791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35869601368904114, "rewards/margins": 0.17842108011245728, "rewards/rejected": -0.5371171236038208, "step": 2420 }, { "epoch": 0.18, "learning_rate": 1.962005740673826e-06, "logits/chosen": -2.1299984455108643, "logits/rejected": -1.7650219202041626, "logps/chosen": -551.5155029296875, "logps/rejected": -609.9064331054688, "loss": 0.682, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4002091884613037, "rewards/margins": 0.11301169544458389, "rewards/rejected": -0.5132209062576294, "step": 2430 }, { "epoch": 0.18, "learning_rate": 1.961299539084915e-06, "logits/chosen": -2.237705707550049, "logits/rejected": -1.590634822845459, "logps/chosen": -415.1426696777344, "logps/rejected": -539.1908569335938, "loss": 0.6771, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.24505171179771423, "rewards/margins": 0.18921124935150146, "rewards/rejected": -0.4342629313468933, "step": 2440 }, { "epoch": 0.18, "learning_rate": 1.960586964151634e-06, "logits/chosen": -2.0702338218688965, "logits/rejected": -1.7162843942642212, "logps/chosen": -427.79327392578125, "logps/rejected": -459.9962463378906, "loss": 0.6858, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2035369873046875, "rewards/margins": 0.10265177488327026, "rewards/rejected": -0.30618876218795776, "step": 2450 }, { "epoch": 0.18, "learning_rate": 1.9598680205983023e-06, "logits/chosen": -2.3718433380126953, "logits/rejected": -1.858899474143982, "logps/chosen": -368.81207275390625, "logps/rejected": -503.04412841796875, "loss": 0.6758, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1279810220003128, "rewards/margins": 0.2198273241519928, "rewards/rejected": -0.3478083610534668, "step": 2460 }, { "epoch": 0.18, "learning_rate": 1.959142713191462e-06, "logits/chosen": -2.3450517654418945, "logits/rejected": -1.554194688796997, "logps/chosen": -367.3561096191406, "logps/rejected": -559.859619140625, "loss": 0.6748, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23402157425880432, "rewards/margins": 0.23393464088439941, "rewards/rejected": -0.46795615553855896, "step": 2470 }, { "epoch": 0.18, "learning_rate": 1.9584110467398462e-06, "logits/chosen": -2.1535983085632324, "logits/rejected": -1.7044436931610107, "logps/chosen": -446.1212463378906, "logps/rejected": -600.7359619140625, "loss": 0.6834, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3234890401363373, "rewards/margins": 0.1435890644788742, "rewards/rejected": -0.46707814931869507, "step": 2480 }, { "epoch": 0.18, "learning_rate": 1.9576730260943503e-06, "logits/chosen": -2.042705535888672, "logits/rejected": -1.7747453451156616, "logps/chosen": -496.30645751953125, "logps/rejected": -661.5699462890625, "loss": 0.6818, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3300403356552124, "rewards/margins": 0.18328246474266052, "rewards/rejected": -0.5133228302001953, "step": 2490 }, { "epoch": 0.18, "learning_rate": 1.9569286561479953e-06, "logits/chosen": -2.1458046436309814, "logits/rejected": -1.6760085821151733, "logps/chosen": -466.779052734375, "logps/rejected": -647.1060791015625, "loss": 0.6761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3019923269748688, "rewards/margins": 0.21470943093299866, "rewards/rejected": -0.5167017579078674, "step": 2500 }, { "epoch": 0.19, "learning_rate": 1.956177941835899e-06, "logits/chosen": -2.0877645015716553, "logits/rejected": -1.6405738592147827, "logps/chosen": -522.7928466796875, "logps/rejected": -605.4293212890625, "loss": 0.6812, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3375188410282135, "rewards/margins": 0.13148614764213562, "rewards/rejected": -0.46900495886802673, "step": 2510 }, { "epoch": 0.19, "learning_rate": 1.9554208881352414e-06, "logits/chosen": -2.091726303100586, "logits/rejected": -1.5628125667572021, "logps/chosen": -469.1730041503906, "logps/rejected": -632.3187255859375, "loss": 0.6863, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33747076988220215, "rewards/margins": 0.15197408199310303, "rewards/rejected": -0.4894448220729828, "step": 2520 }, { "epoch": 0.19, "learning_rate": 1.954657500065232e-06, "logits/chosen": -2.2024099826812744, "logits/rejected": -1.709242582321167, "logps/chosen": -489.3817443847656, "logps/rejected": -594.0743408203125, "loss": 0.6837, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3141201138496399, "rewards/margins": 0.15102660655975342, "rewards/rejected": -0.4651466906070709, "step": 2530 }, { "epoch": 0.19, "learning_rate": 1.9538877826870767e-06, "logits/chosen": -2.164644718170166, "logits/rejected": -1.5440808534622192, "logps/chosen": -440.83843994140625, "logps/rejected": -553.8544921875, "loss": 0.6804, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2569544017314911, "rewards/margins": 0.15411385893821716, "rewards/rejected": -0.41106826066970825, "step": 2540 }, { "epoch": 0.19, "learning_rate": 1.953111741103945e-06, "logits/chosen": -2.1803534030914307, "logits/rejected": -1.6868035793304443, "logps/chosen": -460.7574768066406, "logps/rejected": -635.1110229492188, "loss": 0.6755, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3071635365486145, "rewards/margins": 0.1996069848537445, "rewards/rejected": -0.5067704916000366, "step": 2550 }, { "epoch": 0.19, "learning_rate": 1.952329380460934e-06, "logits/chosen": -2.098507881164551, "logits/rejected": -1.4962226152420044, "logps/chosen": -436.7218322753906, "logps/rejected": -643.6398315429688, "loss": 0.677, "rewards/accuracies": 0.75, "rewards/chosen": -0.297123521566391, "rewards/margins": 0.22820787131786346, "rewards/rejected": -0.5253313779830933, "step": 2560 }, { "epoch": 0.19, "learning_rate": 1.9515407059450372e-06, "logits/chosen": -2.1640865802764893, "logits/rejected": -1.7035068273544312, "logps/chosen": -516.2990112304688, "logps/rejected": -627.0225830078125, "loss": 0.6803, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30143266916275024, "rewards/margins": 0.16093429923057556, "rewards/rejected": -0.4623669683933258, "step": 2570 }, { "epoch": 0.19, "learning_rate": 1.9507457227851075e-06, "logits/chosen": -2.2468857765197754, "logits/rejected": -1.683065414428711, "logps/chosen": -441.48883056640625, "logps/rejected": -580.8575439453125, "loss": 0.6811, "rewards/accuracies": 0.75, "rewards/chosen": -0.2822521924972534, "rewards/margins": 0.18676209449768066, "rewards/rejected": -0.4690142273902893, "step": 2580 }, { "epoch": 0.19, "learning_rate": 1.949944436251824e-06, "logits/chosen": -2.0707316398620605, "logits/rejected": -1.5447280406951904, "logps/chosen": -538.9352416992188, "logps/rejected": -696.1736450195312, "loss": 0.6733, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3681526780128479, "rewards/margins": 0.20710666477680206, "rewards/rejected": -0.5752593278884888, "step": 2590 }, { "epoch": 0.19, "learning_rate": 1.9491368516576576e-06, "logits/chosen": -2.196861982345581, "logits/rejected": -1.5139522552490234, "logps/chosen": -495.76776123046875, "logps/rejected": -645.4644165039062, "loss": 0.6821, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3059159815311432, "rewards/margins": 0.20407457649707794, "rewards/rejected": -0.5099905133247375, "step": 2600 }, { "epoch": 0.19, "learning_rate": 1.948322974356833e-06, "logits/chosen": -2.0717501640319824, "logits/rejected": -1.6602550745010376, "logps/chosen": -447.01953125, "logps/rejected": -547.1569213867188, "loss": 0.6804, "rewards/accuracies": 0.75, "rewards/chosen": -0.3065660893917084, "rewards/margins": 0.10858442634344101, "rewards/rejected": -0.41515055298805237, "step": 2610 }, { "epoch": 0.19, "learning_rate": 1.947502809745297e-06, "logits/chosen": -2.245804786682129, "logits/rejected": -1.8320928812026978, "logps/chosen": -395.7469177246094, "logps/rejected": -534.1380615234375, "loss": 0.6793, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23131605982780457, "rewards/margins": 0.19176152348518372, "rewards/rejected": -0.4230775833129883, "step": 2620 }, { "epoch": 0.19, "learning_rate": 1.946676363260679e-06, "logits/chosen": -2.2056007385253906, "logits/rejected": -1.8109477758407593, "logps/chosen": -376.3828125, "logps/rejected": -529.7716064453125, "loss": 0.6803, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2292071282863617, "rewards/margins": 0.18400098383426666, "rewards/rejected": -0.41320815682411194, "step": 2630 }, { "epoch": 0.19, "learning_rate": 1.945843640382258e-06, "logits/chosen": -2.10235333442688, "logits/rejected": -1.4423744678497314, "logps/chosen": -407.5528564453125, "logps/rejected": -581.073974609375, "loss": 0.6789, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.25159019231796265, "rewards/margins": 0.20525255799293518, "rewards/rejected": -0.4568428099155426, "step": 2640 }, { "epoch": 0.2, "learning_rate": 1.945004646630925e-06, "logits/chosen": -2.0482876300811768, "logits/rejected": -1.896924614906311, "logps/chosen": -386.3375244140625, "logps/rejected": -492.47613525390625, "loss": 0.6802, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23553243279457092, "rewards/margins": 0.1384381651878357, "rewards/rejected": -0.373970627784729, "step": 2650 }, { "epoch": 0.2, "learning_rate": 1.9441593875691457e-06, "logits/chosen": -2.10493803024292, "logits/rejected": -1.5971938371658325, "logps/chosen": -404.35943603515625, "logps/rejected": -545.6893310546875, "loss": 0.6815, "rewards/accuracies": 0.75, "rewards/chosen": -0.21274252235889435, "rewards/margins": 0.17012625932693481, "rewards/rejected": -0.38286876678466797, "step": 2660 }, { "epoch": 0.2, "learning_rate": 1.943307868800925e-06, "logits/chosen": -2.134692430496216, "logits/rejected": -1.8001025915145874, "logps/chosen": -385.9622497558594, "logps/rejected": -534.080078125, "loss": 0.6791, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23474851250648499, "rewards/margins": 0.1776428520679474, "rewards/rejected": -0.4123913645744324, "step": 2670 }, { "epoch": 0.2, "learning_rate": 1.9424500959717687e-06, "logits/chosen": -1.9379689693450928, "logits/rejected": -1.5355756282806396, "logps/chosen": -465.15594482421875, "logps/rejected": -546.3622436523438, "loss": 0.6829, "rewards/accuracies": 0.75, "rewards/chosen": -0.2712392508983612, "rewards/margins": 0.15018419921398163, "rewards/rejected": -0.4214234948158264, "step": 2680 }, { "epoch": 0.2, "learning_rate": 1.941586074768647e-06, "logits/chosen": -2.091031551361084, "logits/rejected": -1.3861663341522217, "logps/chosen": -373.93817138671875, "logps/rejected": -564.0825805664062, "loss": 0.6725, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22535483539104462, "rewards/margins": 0.22474846243858337, "rewards/rejected": -0.4501033425331116, "step": 2690 }, { "epoch": 0.2, "learning_rate": 1.9407158109199564e-06, "logits/chosen": -1.6693627834320068, "logits/rejected": -1.3641051054000854, "logps/chosen": -397.1299133300781, "logps/rejected": -555.8023071289062, "loss": 0.6793, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2729909121990204, "rewards/margins": 0.15987563133239746, "rewards/rejected": -0.43286657333374023, "step": 2700 }, { "epoch": 0.2, "learning_rate": 1.9398393101954805e-06, "logits/chosen": -2.1973793506622314, "logits/rejected": -1.6700327396392822, "logps/chosen": -456.0106506347656, "logps/rejected": -547.7840576171875, "loss": 0.6826, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3064795434474945, "rewards/margins": 0.13850918412208557, "rewards/rejected": -0.4449886679649353, "step": 2710 }, { "epoch": 0.2, "learning_rate": 1.938956578406355e-06, "logits/chosen": -2.004473924636841, "logits/rejected": -1.48871648311615, "logps/chosen": -430.23321533203125, "logps/rejected": -650.4468994140625, "loss": 0.6823, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3086654543876648, "rewards/margins": 0.24743004143238068, "rewards/rejected": -0.5560954809188843, "step": 2720 }, { "epoch": 0.2, "learning_rate": 1.938067621405025e-06, "logits/chosen": -1.9475698471069336, "logits/rejected": -1.3925867080688477, "logps/chosen": -567.3215942382812, "logps/rejected": -695.9805908203125, "loss": 0.6777, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38023585081100464, "rewards/margins": 0.1983848363161087, "rewards/rejected": -0.5786207318305969, "step": 2730 }, { "epoch": 0.2, "learning_rate": 1.93717244508521e-06, "logits/chosen": -2.2313830852508545, "logits/rejected": -1.676138162612915, "logps/chosen": -549.9027709960938, "logps/rejected": -649.9061279296875, "loss": 0.6783, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30851373076438904, "rewards/margins": 0.17082026600837708, "rewards/rejected": -0.4793340563774109, "step": 2740 }, { "epoch": 0.2, "learning_rate": 1.9362710553818616e-06, "logits/chosen": -2.2363827228546143, "logits/rejected": -1.701989769935608, "logps/chosen": -506.1192321777344, "logps/rejected": -608.6435546875, "loss": 0.6816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3408898115158081, "rewards/margins": 0.12728413939476013, "rewards/rejected": -0.46817392110824585, "step": 2750 }, { "epoch": 0.2, "learning_rate": 1.9353634582711267e-06, "logits/chosen": -2.105170249938965, "logits/rejected": -1.7042697668075562, "logps/chosen": -468.1239318847656, "logps/rejected": -655.5587768554688, "loss": 0.6783, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30737969279289246, "rewards/margins": 0.182169109582901, "rewards/rejected": -0.48954883217811584, "step": 2760 }, { "epoch": 0.2, "learning_rate": 1.934449659770307e-06, "logits/chosen": -2.141309976577759, "logits/rejected": -1.5400238037109375, "logps/chosen": -444.45367431640625, "logps/rejected": -624.2310791015625, "loss": 0.6803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2941246032714844, "rewards/margins": 0.21606364846229553, "rewards/rejected": -0.5101882815361023, "step": 2770 }, { "epoch": 0.21, "learning_rate": 1.933529665937818e-06, "logits/chosen": -2.142371654510498, "logits/rejected": -1.7160084247589111, "logps/chosen": -480.2945861816406, "logps/rejected": -616.71435546875, "loss": 0.68, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2939785122871399, "rewards/margins": 0.17008240520954132, "rewards/rejected": -0.46406087279319763, "step": 2780 }, { "epoch": 0.21, "learning_rate": 1.932603482873151e-06, "logits/chosen": -2.19148325920105, "logits/rejected": -1.5244728326797485, "logps/chosen": -423.2686462402344, "logps/rejected": -608.168701171875, "loss": 0.6721, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2889011800289154, "rewards/margins": 0.22881221771240234, "rewards/rejected": -0.5177133679389954, "step": 2790 }, { "epoch": 0.21, "learning_rate": 1.9316711167168295e-06, "logits/chosen": -2.012528657913208, "logits/rejected": -1.4075462818145752, "logps/chosen": -439.5467224121094, "logps/rejected": -611.07861328125, "loss": 0.671, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25834766030311584, "rewards/margins": 0.22502920031547546, "rewards/rejected": -0.4833768308162689, "step": 2800 }, { "epoch": 0.21, "learning_rate": 1.930732573650373e-06, "logits/chosen": -1.934926986694336, "logits/rejected": -1.5416839122772217, "logps/chosen": -472.89471435546875, "logps/rejected": -625.7611694335938, "loss": 0.6789, "rewards/accuracies": 0.75, "rewards/chosen": -0.2992047667503357, "rewards/margins": 0.18512657284736633, "rewards/rejected": -0.4843314290046692, "step": 2810 }, { "epoch": 0.21, "learning_rate": 1.9297878598962513e-06, "logits/chosen": -1.847731590270996, "logits/rejected": -1.3784606456756592, "logps/chosen": -474.4896545410156, "logps/rejected": -658.54248046875, "loss": 0.6754, "rewards/accuracies": 0.75, "rewards/chosen": -0.3008356988430023, "rewards/margins": 0.1790848672389984, "rewards/rejected": -0.47992056608200073, "step": 2820 }, { "epoch": 0.21, "learning_rate": 1.9288369817178463e-06, "logits/chosen": -2.09458589553833, "logits/rejected": -1.4930298328399658, "logps/chosen": -563.6424560546875, "logps/rejected": -666.8118896484375, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37855371832847595, "rewards/margins": 0.15176668763160706, "rewards/rejected": -0.530320405960083, "step": 2830 }, { "epoch": 0.21, "learning_rate": 1.9278799454194103e-06, "logits/chosen": -2.0325164794921875, "logits/rejected": -1.8140875101089478, "logps/chosen": -425.4775390625, "logps/rejected": -579.103759765625, "loss": 0.6823, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25914591550827026, "rewards/margins": 0.1370120346546173, "rewards/rejected": -0.3961579203605652, "step": 2840 }, { "epoch": 0.21, "learning_rate": 1.9269167573460217e-06, "logits/chosen": -1.969421625137329, "logits/rejected": -1.589790940284729, "logps/chosen": -499.8255920410156, "logps/rejected": -598.4398193359375, "loss": 0.6822, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3385053873062134, "rewards/margins": 0.1456705778837204, "rewards/rejected": -0.48417600989341736, "step": 2850 }, { "epoch": 0.21, "learning_rate": 1.9259474238835462e-06, "logits/chosen": -2.139881134033203, "logits/rejected": -1.6491024494171143, "logps/chosen": -386.8104553222656, "logps/rejected": -660.2227783203125, "loss": 0.6735, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24412527680397034, "rewards/margins": 0.18042223155498505, "rewards/rejected": -0.4245475232601166, "step": 2860 }, { "epoch": 0.21, "learning_rate": 1.9249719514585927e-06, "logits/chosen": -2.085484743118286, "logits/rejected": -1.6318724155426025, "logps/chosen": -503.95355224609375, "logps/rejected": -628.2764892578125, "loss": 0.6832, "rewards/accuracies": 0.75, "rewards/chosen": -0.3221589922904968, "rewards/margins": 0.15578065812587738, "rewards/rejected": -0.4779396057128906, "step": 2870 }, { "epoch": 0.21, "learning_rate": 1.9239903465384708e-06, "logits/chosen": -1.8350389003753662, "logits/rejected": -1.259713888168335, "logps/chosen": -458.4513244628906, "logps/rejected": -610.3905639648438, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.3344591557979584, "rewards/margins": 0.1712384670972824, "rewards/rejected": -0.5056976675987244, "step": 2880 }, { "epoch": 0.21, "learning_rate": 1.923002615631147e-06, "logits/chosen": -2.197864055633545, "logits/rejected": -1.769700288772583, "logps/chosen": -449.5767517089844, "logps/rejected": -536.9815063476562, "loss": 0.6873, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2946578562259674, "rewards/margins": 0.10272173583507538, "rewards/rejected": -0.3973795771598816, "step": 2890 }, { "epoch": 0.21, "learning_rate": 1.922008765285205e-06, "logits/chosen": -2.1408984661102295, "logits/rejected": -1.648498773574829, "logps/chosen": -380.9702453613281, "logps/rejected": -517.1094970703125, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22218433022499084, "rewards/margins": 0.1487577259540558, "rewards/rejected": -0.37094205617904663, "step": 2900 }, { "epoch": 0.21, "learning_rate": 1.9210088020897976e-06, "logits/chosen": -2.18157696723938, "logits/rejected": -2.002652645111084, "logps/chosen": -557.7557373046875, "logps/rejected": -663.537109375, "loss": 0.6828, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3620392680168152, "rewards/margins": 0.11512680351734161, "rewards/rejected": -0.477166086435318, "step": 2910 }, { "epoch": 0.22, "learning_rate": 1.920002732674607e-06, "logits/chosen": -2.1287896633148193, "logits/rejected": -1.6273607015609741, "logps/chosen": -517.1826782226562, "logps/rejected": -664.0628051757812, "loss": 0.6774, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3370513319969177, "rewards/margins": 0.19202138483524323, "rewards/rejected": -0.5290727615356445, "step": 2920 }, { "epoch": 0.22, "learning_rate": 1.9189905637097983e-06, "logits/chosen": -2.1397616863250732, "logits/rejected": -1.7486894130706787, "logps/chosen": -504.70489501953125, "logps/rejected": -606.0086669921875, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3682883679866791, "rewards/margins": 0.13075003027915955, "rewards/rejected": -0.49903836846351624, "step": 2930 }, { "epoch": 0.22, "learning_rate": 1.917972301905976e-06, "logits/chosen": -2.1539790630340576, "logits/rejected": -1.666317343711853, "logps/chosen": -552.3563232421875, "logps/rejected": -659.6040649414062, "loss": 0.6792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3875843584537506, "rewards/margins": 0.15216998755931854, "rewards/rejected": -0.5397543907165527, "step": 2940 }, { "epoch": 0.22, "learning_rate": 1.9169479540141407e-06, "logits/chosen": -2.155515432357788, "logits/rejected": -1.5311315059661865, "logps/chosen": -582.3014526367188, "logps/rejected": -696.2979736328125, "loss": 0.6781, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4387715458869934, "rewards/margins": 0.1510915458202362, "rewards/rejected": -0.5898630619049072, "step": 2950 }, { "epoch": 0.22, "learning_rate": 1.9159175268256424e-06, "logits/chosen": -2.2768607139587402, "logits/rejected": -1.801196813583374, "logps/chosen": -494.99493408203125, "logps/rejected": -647.9757080078125, "loss": 0.6807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3422335684299469, "rewards/margins": 0.16842179000377655, "rewards/rejected": -0.510655403137207, "step": 2960 }, { "epoch": 0.22, "learning_rate": 1.9148810271721365e-06, "logits/chosen": -2.194025754928589, "logits/rejected": -1.5466454029083252, "logps/chosen": -404.00946044921875, "logps/rejected": -544.4857177734375, "loss": 0.6783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2398199737071991, "rewards/margins": 0.21102336049079895, "rewards/rejected": -0.4508433938026428, "step": 2970 }, { "epoch": 0.22, "learning_rate": 1.9138384619255388e-06, "logits/chosen": -2.20519757270813, "logits/rejected": -1.6715962886810303, "logps/chosen": -484.58538818359375, "logps/rejected": -539.4605712890625, "loss": 0.6864, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.313626766204834, "rewards/margins": 0.07926097512245178, "rewards/rejected": -0.39288777112960815, "step": 2980 }, { "epoch": 0.22, "learning_rate": 1.9127898379979785e-06, "logits/chosen": -2.054356098175049, "logits/rejected": -1.6347715854644775, "logps/chosen": -412.08636474609375, "logps/rejected": -515.6576538085938, "loss": 0.6813, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28097906708717346, "rewards/margins": 0.12739433348178864, "rewards/rejected": -0.4083734154701233, "step": 2990 }, { "epoch": 0.22, "learning_rate": 1.911735162341755e-06, "logits/chosen": -2.1943540573120117, "logits/rejected": -1.8242552280426025, "logps/chosen": -437.42431640625, "logps/rejected": -505.9149475097656, "loss": 0.6869, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2753000557422638, "rewards/margins": 0.09773708134889603, "rewards/rejected": -0.3730371594429016, "step": 3000 }, { "epoch": 0.22, "learning_rate": 1.9106744419492894e-06, "logits/chosen": -2.264991283416748, "logits/rejected": -1.9455993175506592, "logps/chosen": -418.12493896484375, "logps/rejected": -508.1954650878906, "loss": 0.6838, "rewards/accuracies": 0.75, "rewards/chosen": -0.2575262188911438, "rewards/margins": 0.12060721218585968, "rewards/rejected": -0.3781334459781647, "step": 3010 }, { "epoch": 0.22, "learning_rate": 1.9096076838530786e-06, "logits/chosen": -2.3657612800598145, "logits/rejected": -1.5565624237060547, "logps/chosen": -377.0730895996094, "logps/rejected": -506.0045471191406, "loss": 0.6807, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.225591778755188, "rewards/margins": 0.1655091792345047, "rewards/rejected": -0.3911009728908539, "step": 3020 }, { "epoch": 0.22, "learning_rate": 1.908534895125649e-06, "logits/chosen": -2.105147123336792, "logits/rejected": -1.5140655040740967, "logps/chosen": -402.0430908203125, "logps/rejected": -578.5714111328125, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2557045817375183, "rewards/margins": 0.20685040950775146, "rewards/rejected": -0.46255502104759216, "step": 3030 }, { "epoch": 0.22, "learning_rate": 1.9074560828795104e-06, "logits/chosen": -2.0416293144226074, "logits/rejected": -1.4791113138198853, "logps/chosen": -427.8404235839844, "logps/rejected": -597.9266357421875, "loss": 0.6786, "rewards/accuracies": 0.75, "rewards/chosen": -0.29454824328422546, "rewards/margins": 0.208732008934021, "rewards/rejected": -0.5032802820205688, "step": 3040 }, { "epoch": 0.22, "learning_rate": 1.9063712542671078e-06, "logits/chosen": -2.0290465354919434, "logits/rejected": -2.134934186935425, "logps/chosen": -377.42413330078125, "logps/rejected": -514.2342529296875, "loss": 0.6823, "rewards/accuracies": 0.625, "rewards/chosen": -0.23370258510112762, "rewards/margins": 0.12122875452041626, "rewards/rejected": -0.3549313545227051, "step": 3050 }, { "epoch": 0.23, "learning_rate": 1.9052804164807737e-06, "logits/chosen": -2.4395174980163574, "logits/rejected": -1.838876485824585, "logps/chosen": -400.8329162597656, "logps/rejected": -493.4871520996094, "loss": 0.6779, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.20629139244556427, "rewards/margins": 0.16466112434864044, "rewards/rejected": -0.3709525167942047, "step": 3060 }, { "epoch": 0.23, "learning_rate": 1.9041835767526822e-06, "logits/chosen": -2.1876797676086426, "logits/rejected": -1.7854764461517334, "logps/chosen": -450.77716064453125, "logps/rejected": -603.3948974609375, "loss": 0.6805, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28999581933021545, "rewards/margins": 0.1584526002407074, "rewards/rejected": -0.44844841957092285, "step": 3070 }, { "epoch": 0.23, "learning_rate": 1.903080742354798e-06, "logits/chosen": -2.1836769580841064, "logits/rejected": -1.6458380222320557, "logps/chosen": -419.86175537109375, "logps/rejected": -558.9939575195312, "loss": 0.6813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24749478697776794, "rewards/margins": 0.1643299162387848, "rewards/rejected": -0.41182470321655273, "step": 3080 }, { "epoch": 0.23, "learning_rate": 1.901971920598832e-06, "logits/chosen": -2.3274083137512207, "logits/rejected": -1.6865243911743164, "logps/chosen": -501.293701171875, "logps/rejected": -634.9478759765625, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3538026809692383, "rewards/margins": 0.1824498474597931, "rewards/rejected": -0.536252498626709, "step": 3090 }, { "epoch": 0.23, "learning_rate": 1.9008571188361888e-06, "logits/chosen": -2.0796470642089844, "logits/rejected": -1.7598650455474854, "logps/chosen": -465.89910888671875, "logps/rejected": -574.8653564453125, "loss": 0.6831, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27644291520118713, "rewards/margins": 0.15571348369121552, "rewards/rejected": -0.43215641379356384, "step": 3100 }, { "epoch": 0.23, "learning_rate": 1.899736344457922e-06, "logits/chosen": -1.9877760410308838, "logits/rejected": -1.6371557712554932, "logps/chosen": -485.68408203125, "logps/rejected": -700.5065307617188, "loss": 0.6746, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3162660002708435, "rewards/margins": 0.20398874580860138, "rewards/rejected": -0.5202547311782837, "step": 3110 }, { "epoch": 0.23, "learning_rate": 1.8986096048946822e-06, "logits/chosen": -2.0560553073883057, "logits/rejected": -1.4426850080490112, "logps/chosen": -494.567138671875, "logps/rejected": -648.7655639648438, "loss": 0.6772, "rewards/accuracies": 0.75, "rewards/chosen": -0.32616931200027466, "rewards/margins": 0.19364163279533386, "rewards/rejected": -0.5198109149932861, "step": 3120 }, { "epoch": 0.23, "learning_rate": 1.8974769076166684e-06, "logits/chosen": -2.1997365951538086, "logits/rejected": -1.5459136962890625, "logps/chosen": -453.47747802734375, "logps/rejected": -576.3797607421875, "loss": 0.6764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27390947937965393, "rewards/margins": 0.19066807627677917, "rewards/rejected": -0.4645775258541107, "step": 3130 }, { "epoch": 0.23, "learning_rate": 1.8963382601335793e-06, "logits/chosen": -2.2718236446380615, "logits/rejected": -1.9487724304199219, "logps/chosen": -355.4183044433594, "logps/rejected": -510.501220703125, "loss": 0.6779, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.18386347591876984, "rewards/margins": 0.20814315974712372, "rewards/rejected": -0.39200666546821594, "step": 3140 }, { "epoch": 0.23, "learning_rate": 1.8951936699945633e-06, "logits/chosen": -2.1893093585968018, "logits/rejected": -1.8610244989395142, "logps/chosen": -511.5162048339844, "logps/rejected": -616.1066284179688, "loss": 0.6823, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3733901381492615, "rewards/margins": 0.13487958908081055, "rewards/rejected": -0.5082696676254272, "step": 3150 }, { "epoch": 0.23, "learning_rate": 1.8940431447881676e-06, "logits/chosen": -2.1516637802124023, "logits/rejected": -1.6326501369476318, "logps/chosen": -533.9788818359375, "logps/rejected": -643.1709594726562, "loss": 0.6831, "rewards/accuracies": 0.75, "rewards/chosen": -0.33893266320228577, "rewards/margins": 0.15569022297859192, "rewards/rejected": -0.4946228861808777, "step": 3160 }, { "epoch": 0.23, "learning_rate": 1.8928866921422883e-06, "logits/chosen": -2.0768840312957764, "logits/rejected": -1.6885178089141846, "logps/chosen": -553.6319580078125, "logps/rejected": -652.4114990234375, "loss": 0.6845, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3926646113395691, "rewards/margins": 0.13167552649974823, "rewards/rejected": -0.5243401527404785, "step": 3170 }, { "epoch": 0.23, "learning_rate": 1.891724319724121e-06, "logits/chosen": -2.2388126850128174, "logits/rejected": -1.7100732326507568, "logps/chosen": -467.63983154296875, "logps/rejected": -568.2694091796875, "loss": 0.6809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3158666491508484, "rewards/margins": 0.14076802134513855, "rewards/rejected": -0.45663467049598694, "step": 3180 }, { "epoch": 0.24, "learning_rate": 1.8905560352401078e-06, "logits/chosen": -2.2990920543670654, "logits/rejected": -1.9180845022201538, "logps/chosen": -462.1502990722656, "logps/rejected": -554.536376953125, "loss": 0.6833, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2593669295310974, "rewards/margins": 0.10183145850896835, "rewards/rejected": -0.36119842529296875, "step": 3190 }, { "epoch": 0.24, "learning_rate": 1.8893818464358877e-06, "logits/chosen": -2.275900363922119, "logits/rejected": -1.9424655437469482, "logps/chosen": -476.88726806640625, "logps/rejected": -607.058837890625, "loss": 0.6829, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2616773545742035, "rewards/margins": 0.16231346130371094, "rewards/rejected": -0.42399081587791443, "step": 3200 }, { "epoch": 0.24, "learning_rate": 1.8882017610962455e-06, "logits/chosen": -2.306851863861084, "logits/rejected": -1.9681587219238281, "logps/chosen": -395.8658447265625, "logps/rejected": -540.9508666992188, "loss": 0.6801, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21775195002555847, "rewards/margins": 0.1706216037273407, "rewards/rejected": -0.3883734941482544, "step": 3210 }, { "epoch": 0.24, "learning_rate": 1.887015787045059e-06, "logits/chosen": -2.216426372528076, "logits/rejected": -1.917083978652954, "logps/chosen": -447.6256408691406, "logps/rejected": -612.8665771484375, "loss": 0.6802, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2534784972667694, "rewards/margins": 0.19091734290122986, "rewards/rejected": -0.44439584016799927, "step": 3220 }, { "epoch": 0.24, "learning_rate": 1.8858239321452477e-06, "logits/chosen": -2.1315035820007324, "logits/rejected": -1.349755883216858, "logps/chosen": -471.4364318847656, "logps/rejected": -675.3856811523438, "loss": 0.6812, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3014078736305237, "rewards/margins": 0.23902232944965363, "rewards/rejected": -0.5404302477836609, "step": 3230 }, { "epoch": 0.24, "learning_rate": 1.8846262042987213e-06, "logits/chosen": -2.3818116188049316, "logits/rejected": -1.7089860439300537, "logps/chosen": -468.42694091796875, "logps/rejected": -579.0781860351562, "loss": 0.6793, "rewards/accuracies": 0.75, "rewards/chosen": -0.27350351214408875, "rewards/margins": 0.18680444359779358, "rewards/rejected": -0.4603079855442047, "step": 3240 }, { "epoch": 0.24, "learning_rate": 1.8834226114463258e-06, "logits/chosen": -2.1591928005218506, "logits/rejected": -1.6002795696258545, "logps/chosen": -423.29180908203125, "logps/rejected": -573.6384887695312, "loss": 0.6823, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28067439794540405, "rewards/margins": 0.18121960759162903, "rewards/rejected": -0.46189403533935547, "step": 3250 }, { "epoch": 0.24, "learning_rate": 1.8822131615677922e-06, "logits/chosen": -2.0326995849609375, "logits/rejected": -1.5907227993011475, "logps/chosen": -420.341552734375, "logps/rejected": -604.4678344726562, "loss": 0.6817, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.25137898325920105, "rewards/margins": 0.1764204055070877, "rewards/rejected": -0.42779937386512756, "step": 3260 }, { "epoch": 0.24, "learning_rate": 1.8809978626816832e-06, "logits/chosen": -2.1845452785491943, "logits/rejected": -1.6516386270523071, "logps/chosen": -569.3966674804688, "logps/rejected": -703.9217529296875, "loss": 0.678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3624291718006134, "rewards/margins": 0.181131511926651, "rewards/rejected": -0.5435606837272644, "step": 3270 }, { "epoch": 0.24, "learning_rate": 1.87977672284534e-06, "logits/chosen": -2.066312551498413, "logits/rejected": -1.581826090812683, "logps/chosen": -525.1586303710938, "logps/rejected": -690.1304321289062, "loss": 0.6782, "rewards/accuracies": 0.75, "rewards/chosen": -0.35725125670433044, "rewards/margins": 0.20405729115009308, "rewards/rejected": -0.5613085627555847, "step": 3280 }, { "epoch": 0.24, "learning_rate": 1.8785497501548289e-06, "logits/chosen": -2.236414909362793, "logits/rejected": -1.7572968006134033, "logps/chosen": -396.40496826171875, "logps/rejected": -516.777587890625, "loss": 0.6782, "rewards/accuracies": 0.625, "rewards/chosen": -0.25678423047065735, "rewards/margins": 0.157109335064888, "rewards/rejected": -0.41389361023902893, "step": 3290 }, { "epoch": 0.24, "learning_rate": 1.877316952744887e-06, "logits/chosen": -2.2805285453796387, "logits/rejected": -1.9164774417877197, "logps/chosen": -491.749755859375, "logps/rejected": -554.29443359375, "loss": 0.6832, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3092234134674072, "rewards/margins": 0.10072050988674164, "rewards/rejected": -0.40994390845298767, "step": 3300 }, { "epoch": 0.24, "learning_rate": 1.87607833878887e-06, "logits/chosen": -2.0511462688446045, "logits/rejected": -1.7306734323501587, "logps/chosen": -528.37353515625, "logps/rejected": -659.133056640625, "loss": 0.6795, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3274194896221161, "rewards/margins": 0.16418960690498352, "rewards/rejected": -0.4916090965270996, "step": 3310 }, { "epoch": 0.24, "learning_rate": 1.874833916498696e-06, "logits/chosen": -2.1535844802856445, "logits/rejected": -1.5973514318466187, "logps/chosen": -500.28411865234375, "logps/rejected": -629.2081909179688, "loss": 0.6826, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34470513463020325, "rewards/margins": 0.17242224514484406, "rewards/rejected": -0.5171273946762085, "step": 3320 }, { "epoch": 0.25, "learning_rate": 1.8735836941247918e-06, "logits/chosen": -2.1188108921051025, "logits/rejected": -1.6151816844940186, "logps/chosen": -452.9765625, "logps/rejected": -569.5828247070312, "loss": 0.6823, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.27020031213760376, "rewards/margins": 0.1729101985692978, "rewards/rejected": -0.44311046600341797, "step": 3330 }, { "epoch": 0.25, "learning_rate": 1.872327679956039e-06, "logits/chosen": -2.281315803527832, "logits/rejected": -2.011960506439209, "logps/chosen": -363.4384765625, "logps/rejected": -446.4459533691406, "loss": 0.6817, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.20521073043346405, "rewards/margins": 0.09956765919923782, "rewards/rejected": -0.30477839708328247, "step": 3340 }, { "epoch": 0.25, "learning_rate": 1.8710658823197178e-06, "logits/chosen": -2.178260087966919, "logits/rejected": -1.5344918966293335, "logps/chosen": -368.27618408203125, "logps/rejected": -515.8880615234375, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.18563881516456604, "rewards/margins": 0.20653435587882996, "rewards/rejected": -0.3921731412410736, "step": 3350 }, { "epoch": 0.25, "learning_rate": 1.8697983095814527e-06, "logits/chosen": -2.2567574977874756, "logits/rejected": -1.8783210515975952, "logps/chosen": -407.43768310546875, "logps/rejected": -576.4716796875, "loss": 0.6769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24289360642433167, "rewards/margins": 0.20078220963478088, "rewards/rejected": -0.44367581605911255, "step": 3360 }, { "epoch": 0.25, "learning_rate": 1.8685249701451568e-06, "logits/chosen": -2.2157158851623535, "logits/rejected": -1.7176374197006226, "logps/chosen": -315.6075134277344, "logps/rejected": -444.6148986816406, "loss": 0.683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14777794480323792, "rewards/margins": 0.16888603568077087, "rewards/rejected": -0.3166639804840088, "step": 3370 }, { "epoch": 0.25, "learning_rate": 1.867245872452975e-06, "logits/chosen": -2.202772617340088, "logits/rejected": -1.7215312719345093, "logps/chosen": -363.6238098144531, "logps/rejected": -527.775390625, "loss": 0.6794, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20758096873760223, "rewards/margins": 0.19644469022750854, "rewards/rejected": -0.40402570366859436, "step": 3380 }, { "epoch": 0.25, "learning_rate": 1.865961024985231e-06, "logits/chosen": -2.2822372913360596, "logits/rejected": -1.6779611110687256, "logps/chosen": -269.3408508300781, "logps/rejected": -354.0631408691406, "loss": 0.6855, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11017131805419922, "rewards/margins": 0.1118772029876709, "rewards/rejected": -0.22204852104187012, "step": 3390 }, { "epoch": 0.25, "learning_rate": 1.8646704362603667e-06, "logits/chosen": -1.9833332300186157, "logits/rejected": -1.6525599956512451, "logps/chosen": -275.6925964355469, "logps/rejected": -417.490966796875, "loss": 0.683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.11261290311813354, "rewards/margins": 0.14060403406620026, "rewards/rejected": -0.2532169222831726, "step": 3400 }, { "epoch": 0.25, "learning_rate": 1.8633741148348896e-06, "logits/chosen": -2.3288495540618896, "logits/rejected": -1.915209174156189, "logps/chosen": -333.0775146484375, "logps/rejected": -435.3648986816406, "loss": 0.6815, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1905587613582611, "rewards/margins": 0.13556984066963196, "rewards/rejected": -0.32612860202789307, "step": 3410 }, { "epoch": 0.25, "learning_rate": 1.8620720693033146e-06, "logits/chosen": -2.173358678817749, "logits/rejected": -1.5056231021881104, "logps/chosen": -377.93292236328125, "logps/rejected": -557.4208374023438, "loss": 0.674, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.22213120758533478, "rewards/margins": 0.23569905757904053, "rewards/rejected": -0.4578302502632141, "step": 3420 }, { "epoch": 0.25, "learning_rate": 1.8607643082981062e-06, "logits/chosen": -1.9603687524795532, "logits/rejected": -1.5679285526275635, "logps/chosen": -471.6044006347656, "logps/rejected": -614.069091796875, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3327406346797943, "rewards/margins": 0.1443999707698822, "rewards/rejected": -0.4771406054496765, "step": 3430 }, { "epoch": 0.25, "learning_rate": 1.8594508404896224e-06, "logits/chosen": -2.277125358581543, "logits/rejected": -1.8673824071884155, "logps/chosen": -412.68359375, "logps/rejected": -603.1144409179688, "loss": 0.6763, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24241769313812256, "rewards/margins": 0.20567169785499573, "rewards/rejected": -0.4480893611907959, "step": 3440 }, { "epoch": 0.25, "learning_rate": 1.8581316745860574e-06, "logits/chosen": -2.1190567016601562, "logits/rejected": -1.6675297021865845, "logps/chosen": -461.33575439453125, "logps/rejected": -613.1353759765625, "loss": 0.6774, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2747267186641693, "rewards/margins": 0.18219716846942902, "rewards/rejected": -0.4569239020347595, "step": 3450 }, { "epoch": 0.26, "learning_rate": 1.8568068193333829e-06, "logits/chosen": -2.3937952518463135, "logits/rejected": -1.6431299448013306, "logps/chosen": -428.59454345703125, "logps/rejected": -572.0279541015625, "loss": 0.6761, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22817477583885193, "rewards/margins": 0.21848170459270477, "rewards/rejected": -0.4466565251350403, "step": 3460 }, { "epoch": 0.26, "learning_rate": 1.8554762835152903e-06, "logits/chosen": -2.350799560546875, "logits/rejected": -1.812771201133728, "logps/chosen": -503.7105407714844, "logps/rejected": -618.2540283203125, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3287715017795563, "rewards/margins": 0.15721328556537628, "rewards/rejected": -0.485984742641449, "step": 3470 }, { "epoch": 0.26, "learning_rate": 1.8541400759531329e-06, "logits/chosen": -2.136531352996826, "logits/rejected": -1.7000839710235596, "logps/chosen": -536.6117553710938, "logps/rejected": -665.7025756835938, "loss": 0.6806, "rewards/accuracies": 0.75, "rewards/chosen": -0.3613160252571106, "rewards/margins": 0.15201520919799805, "rewards/rejected": -0.5133312344551086, "step": 3480 }, { "epoch": 0.26, "learning_rate": 1.8527982055058678e-06, "logits/chosen": -2.2240211963653564, "logits/rejected": -1.6533706188201904, "logps/chosen": -494.8193359375, "logps/rejected": -629.0800170898438, "loss": 0.6747, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33604860305786133, "rewards/margins": 0.17029063403606415, "rewards/rejected": -0.5063392519950867, "step": 3490 }, { "epoch": 0.26, "learning_rate": 1.851450681069996e-06, "logits/chosen": -2.188687562942505, "logits/rejected": -1.6220413446426392, "logps/chosen": -497.95513916015625, "logps/rejected": -663.9769287109375, "loss": 0.6787, "rewards/accuracies": 0.75, "rewards/chosen": -0.34961333870887756, "rewards/margins": 0.19753897190093994, "rewards/rejected": -0.5471522808074951, "step": 3500 }, { "epoch": 0.26, "learning_rate": 1.8500975115795038e-06, "logits/chosen": -2.301785945892334, "logits/rejected": -1.4786956310272217, "logps/chosen": -575.890869140625, "logps/rejected": -742.2033081054688, "loss": 0.6756, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3994269371032715, "rewards/margins": 0.21259137988090515, "rewards/rejected": -0.6120182871818542, "step": 3510 }, { "epoch": 0.26, "learning_rate": 1.848738706005805e-06, "logits/chosen": -1.770435094833374, "logits/rejected": -1.6196300983428955, "logps/chosen": -553.7947998046875, "logps/rejected": -705.3516235351562, "loss": 0.6776, "rewards/accuracies": 0.75, "rewards/chosen": -0.4058055877685547, "rewards/margins": 0.17214584350585938, "rewards/rejected": -0.5779514312744141, "step": 3520 }, { "epoch": 0.26, "learning_rate": 1.8473742733576783e-06, "logits/chosen": -2.160247564315796, "logits/rejected": -1.6814651489257812, "logps/chosen": -459.6809997558594, "logps/rejected": -642.7891845703125, "loss": 0.6736, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3377230763435364, "rewards/margins": 0.22060656547546387, "rewards/rejected": -0.5583296418190002, "step": 3530 }, { "epoch": 0.26, "learning_rate": 1.8460042226812113e-06, "logits/chosen": -2.100548267364502, "logits/rejected": -1.3630101680755615, "logps/chosen": -454.73052978515625, "logps/rejected": -620.87841796875, "loss": 0.6796, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26816755533218384, "rewards/margins": 0.21168629825115204, "rewards/rejected": -0.47985386848449707, "step": 3540 }, { "epoch": 0.26, "learning_rate": 1.844628563059738e-06, "logits/chosen": -2.272909164428711, "logits/rejected": -1.9529247283935547, "logps/chosen": -345.3569641113281, "logps/rejected": -479.5646057128906, "loss": 0.6831, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.164809912443161, "rewards/margins": 0.13782036304473877, "rewards/rejected": -0.3026302456855774, "step": 3550 }, { "epoch": 0.26, "learning_rate": 1.8432473036137789e-06, "logits/chosen": -2.114405632019043, "logits/rejected": -1.6197696924209595, "logps/chosen": -419.6341247558594, "logps/rejected": -584.8107299804688, "loss": 0.672, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22013385593891144, "rewards/margins": 0.20978549122810364, "rewards/rejected": -0.4299193322658539, "step": 3560 }, { "epoch": 0.26, "learning_rate": 1.8418604535009813e-06, "logits/chosen": -1.828354835510254, "logits/rejected": -1.5221083164215088, "logps/chosen": -520.0449829101562, "logps/rejected": -631.411376953125, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3795250952243805, "rewards/margins": 0.13715405762195587, "rewards/rejected": -0.5166791081428528, "step": 3570 }, { "epoch": 0.26, "learning_rate": 1.8404680219160587e-06, "logits/chosen": -2.174349308013916, "logits/rejected": -1.670850396156311, "logps/chosen": -518.0616455078125, "logps/rejected": -709.1240234375, "loss": 0.6754, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3573710322380066, "rewards/margins": 0.23356160521507263, "rewards/rejected": -0.5909326672554016, "step": 3580 }, { "epoch": 0.26, "learning_rate": 1.8390700180907284e-06, "logits/chosen": -2.119563102722168, "logits/rejected": -1.7936102151870728, "logps/chosen": -514.925048828125, "logps/rejected": -586.8099365234375, "loss": 0.6825, "rewards/accuracies": 0.625, "rewards/chosen": -0.37554794549942017, "rewards/margins": 0.11262337118387222, "rewards/rejected": -0.488171249628067, "step": 3590 }, { "epoch": 0.27, "learning_rate": 1.8376664512936512e-06, "logits/chosen": -2.1899209022521973, "logits/rejected": -1.7174314260482788, "logps/chosen": -543.5714111328125, "logps/rejected": -683.0172119140625, "loss": 0.6814, "rewards/accuracies": 0.625, "rewards/chosen": -0.37990066409111023, "rewards/margins": 0.1929222196340561, "rewards/rejected": -0.5728229284286499, "step": 3600 }, { "epoch": 0.27, "learning_rate": 1.8362573308303717e-06, "logits/chosen": -2.224994659423828, "logits/rejected": -1.7353214025497437, "logps/chosen": -566.430908203125, "logps/rejected": -714.4229736328125, "loss": 0.6755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.38946858048439026, "rewards/margins": 0.1967330127954483, "rewards/rejected": -0.586201548576355, "step": 3610 }, { "epoch": 0.27, "learning_rate": 1.8348426660432526e-06, "logits/chosen": -2.1247034072875977, "logits/rejected": -1.6833877563476562, "logps/chosen": -612.07080078125, "logps/rejected": -734.741943359375, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40646082162857056, "rewards/margins": 0.1772623062133789, "rewards/rejected": -0.5837231278419495, "step": 3620 }, { "epoch": 0.27, "learning_rate": 1.8334224663114166e-06, "logits/chosen": -2.200002908706665, "logits/rejected": -1.592591643333435, "logps/chosen": -542.77734375, "logps/rejected": -757.78271484375, "loss": 0.6735, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3589392304420471, "rewards/margins": 0.27085310220718384, "rewards/rejected": -0.629792332649231, "step": 3630 }, { "epoch": 0.27, "learning_rate": 1.8319967410506824e-06, "logits/chosen": -2.1502890586853027, "logits/rejected": -1.582765817642212, "logps/chosen": -487.6127014160156, "logps/rejected": -708.6649169921875, "loss": 0.6747, "rewards/accuracies": 0.75, "rewards/chosen": -0.3207107186317444, "rewards/margins": 0.24090635776519775, "rewards/rejected": -0.5616170763969421, "step": 3640 }, { "epoch": 0.27, "learning_rate": 1.8305654997135012e-06, "logits/chosen": -2.344989061355591, "logits/rejected": -1.9361757040023804, "logps/chosen": -460.01043701171875, "logps/rejected": -535.62646484375, "loss": 0.6894, "rewards/accuracies": 0.625, "rewards/chosen": -0.2555355131626129, "rewards/margins": 0.1098545640707016, "rewards/rejected": -0.3653901219367981, "step": 3650 }, { "epoch": 0.27, "learning_rate": 1.8291287517888979e-06, "logits/chosen": -2.1642680168151855, "logits/rejected": -1.7833521366119385, "logps/chosen": -351.78045654296875, "logps/rejected": -478.5384826660156, "loss": 0.6824, "rewards/accuracies": 0.625, "rewards/chosen": -0.21502408385276794, "rewards/margins": 0.1356123983860016, "rewards/rejected": -0.35063648223876953, "step": 3660 }, { "epoch": 0.27, "learning_rate": 1.8276865068024035e-06, "logits/chosen": -2.244497776031494, "logits/rejected": -1.4874540567398071, "logps/chosen": -392.787353515625, "logps/rejected": -553.6170654296875, "loss": 0.6715, "rewards/accuracies": 0.75, "rewards/chosen": -0.23025837540626526, "rewards/margins": 0.20502617955207825, "rewards/rejected": -0.4352845251560211, "step": 3670 }, { "epoch": 0.27, "learning_rate": 1.8262387743159948e-06, "logits/chosen": -2.196091413497925, "logits/rejected": -1.6632823944091797, "logps/chosen": -453.5072326660156, "logps/rejected": -612.3277587890625, "loss": 0.6791, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32509034872055054, "rewards/margins": 0.16454902291297913, "rewards/rejected": -0.48963937163352966, "step": 3680 }, { "epoch": 0.27, "learning_rate": 1.8247855639280308e-06, "logits/chosen": -2.184633493423462, "logits/rejected": -1.5168755054473877, "logps/chosen": -436.63055419921875, "logps/rejected": -561.3179931640625, "loss": 0.6877, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2765442728996277, "rewards/margins": 0.18030035495758057, "rewards/rejected": -0.45684465765953064, "step": 3690 }, { "epoch": 0.27, "learning_rate": 1.8233268852731878e-06, "logits/chosen": -2.057413101196289, "logits/rejected": -1.6107254028320312, "logps/chosen": -466.52166748046875, "logps/rejected": -672.2536010742188, "loss": 0.6787, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30314168334007263, "rewards/margins": 0.21430103480815887, "rewards/rejected": -0.5174427032470703, "step": 3700 }, { "epoch": 0.27, "learning_rate": 1.821862748022397e-06, "logits/chosen": -2.26648211479187, "logits/rejected": -1.7409303188323975, "logps/chosen": -494.6626892089844, "logps/rejected": -674.7767944335938, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33277279138565063, "rewards/margins": 0.1746859848499298, "rewards/rejected": -0.5074588060379028, "step": 3710 }, { "epoch": 0.27, "learning_rate": 1.8203931618827793e-06, "logits/chosen": -2.082948923110962, "logits/rejected": -1.50017249584198, "logps/chosen": -484.67315673828125, "logps/rejected": -620.4537353515625, "loss": 0.6753, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3300681412220001, "rewards/margins": 0.20017755031585693, "rewards/rejected": -0.5302456617355347, "step": 3720 }, { "epoch": 0.28, "learning_rate": 1.8189181365975811e-06, "logits/chosen": -1.8513768911361694, "logits/rejected": -1.851190209388733, "logps/chosen": -482.638671875, "logps/rejected": -574.6032104492188, "loss": 0.6878, "rewards/accuracies": 0.625, "rewards/chosen": -0.37790215015411377, "rewards/margins": 0.0822509154677391, "rewards/rejected": -0.4601530432701111, "step": 3730 }, { "epoch": 0.28, "learning_rate": 1.8174376819461106e-06, "logits/chosen": -2.071769952774048, "logits/rejected": -1.7234508991241455, "logps/chosen": -407.94818115234375, "logps/rejected": -577.9144287109375, "loss": 0.6795, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2519489824771881, "rewards/margins": 0.1624341756105423, "rewards/rejected": -0.4143831133842468, "step": 3740 }, { "epoch": 0.28, "learning_rate": 1.8159518077436717e-06, "logits/chosen": -2.0386135578155518, "logits/rejected": -1.9060420989990234, "logps/chosen": -384.38897705078125, "logps/rejected": -502.99755859375, "loss": 0.6818, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.21941158175468445, "rewards/margins": 0.1418587863445282, "rewards/rejected": -0.36127039790153503, "step": 3750 }, { "epoch": 0.28, "learning_rate": 1.8144605238415e-06, "logits/chosen": -2.305105447769165, "logits/rejected": -1.6421782970428467, "logps/chosen": -475.2816467285156, "logps/rejected": -609.8496704101562, "loss": 0.6807, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3003297448158264, "rewards/margins": 0.18115052580833435, "rewards/rejected": -0.48148030042648315, "step": 3760 }, { "epoch": 0.28, "learning_rate": 1.8129638401266962e-06, "logits/chosen": -2.1796767711639404, "logits/rejected": -1.6100256443023682, "logps/chosen": -545.9366455078125, "logps/rejected": -662.8981323242188, "loss": 0.6834, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37525978684425354, "rewards/margins": 0.1478993445634842, "rewards/rejected": -0.5231590867042542, "step": 3770 }, { "epoch": 0.28, "learning_rate": 1.811461766522162e-06, "logits/chosen": -2.1486411094665527, "logits/rejected": -1.6779826879501343, "logps/chosen": -468.40740966796875, "logps/rejected": -607.7574462890625, "loss": 0.6753, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32366281747817993, "rewards/margins": 0.17954519391059875, "rewards/rejected": -0.5032079815864563, "step": 3780 }, { "epoch": 0.28, "learning_rate": 1.8099543129865343e-06, "logits/chosen": -2.0680243968963623, "logits/rejected": -1.624258041381836, "logps/chosen": -459.97576904296875, "logps/rejected": -664.1590576171875, "loss": 0.677, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32371994853019714, "rewards/margins": 0.21814627945423126, "rewards/rejected": -0.5418661832809448, "step": 3790 }, { "epoch": 0.28, "learning_rate": 1.808441489514117e-06, "logits/chosen": -2.3361058235168457, "logits/rejected": -1.7900335788726807, "logps/chosen": -542.5493774414062, "logps/rejected": -676.4155883789062, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3946964740753174, "rewards/margins": 0.15905709564685822, "rewards/rejected": -0.5537535548210144, "step": 3800 }, { "epoch": 0.28, "learning_rate": 1.8069233061348172e-06, "logits/chosen": -2.219559669494629, "logits/rejected": -1.8813817501068115, "logps/chosen": -466.25311279296875, "logps/rejected": -599.365234375, "loss": 0.677, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2573816776275635, "rewards/margins": 0.1765788048505783, "rewards/rejected": -0.433960497379303, "step": 3810 }, { "epoch": 0.28, "learning_rate": 1.805399772914078e-06, "logits/chosen": -2.1704041957855225, "logits/rejected": -1.6930497884750366, "logps/chosen": -519.326416015625, "logps/rejected": -735.81298828125, "loss": 0.6743, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35828307271003723, "rewards/margins": 0.20819079875946045, "rewards/rejected": -0.5664738416671753, "step": 3820 }, { "epoch": 0.28, "learning_rate": 1.8038708999528106e-06, "logits/chosen": -2.377058982849121, "logits/rejected": -2.008300542831421, "logps/chosen": -440.29345703125, "logps/rejected": -547.1416625976562, "loss": 0.6824, "rewards/accuracies": 0.625, "rewards/chosen": -0.2734847366809845, "rewards/margins": 0.1133466586470604, "rewards/rejected": -0.3868314027786255, "step": 3830 }, { "epoch": 0.28, "learning_rate": 1.8023366973873299e-06, "logits/chosen": -2.2230992317199707, "logits/rejected": -1.6204019784927368, "logps/chosen": -525.0546264648438, "logps/rejected": -661.845458984375, "loss": 0.6817, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.34012648463249207, "rewards/margins": 0.18068328499794006, "rewards/rejected": -0.5208097696304321, "step": 3840 }, { "epoch": 0.28, "learning_rate": 1.8007971753892836e-06, "logits/chosen": -1.9409358501434326, "logits/rejected": -1.7351688146591187, "logps/chosen": -578.9671020507812, "logps/rejected": -733.5281982421875, "loss": 0.6812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4219551086425781, "rewards/margins": 0.16463562846183777, "rewards/rejected": -0.5865907669067383, "step": 3850 }, { "epoch": 0.28, "learning_rate": 1.799252344165589e-06, "logits/chosen": -2.1604971885681152, "logits/rejected": -1.6232223510742188, "logps/chosen": -538.1886596679688, "logps/rejected": -625.3087158203125, "loss": 0.6802, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33763736486434937, "rewards/margins": 0.14131012558937073, "rewards/rejected": -0.4789474904537201, "step": 3860 }, { "epoch": 0.29, "learning_rate": 1.797702213958361e-06, "logits/chosen": -2.3919167518615723, "logits/rejected": -1.7761833667755127, "logps/chosen": -417.01153564453125, "logps/rejected": -642.82177734375, "loss": 0.6724, "rewards/accuracies": 0.75, "rewards/chosen": -0.2808604836463928, "rewards/margins": 0.2472628355026245, "rewards/rejected": -0.5281232595443726, "step": 3870 }, { "epoch": 0.29, "learning_rate": 1.7961467950448484e-06, "logits/chosen": -2.063586711883545, "logits/rejected": -1.5861233472824097, "logps/chosen": -507.21258544921875, "logps/rejected": -640.3582153320312, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32451337575912476, "rewards/margins": 0.16722214221954346, "rewards/rejected": -0.4917355477809906, "step": 3880 }, { "epoch": 0.29, "learning_rate": 1.7945860977373619e-06, "logits/chosen": -2.086826801300049, "logits/rejected": -1.4113986492156982, "logps/chosen": -522.3760986328125, "logps/rejected": -714.2114868164062, "loss": 0.6741, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36156660318374634, "rewards/margins": 0.23775120079517365, "rewards/rejected": -0.5993177890777588, "step": 3890 }, { "epoch": 0.29, "learning_rate": 1.7930201323832094e-06, "logits/chosen": -2.185270309448242, "logits/rejected": -1.8300464153289795, "logps/chosen": -513.3651123046875, "logps/rejected": -641.8380737304688, "loss": 0.6811, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.29074206948280334, "rewards/margins": 0.17478156089782715, "rewards/rejected": -0.4655235707759857, "step": 3900 }, { "epoch": 0.29, "learning_rate": 1.7914489093646237e-06, "logits/chosen": -1.989579200744629, "logits/rejected": -1.5804967880249023, "logps/chosen": -599.8699340820312, "logps/rejected": -776.3284912109375, "loss": 0.6783, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4398646354675293, "rewards/margins": 0.20449014008045197, "rewards/rejected": -0.6443547606468201, "step": 3910 }, { "epoch": 0.29, "learning_rate": 1.789872439098697e-06, "logits/chosen": -2.297780990600586, "logits/rejected": -1.99337899684906, "logps/chosen": -579.8453979492188, "logps/rejected": -723.2843017578125, "loss": 0.6842, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3962760269641876, "rewards/margins": 0.1594689041376114, "rewards/rejected": -0.5557448863983154, "step": 3920 }, { "epoch": 0.29, "learning_rate": 1.7882907320373094e-06, "logits/chosen": -1.9565351009368896, "logits/rejected": -1.7975467443466187, "logps/chosen": -587.4010620117188, "logps/rejected": -711.1478881835938, "loss": 0.6806, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.421872079372406, "rewards/margins": 0.15700072050094604, "rewards/rejected": -0.5788728594779968, "step": 3930 }, { "epoch": 0.29, "learning_rate": 1.7867037986670615e-06, "logits/chosen": -2.1219065189361572, "logits/rejected": -1.9457588195800781, "logps/chosen": -459.93878173828125, "logps/rejected": -565.8150024414062, "loss": 0.6842, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31133121252059937, "rewards/margins": 0.1255917251110077, "rewards/rejected": -0.43692296743392944, "step": 3940 }, { "epoch": 0.29, "learning_rate": 1.7851116495092028e-06, "logits/chosen": -1.9448798894882202, "logits/rejected": -1.2336145639419556, "logps/chosen": -588.9042358398438, "logps/rejected": -722.6289672851562, "loss": 0.683, "rewards/accuracies": 0.75, "rewards/chosen": -0.4171927571296692, "rewards/margins": 0.18712499737739563, "rewards/rejected": -0.6043176651000977, "step": 3950 }, { "epoch": 0.29, "learning_rate": 1.783514295119564e-06, "logits/chosen": -2.0796303749084473, "logits/rejected": -1.59702467918396, "logps/chosen": -503.8788146972656, "logps/rejected": -639.061279296875, "loss": 0.683, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31948181986808777, "rewards/margins": 0.17538881301879883, "rewards/rejected": -0.49487072229385376, "step": 3960 }, { "epoch": 0.29, "learning_rate": 1.781911746088486e-06, "logits/chosen": -2.049903631210327, "logits/rejected": -1.5173251628875732, "logps/chosen": -589.2283935546875, "logps/rejected": -788.1104736328125, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -0.450288861989975, "rewards/margins": 0.21096885204315186, "rewards/rejected": -0.6612577438354492, "step": 3970 }, { "epoch": 0.29, "learning_rate": 1.780304013040749e-06, "logits/chosen": -2.0252816677093506, "logits/rejected": -1.495496392250061, "logps/chosen": -494.3642578125, "logps/rejected": -637.5883178710938, "loss": 0.6778, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36198824644088745, "rewards/margins": 0.17119598388671875, "rewards/rejected": -0.5331842303276062, "step": 3980 }, { "epoch": 0.29, "learning_rate": 1.7786911066355038e-06, "logits/chosen": -2.2881853580474854, "logits/rejected": -1.8169502019882202, "logps/chosen": -468.18499755859375, "logps/rejected": -607.6126708984375, "loss": 0.6773, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2602108120918274, "rewards/margins": 0.1857840120792389, "rewards/rejected": -0.4459947943687439, "step": 3990 }, { "epoch": 0.3, "learning_rate": 1.7770730375661996e-06, "logits/chosen": -2.093794345855713, "logits/rejected": -1.6662814617156982, "logps/chosen": -537.5603637695312, "logps/rejected": -658.3980712890625, "loss": 0.6836, "rewards/accuracies": 0.625, "rewards/chosen": -0.395205557346344, "rewards/margins": 0.13812445104122162, "rewards/rejected": -0.5333299040794373, "step": 4000 }, { "epoch": 0.3, "learning_rate": 1.7754498165605137e-06, "logits/chosen": -2.123521089553833, "logits/rejected": -1.8673675060272217, "logps/chosen": -531.849365234375, "logps/rejected": -615.1085205078125, "loss": 0.6858, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35496771335601807, "rewards/margins": 0.11298409849405289, "rewards/rejected": -0.46795183420181274, "step": 4010 }, { "epoch": 0.3, "learning_rate": 1.7738214543802807e-06, "logits/chosen": -2.144113779067993, "logits/rejected": -1.9066448211669922, "logps/chosen": -461.445556640625, "logps/rejected": -532.9085083007812, "loss": 0.684, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3193840980529785, "rewards/margins": 0.0860183835029602, "rewards/rejected": -0.40540242195129395, "step": 4020 }, { "epoch": 0.3, "learning_rate": 1.7721879618214208e-06, "logits/chosen": -1.9446882009506226, "logits/rejected": -1.5558723211288452, "logps/chosen": -462.38543701171875, "logps/rejected": -595.52001953125, "loss": 0.6757, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2922886312007904, "rewards/margins": 0.18245439231395721, "rewards/rejected": -0.47474297881126404, "step": 4030 }, { "epoch": 0.3, "learning_rate": 1.770549349713868e-06, "logits/chosen": -1.984279990196228, "logits/rejected": -1.5509977340698242, "logps/chosen": -408.73736572265625, "logps/rejected": -630.3936767578125, "loss": 0.6734, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2710419297218323, "rewards/margins": 0.2583908438682556, "rewards/rejected": -0.5294327735900879, "step": 4040 }, { "epoch": 0.3, "learning_rate": 1.7689056289214982e-06, "logits/chosen": -2.070711612701416, "logits/rejected": -1.6969900131225586, "logps/chosen": -537.2640991210938, "logps/rejected": -671.4686279296875, "loss": 0.686, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4176475405693054, "rewards/margins": 0.1399337351322174, "rewards/rejected": -0.5575813055038452, "step": 4050 }, { "epoch": 0.3, "learning_rate": 1.767256810342058e-06, "logits/chosen": -1.9355144500732422, "logits/rejected": -1.5486150979995728, "logps/chosen": -555.4055786132812, "logps/rejected": -718.777587890625, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.40205293893814087, "rewards/margins": 0.18269900977611542, "rewards/rejected": -0.5847519636154175, "step": 4060 }, { "epoch": 0.3, "learning_rate": 1.765602904907092e-06, "logits/chosen": -2.2658398151397705, "logits/rejected": -1.5616555213928223, "logps/chosen": -521.5490112304688, "logps/rejected": -669.2052001953125, "loss": 0.6768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2870790958404541, "rewards/margins": 0.20861539244651794, "rewards/rejected": -0.49569448828697205, "step": 4070 }, { "epoch": 0.3, "learning_rate": 1.76394392358187e-06, "logits/chosen": -2.0604186058044434, "logits/rejected": -1.3372647762298584, "logps/chosen": -477.74298095703125, "logps/rejected": -652.2027587890625, "loss": 0.6818, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3213496208190918, "rewards/margins": 0.24479933083057404, "rewards/rejected": -0.5661489367485046, "step": 4080 }, { "epoch": 0.3, "learning_rate": 1.7622798773653147e-06, "logits/chosen": -1.9646533727645874, "logits/rejected": -1.4652903079986572, "logps/chosen": -467.997802734375, "logps/rejected": -595.4482421875, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32150495052337646, "rewards/margins": 0.15941481292247772, "rewards/rejected": -0.480919748544693, "step": 4090 }, { "epoch": 0.3, "learning_rate": 1.7606107772899285e-06, "logits/chosen": -2.1648483276367188, "logits/rejected": -1.6774685382843018, "logps/chosen": -418.1348571777344, "logps/rejected": -588.9632568359375, "loss": 0.6791, "rewards/accuracies": 0.75, "rewards/chosen": -0.2817802131175995, "rewards/margins": 0.16068518161773682, "rewards/rejected": -0.4424653947353363, "step": 4100 }, { "epoch": 0.3, "learning_rate": 1.7589366344217209e-06, "logits/chosen": -2.170992136001587, "logits/rejected": -1.6958091259002686, "logps/chosen": -505.0699768066406, "logps/rejected": -663.721923828125, "loss": 0.6821, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3395533263683319, "rewards/margins": 0.18259796500205994, "rewards/rejected": -0.5221512913703918, "step": 4110 }, { "epoch": 0.3, "learning_rate": 1.7572574598601343e-06, "logits/chosen": -2.3483119010925293, "logits/rejected": -1.7402360439300537, "logps/chosen": -568.6649169921875, "logps/rejected": -700.5016479492188, "loss": 0.6796, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37087783217430115, "rewards/margins": 0.20340189337730408, "rewards/rejected": -0.57427978515625, "step": 4120 }, { "epoch": 0.3, "learning_rate": 1.7555732647379707e-06, "logits/chosen": -2.237067222595215, "logits/rejected": -1.727752923965454, "logps/chosen": -489.348388671875, "logps/rejected": -610.6319580078125, "loss": 0.6791, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31350192427635193, "rewards/margins": 0.16762886941432953, "rewards/rejected": -0.48113077878952026, "step": 4130 }, { "epoch": 0.31, "learning_rate": 1.753884060221319e-06, "logits/chosen": -2.122357130050659, "logits/rejected": -1.9452130794525146, "logps/chosen": -519.1387939453125, "logps/rejected": -662.7869873046875, "loss": 0.6817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3379341959953308, "rewards/margins": 0.14725303649902344, "rewards/rejected": -0.48518723249435425, "step": 4140 }, { "epoch": 0.31, "learning_rate": 1.752189857509479e-06, "logits/chosen": -2.1590523719787598, "logits/rejected": -1.688349723815918, "logps/chosen": -406.0447082519531, "logps/rejected": -558.8823852539062, "loss": 0.6772, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20851263403892517, "rewards/margins": 0.19289086759090424, "rewards/rejected": -0.4014035165309906, "step": 4150 }, { "epoch": 0.31, "learning_rate": 1.7504906678348883e-06, "logits/chosen": -2.3320469856262207, "logits/rejected": -2.1057870388031006, "logps/chosen": -428.6915588378906, "logps/rejected": -492.50994873046875, "loss": 0.6859, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2531988024711609, "rewards/margins": 0.060397446155548096, "rewards/rejected": -0.313596248626709, "step": 4160 }, { "epoch": 0.31, "learning_rate": 1.7487865024630484e-06, "logits/chosen": -2.1392784118652344, "logits/rejected": -1.6052732467651367, "logps/chosen": -402.0005798339844, "logps/rejected": -571.4208984375, "loss": 0.6723, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2569015622138977, "rewards/margins": 0.2017710655927658, "rewards/rejected": -0.4586726129055023, "step": 4170 }, { "epoch": 0.31, "learning_rate": 1.7470773726924486e-06, "logits/chosen": -2.154756546020508, "logits/rejected": -1.7098820209503174, "logps/chosen": -442.5440979003906, "logps/rejected": -593.1409301757812, "loss": 0.6773, "rewards/accuracies": 0.625, "rewards/chosen": -0.27092552185058594, "rewards/margins": 0.1997191607952118, "rewards/rejected": -0.47064465284347534, "step": 4180 }, { "epoch": 0.31, "learning_rate": 1.745363289854492e-06, "logits/chosen": -2.2491514682769775, "logits/rejected": -1.7716474533081055, "logps/chosen": -573.3902587890625, "logps/rejected": -695.5729370117188, "loss": 0.6793, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35133495926856995, "rewards/margins": 0.20070576667785645, "rewards/rejected": -0.552040696144104, "step": 4190 }, { "epoch": 0.31, "learning_rate": 1.7436442653134203e-06, "logits/chosen": -2.2069103717803955, "logits/rejected": -1.9085267782211304, "logps/chosen": -509.54095458984375, "logps/rejected": -645.0572509765625, "loss": 0.6839, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28282544016838074, "rewards/margins": 0.18671007454395294, "rewards/rejected": -0.46953558921813965, "step": 4200 }, { "epoch": 0.31, "learning_rate": 1.7419203104662383e-06, "logits/chosen": -1.9154536724090576, "logits/rejected": -1.4585381746292114, "logps/chosen": -587.3914184570312, "logps/rejected": -713.2484130859375, "loss": 0.6795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40087518095970154, "rewards/margins": 0.17562779784202576, "rewards/rejected": -0.5765030384063721, "step": 4210 }, { "epoch": 0.31, "learning_rate": 1.7401914367426378e-06, "logits/chosen": -2.0847182273864746, "logits/rejected": -1.6891742944717407, "logps/chosen": -598.373779296875, "logps/rejected": -684.5889892578125, "loss": 0.6815, "rewards/accuracies": 0.75, "rewards/chosen": -0.45796719193458557, "rewards/margins": 0.1334395706653595, "rewards/rejected": -0.5914067625999451, "step": 4220 }, { "epoch": 0.31, "learning_rate": 1.7384576556049236e-06, "logits/chosen": -1.9870359897613525, "logits/rejected": -1.4280996322631836, "logps/chosen": -497.2145080566406, "logps/rejected": -751.9065551757812, "loss": 0.6704, "rewards/accuracies": 0.875, "rewards/chosen": -0.36298537254333496, "rewards/margins": 0.2600043714046478, "rewards/rejected": -0.6229897737503052, "step": 4230 }, { "epoch": 0.31, "learning_rate": 1.7367189785479349e-06, "logits/chosen": -2.2327778339385986, "logits/rejected": -1.547808051109314, "logps/chosen": -436.25506591796875, "logps/rejected": -592.7633056640625, "loss": 0.6757, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.26830607652664185, "rewards/margins": 0.21723619103431702, "rewards/rejected": -0.48554229736328125, "step": 4240 }, { "epoch": 0.31, "learning_rate": 1.7349754170989713e-06, "logits/chosen": -2.314298152923584, "logits/rejected": -1.895703673362732, "logps/chosen": -467.2147521972656, "logps/rejected": -555.1045532226562, "loss": 0.6829, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24445469677448273, "rewards/margins": 0.14920572936534882, "rewards/rejected": -0.39366039633750916, "step": 4250 }, { "epoch": 0.31, "learning_rate": 1.7332269828177157e-06, "logits/chosen": -2.159226655960083, "logits/rejected": -1.5471376180648804, "logps/chosen": -545.0155029296875, "logps/rejected": -642.6307983398438, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35951709747314453, "rewards/margins": 0.15139329433441162, "rewards/rejected": -0.5109103918075562, "step": 4260 }, { "epoch": 0.31, "learning_rate": 1.731473687296157e-06, "logits/chosen": -1.9356123208999634, "logits/rejected": -1.6450507640838623, "logps/chosen": -419.3009338378906, "logps/rejected": -570.725341796875, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2267940789461136, "rewards/margins": 0.17342403531074524, "rewards/rejected": -0.40021806955337524, "step": 4270 }, { "epoch": 0.32, "learning_rate": 1.729715542158514e-06, "logits/chosen": -1.9253000020980835, "logits/rejected": -1.507803201675415, "logps/chosen": -454.472412109375, "logps/rejected": -660.8861694335938, "loss": 0.6769, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3279040455818176, "rewards/margins": 0.22385776042938232, "rewards/rejected": -0.5517618656158447, "step": 4280 }, { "epoch": 0.32, "learning_rate": 1.7279525590611579e-06, "logits/chosen": -2.1207075119018555, "logits/rejected": -1.6969352960586548, "logps/chosen": -488.72149658203125, "logps/rejected": -661.4598388671875, "loss": 0.6773, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31326940655708313, "rewards/margins": 0.1997639387845993, "rewards/rejected": -0.513033390045166, "step": 4290 }, { "epoch": 0.32, "learning_rate": 1.7261847496925356e-06, "logits/chosen": -1.9558384418487549, "logits/rejected": -1.4640185832977295, "logps/chosen": -455.021484375, "logps/rejected": -624.3304443359375, "loss": 0.6776, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.29411453008651733, "rewards/margins": 0.1966308355331421, "rewards/rejected": -0.4907453656196594, "step": 4300 }, { "epoch": 0.32, "learning_rate": 1.724412125773091e-06, "logits/chosen": -2.1303703784942627, "logits/rejected": -1.7238067388534546, "logps/chosen": -426.27801513671875, "logps/rejected": -573.51611328125, "loss": 0.6802, "rewards/accuracies": 0.625, "rewards/chosen": -0.31388649344444275, "rewards/margins": 0.152658611536026, "rewards/rejected": -0.4665451645851135, "step": 4310 }, { "epoch": 0.32, "learning_rate": 1.7226346990551895e-06, "logits/chosen": -2.1193807125091553, "logits/rejected": -1.532954216003418, "logps/chosen": -314.6426696777344, "logps/rejected": -538.82080078125, "loss": 0.6684, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17345337569713593, "rewards/margins": 0.24753646552562714, "rewards/rejected": -0.42098990082740784, "step": 4320 }, { "epoch": 0.32, "learning_rate": 1.7208524813230376e-06, "logits/chosen": -2.05538272857666, "logits/rejected": -1.6054359674453735, "logps/chosen": -473.86114501953125, "logps/rejected": -626.6461791992188, "loss": 0.6803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3312576115131378, "rewards/margins": 0.17099735140800476, "rewards/rejected": -0.5022549629211426, "step": 4330 }, { "epoch": 0.32, "learning_rate": 1.719065484392606e-06, "logits/chosen": -2.1343181133270264, "logits/rejected": -1.3791255950927734, "logps/chosen": -469.369140625, "logps/rejected": -672.6207275390625, "loss": 0.6762, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3267568051815033, "rewards/margins": 0.23635618388652802, "rewards/rejected": -0.5631130337715149, "step": 4340 }, { "epoch": 0.32, "learning_rate": 1.7172737201115517e-06, "logits/chosen": -2.0032098293304443, "logits/rejected": -1.4315135478973389, "logps/chosen": -549.9168701171875, "logps/rejected": -756.9041748046875, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.40169841051101685, "rewards/margins": 0.2337942123413086, "rewards/rejected": -0.6354926228523254, "step": 4350 }, { "epoch": 0.32, "learning_rate": 1.715477200359138e-06, "logits/chosen": -2.1765224933624268, "logits/rejected": -1.4790656566619873, "logps/chosen": -463.57574462890625, "logps/rejected": -627.0445556640625, "loss": 0.6813, "rewards/accuracies": 0.75, "rewards/chosen": -0.30690470337867737, "rewards/margins": 0.1766970455646515, "rewards/rejected": -0.48360174894332886, "step": 4360 }, { "epoch": 0.32, "learning_rate": 1.7136759370461576e-06, "logits/chosen": -2.1671957969665527, "logits/rejected": -1.587101697921753, "logps/chosen": -521.4742431640625, "logps/rejected": -613.5319213867188, "loss": 0.6817, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3073802888393402, "rewards/margins": 0.16449584066867828, "rewards/rejected": -0.4718760848045349, "step": 4370 }, { "epoch": 0.32, "learning_rate": 1.7118699421148514e-06, "logits/chosen": -2.128262519836426, "logits/rejected": -1.3129889965057373, "logps/chosen": -488.33221435546875, "logps/rejected": -637.0216674804688, "loss": 0.6755, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2931111454963684, "rewards/margins": 0.22989805042743683, "rewards/rejected": -0.523009181022644, "step": 4380 }, { "epoch": 0.32, "learning_rate": 1.7100592275388321e-06, "logits/chosen": -2.078209161758423, "logits/rejected": -1.274139165878296, "logps/chosen": -580.8555297851562, "logps/rejected": -735.2132568359375, "loss": 0.6727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3785826861858368, "rewards/margins": 0.2263055145740509, "rewards/rejected": -0.6048882007598877, "step": 4390 }, { "epoch": 0.32, "learning_rate": 1.7082438053230022e-06, "logits/chosen": -2.050480842590332, "logits/rejected": -1.7970082759857178, "logps/chosen": -557.9434814453125, "logps/rejected": -703.9541015625, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.37633997201919556, "rewards/margins": 0.1491759568452835, "rewards/rejected": -0.5255159139633179, "step": 4400 }, { "epoch": 0.33, "learning_rate": 1.7064236875034759e-06, "logits/chosen": -2.1940956115722656, "logits/rejected": -1.540006399154663, "logps/chosen": -521.12060546875, "logps/rejected": -768.491943359375, "loss": 0.6735, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3686451315879822, "rewards/margins": 0.27233055233955383, "rewards/rejected": -0.6409756541252136, "step": 4410 }, { "epoch": 0.33, "learning_rate": 1.7045988861474984e-06, "logits/chosen": -1.8533947467803955, "logits/rejected": -1.7004337310791016, "logps/chosen": -519.8616943359375, "logps/rejected": -670.2671508789062, "loss": 0.6772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3860221207141876, "rewards/margins": 0.16260817646980286, "rewards/rejected": -0.5486302375793457, "step": 4420 }, { "epoch": 0.33, "learning_rate": 1.7027694133533675e-06, "logits/chosen": -1.8456790447235107, "logits/rejected": -1.2395192384719849, "logps/chosen": -509.32098388671875, "logps/rejected": -683.9597778320312, "loss": 0.6748, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3832164704799652, "rewards/margins": 0.2112501561641693, "rewards/rejected": -0.5944666266441345, "step": 4430 }, { "epoch": 0.33, "learning_rate": 1.7009352812503512e-06, "logits/chosen": -2.1018052101135254, "logits/rejected": -1.700852394104004, "logps/chosen": -437.2511291503906, "logps/rejected": -599.37353515625, "loss": 0.677, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26463156938552856, "rewards/margins": 0.197494775056839, "rewards/rejected": -0.46212631464004517, "step": 4440 }, { "epoch": 0.33, "learning_rate": 1.699096501998609e-06, "logits/chosen": -1.9784557819366455, "logits/rejected": -1.6200892925262451, "logps/chosen": -489.38702392578125, "logps/rejected": -614.6553344726562, "loss": 0.6846, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31755512952804565, "rewards/margins": 0.15108129382133484, "rewards/rejected": -0.4686364531517029, "step": 4450 }, { "epoch": 0.33, "learning_rate": 1.69725308778911e-06, "logits/chosen": -2.2249813079833984, "logits/rejected": -1.7022607326507568, "logps/chosen": -442.35205078125, "logps/rejected": -520.51611328125, "loss": 0.6831, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2575305998325348, "rewards/margins": 0.1392556130886078, "rewards/rejected": -0.39678627252578735, "step": 4460 }, { "epoch": 0.33, "learning_rate": 1.6954050508435538e-06, "logits/chosen": -2.2729239463806152, "logits/rejected": -1.748962640762329, "logps/chosen": -531.4226684570312, "logps/rejected": -633.1409912109375, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -0.37256914377212524, "rewards/margins": 0.1608845740556717, "rewards/rejected": -0.5334537029266357, "step": 4470 }, { "epoch": 0.33, "learning_rate": 1.6935524034142872e-06, "logits/chosen": -1.98626708984375, "logits/rejected": -1.6802982091903687, "logps/chosen": -487.5169372558594, "logps/rejected": -574.1229858398438, "loss": 0.6853, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3107338547706604, "rewards/margins": 0.12236249446868896, "rewards/rejected": -0.43309634923934937, "step": 4480 }, { "epoch": 0.33, "learning_rate": 1.691695157784225e-06, "logits/chosen": -2.0959157943725586, "logits/rejected": -1.6148897409439087, "logps/chosen": -527.8648681640625, "logps/rejected": -648.8616943359375, "loss": 0.6841, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32874196767807007, "rewards/margins": 0.16237060725688934, "rewards/rejected": -0.491112619638443, "step": 4490 }, { "epoch": 0.33, "learning_rate": 1.6898333262667672e-06, "logits/chosen": -2.034322738647461, "logits/rejected": -1.4295929670333862, "logps/chosen": -485.95245361328125, "logps/rejected": -676.7805786132812, "loss": 0.6779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34185975790023804, "rewards/margins": 0.2160838395357132, "rewards/rejected": -0.55794358253479, "step": 4500 }, { "epoch": 0.33, "learning_rate": 1.6879669212057186e-06, "logits/chosen": -2.348126173019409, "logits/rejected": -1.6403427124023438, "logps/chosen": -515.438232421875, "logps/rejected": -701.5556030273438, "loss": 0.6773, "rewards/accuracies": 0.75, "rewards/chosen": -0.3711282014846802, "rewards/margins": 0.23850175738334656, "rewards/rejected": -0.6096299886703491, "step": 4510 }, { "epoch": 0.33, "learning_rate": 1.6860959549752053e-06, "logits/chosen": -2.2187891006469727, "logits/rejected": -1.4682965278625488, "logps/chosen": -530.9290161132812, "logps/rejected": -668.9117431640625, "loss": 0.6788, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3302150368690491, "rewards/margins": 0.1844547688961029, "rewards/rejected": -0.5146697759628296, "step": 4520 }, { "epoch": 0.33, "learning_rate": 1.6842204399795945e-06, "logits/chosen": -1.9721513986587524, "logits/rejected": -1.4386364221572876, "logps/chosen": -474.12103271484375, "logps/rejected": -619.9801025390625, "loss": 0.6766, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29605308175086975, "rewards/margins": 0.18934115767478943, "rewards/rejected": -0.4853942394256592, "step": 4530 }, { "epoch": 0.33, "learning_rate": 1.6823403886534107e-06, "logits/chosen": -2.013124942779541, "logits/rejected": -1.6097524166107178, "logps/chosen": -526.3385009765625, "logps/rejected": -700.1527099609375, "loss": 0.6777, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3772345185279846, "rewards/margins": 0.1675170212984085, "rewards/rejected": -0.5447515249252319, "step": 4540 }, { "epoch": 0.34, "learning_rate": 1.680455813461254e-06, "logits/chosen": -2.240689992904663, "logits/rejected": -1.544754147529602, "logps/chosen": -580.8286743164062, "logps/rejected": -745.0361328125, "loss": 0.6761, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35313618183135986, "rewards/margins": 0.21894927322864532, "rewards/rejected": -0.5720854997634888, "step": 4550 }, { "epoch": 0.34, "learning_rate": 1.6785667268977185e-06, "logits/chosen": -2.1584560871124268, "logits/rejected": -1.6247014999389648, "logps/chosen": -592.2593994140625, "logps/rejected": -681.6968994140625, "loss": 0.6801, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4144554138183594, "rewards/margins": 0.15978237986564636, "rewards/rejected": -0.5742378234863281, "step": 4560 }, { "epoch": 0.34, "learning_rate": 1.6766731414873064e-06, "logits/chosen": -1.8538519144058228, "logits/rejected": -1.6317970752716064, "logps/chosen": -592.8834228515625, "logps/rejected": -741.6448974609375, "loss": 0.6827, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.41053134202957153, "rewards/margins": 0.1659267246723175, "rewards/rejected": -0.5764580965042114, "step": 4570 }, { "epoch": 0.34, "learning_rate": 1.6747750697843484e-06, "logits/chosen": -2.0260307788848877, "logits/rejected": -1.457425832748413, "logps/chosen": -594.9239501953125, "logps/rejected": -769.7513427734375, "loss": 0.6752, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.429849237203598, "rewards/margins": 0.20765647292137146, "rewards/rejected": -0.6375057101249695, "step": 4580 }, { "epoch": 0.34, "learning_rate": 1.6728725243729187e-06, "logits/chosen": -2.264361619949341, "logits/rejected": -1.485028862953186, "logps/chosen": -514.7151489257812, "logps/rejected": -748.2507934570312, "loss": 0.6716, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.37393587827682495, "rewards/margins": 0.26760897040367126, "rewards/rejected": -0.6415449380874634, "step": 4590 }, { "epoch": 0.34, "learning_rate": 1.6709655178667518e-06, "logits/chosen": -2.124000072479248, "logits/rejected": -1.5914398431777954, "logps/chosen": -568.2327880859375, "logps/rejected": -735.42041015625, "loss": 0.6791, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3819572925567627, "rewards/margins": 0.2168038785457611, "rewards/rejected": -0.5987611413002014, "step": 4600 }, { "epoch": 0.34, "learning_rate": 1.6690540629091586e-06, "logits/chosen": -2.368072509765625, "logits/rejected": -1.7123315334320068, "logps/chosen": -522.71044921875, "logps/rejected": -621.3574829101562, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33937254548072815, "rewards/margins": 0.14681683480739594, "rewards/rejected": -0.48618942499160767, "step": 4610 }, { "epoch": 0.34, "learning_rate": 1.6671381721729435e-06, "logits/chosen": -2.170703411102295, "logits/rejected": -1.9154598712921143, "logps/chosen": -436.99371337890625, "logps/rejected": -511.85040283203125, "loss": 0.6824, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.28808924555778503, "rewards/margins": 0.1172141581773758, "rewards/rejected": -0.405303418636322, "step": 4620 }, { "epoch": 0.34, "learning_rate": 1.6652178583603186e-06, "logits/chosen": -2.0952982902526855, "logits/rejected": -1.8399502038955688, "logps/chosen": -404.70068359375, "logps/rejected": -486.46075439453125, "loss": 0.6853, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2634625732898712, "rewards/margins": 0.08199816197156906, "rewards/rejected": -0.3454607129096985, "step": 4630 }, { "epoch": 0.34, "learning_rate": 1.6632931342028226e-06, "logits/chosen": -2.1974072456359863, "logits/rejected": -1.704358458518982, "logps/chosen": -436.24688720703125, "logps/rejected": -608.6475830078125, "loss": 0.6764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3109889626502991, "rewards/margins": 0.18977180123329163, "rewards/rejected": -0.5007607936859131, "step": 4640 }, { "epoch": 0.34, "learning_rate": 1.6613640124612325e-06, "logits/chosen": -2.243624687194824, "logits/rejected": -1.699400544166565, "logps/chosen": -431.182861328125, "logps/rejected": -556.0103759765625, "loss": 0.6864, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.26118987798690796, "rewards/margins": 0.1745625138282776, "rewards/rejected": -0.4357524514198303, "step": 4650 }, { "epoch": 0.34, "learning_rate": 1.6594305059254823e-06, "logits/chosen": -2.1734793186187744, "logits/rejected": -1.6898200511932373, "logps/chosen": -471.0535583496094, "logps/rejected": -664.3306884765625, "loss": 0.6763, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3123553991317749, "rewards/margins": 0.21810570359230042, "rewards/rejected": -0.5304610729217529, "step": 4660 }, { "epoch": 0.34, "learning_rate": 1.657492627414576e-06, "logits/chosen": -2.3379733562469482, "logits/rejected": -1.8186023235321045, "logps/chosen": -548.08203125, "logps/rejected": -683.9500732421875, "loss": 0.68, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3738411068916321, "rewards/margins": 0.16807259619235992, "rewards/rejected": -0.5419136881828308, "step": 4670 }, { "epoch": 0.35, "learning_rate": 1.6555503897765041e-06, "logits/chosen": -2.3028602600097656, "logits/rejected": -1.7197811603546143, "logps/chosen": -388.88092041015625, "logps/rejected": -536.4954833984375, "loss": 0.6791, "rewards/accuracies": 0.75, "rewards/chosen": -0.24672028422355652, "rewards/margins": 0.20263640582561493, "rewards/rejected": -0.44935664534568787, "step": 4680 }, { "epoch": 0.35, "learning_rate": 1.6536038058881575e-06, "logits/chosen": -2.1334152221679688, "logits/rejected": -1.691956877708435, "logps/chosen": -476.62579345703125, "logps/rejected": -639.3134155273438, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.301527738571167, "rewards/margins": 0.20355013012886047, "rewards/rejected": -0.5050778388977051, "step": 4690 }, { "epoch": 0.35, "learning_rate": 1.6516528886552424e-06, "logits/chosen": -2.217639446258545, "logits/rejected": -1.7047080993652344, "logps/chosen": -554.6943969726562, "logps/rejected": -684.0974731445312, "loss": 0.6776, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4124639928340912, "rewards/margins": 0.1560050994157791, "rewards/rejected": -0.5684691071510315, "step": 4700 }, { "epoch": 0.35, "learning_rate": 1.649697651012195e-06, "logits/chosen": -2.046504497528076, "logits/rejected": -1.531959891319275, "logps/chosen": -575.7507934570312, "logps/rejected": -731.0814819335938, "loss": 0.6767, "rewards/accuracies": 0.875, "rewards/chosen": -0.4099053740501404, "rewards/margins": 0.20622828602790833, "rewards/rejected": -0.6161336898803711, "step": 4710 }, { "epoch": 0.35, "learning_rate": 1.6477381059220949e-06, "logits/chosen": -2.1291744709014893, "logits/rejected": -1.7128970623016357, "logps/chosen": -515.483154296875, "logps/rejected": -687.2984619140625, "loss": 0.6732, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3726917505264282, "rewards/margins": 0.19011780619621277, "rewards/rejected": -0.5628094673156738, "step": 4720 }, { "epoch": 0.35, "learning_rate": 1.6457742663765805e-06, "logits/chosen": -2.151298999786377, "logits/rejected": -1.4577534198760986, "logps/chosen": -529.96826171875, "logps/rejected": -726.6812744140625, "loss": 0.6784, "rewards/accuracies": 0.75, "rewards/chosen": -0.39403635263442993, "rewards/margins": 0.24600915610790253, "rewards/rejected": -0.6400455832481384, "step": 4730 }, { "epoch": 0.35, "learning_rate": 1.6438061453957617e-06, "logits/chosen": -2.0724809169769287, "logits/rejected": -1.605363130569458, "logps/chosen": -574.4205322265625, "logps/rejected": -722.3775024414062, "loss": 0.6788, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38690292835235596, "rewards/margins": 0.18413017690181732, "rewards/rejected": -0.5710331201553345, "step": 4740 }, { "epoch": 0.35, "learning_rate": 1.6418337560281341e-06, "logits/chosen": -2.192044496536255, "logits/rejected": -1.5235034227371216, "logps/chosen": -480.8382263183594, "logps/rejected": -562.3400268554688, "loss": 0.6812, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2839820981025696, "rewards/margins": 0.16488017141819, "rewards/rejected": -0.4488622546195984, "step": 4750 }, { "epoch": 0.35, "learning_rate": 1.639857111350492e-06, "logits/chosen": -2.2367453575134277, "logits/rejected": -1.61083984375, "logps/chosen": -452.90179443359375, "logps/rejected": -622.7152099609375, "loss": 0.6738, "rewards/accuracies": 0.75, "rewards/chosen": -0.270620197057724, "rewards/margins": 0.21198289096355438, "rewards/rejected": -0.4826030731201172, "step": 4760 }, { "epoch": 0.35, "learning_rate": 1.6378762244678427e-06, "logits/chosen": -2.1005616188049316, "logits/rejected": -1.4604965448379517, "logps/chosen": -537.3001098632812, "logps/rejected": -722.9450073242188, "loss": 0.6787, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36120134592056274, "rewards/margins": 0.23815584182739258, "rewards/rejected": -0.5993572473526001, "step": 4770 }, { "epoch": 0.35, "learning_rate": 1.6358911085133187e-06, "logits/chosen": -1.9724979400634766, "logits/rejected": -1.492722988128662, "logps/chosen": -551.3712768554688, "logps/rejected": -665.3195190429688, "loss": 0.6771, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3955816626548767, "rewards/margins": 0.16671064496040344, "rewards/rejected": -0.5622922778129578, "step": 4780 }, { "epoch": 0.35, "learning_rate": 1.6339017766480912e-06, "logits/chosen": -2.130244016647339, "logits/rejected": -1.7743675708770752, "logps/chosen": -508.78558349609375, "logps/rejected": -637.1127319335938, "loss": 0.6814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33587566018104553, "rewards/margins": 0.1787552386522293, "rewards/rejected": -0.5146307945251465, "step": 4790 }, { "epoch": 0.35, "learning_rate": 1.6319082420612822e-06, "logits/chosen": -1.8162870407104492, "logits/rejected": -1.1381251811981201, "logps/chosen": -468.0426330566406, "logps/rejected": -652.9605102539062, "loss": 0.6744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28710073232650757, "rewards/margins": 0.2264549732208252, "rewards/rejected": -0.5135557055473328, "step": 4800 }, { "epoch": 0.35, "learning_rate": 1.629910517969877e-06, "logits/chosen": -2.229813814163208, "logits/rejected": -2.049359083175659, "logps/chosen": -526.8875732421875, "logps/rejected": -601.2005004882812, "loss": 0.6855, "rewards/accuracies": 0.625, "rewards/chosen": -0.3577499985694885, "rewards/margins": 0.08659864962100983, "rewards/rejected": -0.44434866309165955, "step": 4810 }, { "epoch": 0.36, "learning_rate": 1.6279086176186387e-06, "logits/chosen": -2.166375160217285, "logits/rejected": -1.6852985620498657, "logps/chosen": -481.96575927734375, "logps/rejected": -603.7692260742188, "loss": 0.6822, "rewards/accuracies": 0.75, "rewards/chosen": -0.29227548837661743, "rewards/margins": 0.159645676612854, "rewards/rejected": -0.4519210755825043, "step": 4820 }, { "epoch": 0.36, "learning_rate": 1.625902554280017e-06, "logits/chosen": -2.3101115226745605, "logits/rejected": -1.644484281539917, "logps/chosen": -375.376708984375, "logps/rejected": -542.6263427734375, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.22942504286766052, "rewards/margins": 0.20972609519958496, "rewards/rejected": -0.4391511380672455, "step": 4830 }, { "epoch": 0.36, "learning_rate": 1.623892341254062e-06, "logits/chosen": -1.8732309341430664, "logits/rejected": -1.4316864013671875, "logps/chosen": -493.8506774902344, "logps/rejected": -638.0374755859375, "loss": 0.6796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31851428747177124, "rewards/margins": 0.19655612111091614, "rewards/rejected": -0.5150704383850098, "step": 4840 }, { "epoch": 0.36, "learning_rate": 1.621877991868337e-06, "logits/chosen": -2.242896318435669, "logits/rejected": -1.650689721107483, "logps/chosen": -519.4389038085938, "logps/rejected": -611.6585083007812, "loss": 0.6782, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3664582073688507, "rewards/margins": 0.14937353134155273, "rewards/rejected": -0.5158317685127258, "step": 4850 }, { "epoch": 0.36, "learning_rate": 1.6198595194778286e-06, "logits/chosen": -2.219787120819092, "logits/rejected": -1.6670200824737549, "logps/chosen": -505.8169860839844, "logps/rejected": -620.0072021484375, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3492296636104584, "rewards/margins": 0.1555909812450409, "rewards/rejected": -0.5048205852508545, "step": 4860 }, { "epoch": 0.36, "learning_rate": 1.617836937464858e-06, "logits/chosen": -1.9678666591644287, "logits/rejected": -1.4912750720977783, "logps/chosen": -530.8682861328125, "logps/rejected": -739.8080444335938, "loss": 0.6767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3617743253707886, "rewards/margins": 0.21417617797851562, "rewards/rejected": -0.5759504437446594, "step": 4870 }, { "epoch": 0.36, "learning_rate": 1.6158102592389934e-06, "logits/chosen": -2.1361565589904785, "logits/rejected": -1.3281840085983276, "logps/chosen": -482.83233642578125, "logps/rejected": -652.3165283203125, "loss": 0.6743, "rewards/accuracies": 0.75, "rewards/chosen": -0.3392999470233917, "rewards/margins": 0.22018811106681824, "rewards/rejected": -0.55948805809021, "step": 4880 }, { "epoch": 0.36, "learning_rate": 1.613779498236961e-06, "logits/chosen": -2.149508476257324, "logits/rejected": -1.6003137826919556, "logps/chosen": -487.87548828125, "logps/rejected": -677.0208740234375, "loss": 0.6777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3128717541694641, "rewards/margins": 0.2192939817905426, "rewards/rejected": -0.5321657061576843, "step": 4890 }, { "epoch": 0.36, "learning_rate": 1.6117446679225543e-06, "logits/chosen": -2.238563060760498, "logits/rejected": -1.5886753797531128, "logps/chosen": -516.9549560546875, "logps/rejected": -680.5398559570312, "loss": 0.6757, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3489692211151123, "rewards/margins": 0.21265561878681183, "rewards/rejected": -0.5616248846054077, "step": 4900 }, { "epoch": 0.36, "learning_rate": 1.6097057817865473e-06, "logits/chosen": -2.0529966354370117, "logits/rejected": -1.5015969276428223, "logps/chosen": -636.17724609375, "logps/rejected": -812.0868530273438, "loss": 0.6745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4394589066505432, "rewards/margins": 0.205821231007576, "rewards/rejected": -0.6452800631523132, "step": 4910 }, { "epoch": 0.36, "learning_rate": 1.6076628533466035e-06, "logits/chosen": -1.7595484256744385, "logits/rejected": -1.34345281124115, "logps/chosen": -592.2735595703125, "logps/rejected": -777.0557861328125, "loss": 0.6769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.47729936242103577, "rewards/margins": 0.19319672882556915, "rewards/rejected": -0.6704961061477661, "step": 4920 }, { "epoch": 0.36, "learning_rate": 1.6056158961471855e-06, "logits/chosen": -2.0090723037719727, "logits/rejected": -1.4540952444076538, "logps/chosen": -484.22113037109375, "logps/rejected": -689.1904296875, "loss": 0.6779, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3334945738315582, "rewards/margins": 0.1987476944923401, "rewards/rejected": -0.5322422385215759, "step": 4930 }, { "epoch": 0.36, "learning_rate": 1.6035649237594679e-06, "logits/chosen": -2.1039276123046875, "logits/rejected": -1.7054297924041748, "logps/chosen": -488.91259765625, "logps/rejected": -612.027587890625, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.3821399211883545, "rewards/margins": 0.13810014724731445, "rewards/rejected": -0.520240068435669, "step": 4940 }, { "epoch": 0.37, "learning_rate": 1.6015099497812446e-06, "logits/chosen": -2.1824090480804443, "logits/rejected": -1.6604973077774048, "logps/chosen": -422.83123779296875, "logps/rejected": -568.3201904296875, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24916784465312958, "rewards/margins": 0.18023470044136047, "rewards/rejected": -0.42940253019332886, "step": 4950 }, { "epoch": 0.37, "learning_rate": 1.5994509878368403e-06, "logits/chosen": -2.224484443664551, "logits/rejected": -1.443705677986145, "logps/chosen": -518.998046875, "logps/rejected": -627.6054077148438, "loss": 0.6842, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2934703230857849, "rewards/margins": 0.18971654772758484, "rewards/rejected": -0.48318690061569214, "step": 4960 }, { "epoch": 0.37, "learning_rate": 1.597388051577019e-06, "logits/chosen": -2.102588176727295, "logits/rejected": -1.498981237411499, "logps/chosen": -467.82061767578125, "logps/rejected": -681.9068603515625, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": -0.29962196946144104, "rewards/margins": 0.254504919052124, "rewards/rejected": -0.5541268587112427, "step": 4970 }, { "epoch": 0.37, "learning_rate": 1.5953211546788947e-06, "logits/chosen": -2.0030009746551514, "logits/rejected": -1.5923222303390503, "logps/chosen": -552.2039794921875, "logps/rejected": -692.8494873046875, "loss": 0.6811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3481176495552063, "rewards/margins": 0.17206041514873505, "rewards/rejected": -0.5201780796051025, "step": 4980 }, { "epoch": 0.37, "learning_rate": 1.59325031084584e-06, "logits/chosen": -2.0797653198242188, "logits/rejected": -1.4079411029815674, "logps/chosen": -399.51019287109375, "logps/rejected": -607.7283935546875, "loss": 0.6698, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24167723953723907, "rewards/margins": 0.23988814651966095, "rewards/rejected": -0.4815653860569, "step": 4990 }, { "epoch": 0.37, "learning_rate": 1.5911755338073954e-06, "logits/chosen": -2.1900112628936768, "logits/rejected": -1.715134620666504, "logps/chosen": -367.4262390136719, "logps/rejected": -560.3939819335938, "loss": 0.6728, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.24825957417488098, "rewards/margins": 0.22298626601696014, "rewards/rejected": -0.4712458550930023, "step": 5000 }, { "epoch": 0.37, "learning_rate": 1.589096837319178e-06, "logits/chosen": -1.9976298809051514, "logits/rejected": -1.4810903072357178, "logps/chosen": -474.8133239746094, "logps/rejected": -651.309326171875, "loss": 0.6753, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3124847412109375, "rewards/margins": 0.1913885623216629, "rewards/rejected": -0.5038732886314392, "step": 5010 }, { "epoch": 0.37, "learning_rate": 1.587014235162791e-06, "logits/chosen": -2.1336586475372314, "logits/rejected": -1.7229607105255127, "logps/chosen": -603.9349975585938, "logps/rejected": -743.9161376953125, "loss": 0.6823, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4442051351070404, "rewards/margins": 0.1511613428592682, "rewards/rejected": -0.5953664183616638, "step": 5020 }, { "epoch": 0.37, "learning_rate": 1.584927741145731e-06, "logits/chosen": -2.1497280597686768, "logits/rejected": -1.6596119403839111, "logps/chosen": -527.09765625, "logps/rejected": -676.784912109375, "loss": 0.6778, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.34681645035743713, "rewards/margins": 0.20109625160694122, "rewards/rejected": -0.54791259765625, "step": 5030 }, { "epoch": 0.37, "learning_rate": 1.5828373691012988e-06, "logits/chosen": -2.186262607574463, "logits/rejected": -1.5213615894317627, "logps/chosen": -583.365234375, "logps/rejected": -732.2966918945312, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.3649362623691559, "rewards/margins": 0.22316482663154602, "rewards/rejected": -0.5881010293960571, "step": 5040 }, { "epoch": 0.37, "learning_rate": 1.5807431328885049e-06, "logits/chosen": -1.9089590311050415, "logits/rejected": -1.377929925918579, "logps/chosen": -538.9398803710938, "logps/rejected": -659.9284057617188, "loss": 0.6812, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.381288081407547, "rewards/margins": 0.17760078608989716, "rewards/rejected": -0.5588889122009277, "step": 5050 }, { "epoch": 0.37, "learning_rate": 1.57864504639198e-06, "logits/chosen": -1.9362163543701172, "logits/rejected": -1.351471185684204, "logps/chosen": -550.3132934570312, "logps/rejected": -727.0687255859375, "loss": 0.6732, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36922162771224976, "rewards/margins": 0.23969654738903046, "rewards/rejected": -0.608918309211731, "step": 5060 }, { "epoch": 0.37, "learning_rate": 1.5765431235218802e-06, "logits/chosen": -2.1056344509124756, "logits/rejected": -1.8211787939071655, "logps/chosen": -749.9259033203125, "logps/rejected": -753.31005859375, "loss": 0.6889, "rewards/accuracies": 0.5, "rewards/chosen": -0.5014339089393616, "rewards/margins": 0.08346012979745865, "rewards/rejected": -0.584894061088562, "step": 5070 }, { "epoch": 0.37, "learning_rate": 1.574437378213799e-06, "logits/chosen": -1.8789308071136475, "logits/rejected": -1.41519033908844, "logps/chosen": -608.6334228515625, "logps/rejected": -756.5738525390625, "loss": 0.6799, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.42934709787368774, "rewards/margins": 0.1967284083366394, "rewards/rejected": -0.6260755062103271, "step": 5080 }, { "epoch": 0.38, "learning_rate": 1.5723278244286701e-06, "logits/chosen": -2.0477519035339355, "logits/rejected": -1.5012482404708862, "logps/chosen": -603.8350830078125, "logps/rejected": -726.861083984375, "loss": 0.6837, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4452992081642151, "rewards/margins": 0.1506747305393219, "rewards/rejected": -0.5959738492965698, "step": 5090 }, { "epoch": 0.38, "learning_rate": 1.5702144761526785e-06, "logits/chosen": -1.9950096607208252, "logits/rejected": -1.4681322574615479, "logps/chosen": -539.1134643554688, "logps/rejected": -711.8673095703125, "loss": 0.6775, "rewards/accuracies": 0.75, "rewards/chosen": -0.42568689584732056, "rewards/margins": 0.18904411792755127, "rewards/rejected": -0.614730954170227, "step": 5100 }, { "epoch": 0.38, "learning_rate": 1.568097347397166e-06, "logits/chosen": -2.027630567550659, "logits/rejected": -1.4769898653030396, "logps/chosen": -503.36279296875, "logps/rejected": -631.0704956054688, "loss": 0.6765, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.360460489988327, "rewards/margins": 0.17300045490264893, "rewards/rejected": -0.5334609746932983, "step": 5110 }, { "epoch": 0.38, "learning_rate": 1.565976452198538e-06, "logits/chosen": -2.0727288722991943, "logits/rejected": -1.4746944904327393, "logps/chosen": -491.1219787597656, "logps/rejected": -669.682861328125, "loss": 0.6774, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.370793879032135, "rewards/margins": 0.19685974717140198, "rewards/rejected": -0.5676536560058594, "step": 5120 }, { "epoch": 0.38, "learning_rate": 1.5638518046181721e-06, "logits/chosen": -2.143353223800659, "logits/rejected": -1.6096973419189453, "logps/chosen": -574.9549560546875, "logps/rejected": -759.0604248046875, "loss": 0.6801, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39304494857788086, "rewards/margins": 0.20014910399913788, "rewards/rejected": -0.5931941270828247, "step": 5130 }, { "epoch": 0.38, "learning_rate": 1.5617234187423238e-06, "logits/chosen": -1.6532083749771118, "logits/rejected": -1.480971097946167, "logps/chosen": -608.4043579101562, "logps/rejected": -698.4370727539062, "loss": 0.6866, "rewards/accuracies": 0.625, "rewards/chosen": -0.47158581018447876, "rewards/margins": 0.08905436098575592, "rewards/rejected": -0.5606401562690735, "step": 5140 }, { "epoch": 0.38, "learning_rate": 1.559591308682033e-06, "logits/chosen": -2.044656991958618, "logits/rejected": -1.5655364990234375, "logps/chosen": -616.4970703125, "logps/rejected": -770.9761962890625, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -0.46869930624961853, "rewards/margins": 0.19871486723423004, "rewards/rejected": -0.6674141883850098, "step": 5150 }, { "epoch": 0.38, "learning_rate": 1.55745548857303e-06, "logits/chosen": -2.278850555419922, "logits/rejected": -1.8313630819320679, "logps/chosen": -587.0802001953125, "logps/rejected": -736.6689453125, "loss": 0.676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3871961236000061, "rewards/margins": 0.19364134967327118, "rewards/rejected": -0.5808374881744385, "step": 5160 }, { "epoch": 0.38, "learning_rate": 1.5553159725756435e-06, "logits/chosen": -1.980078101158142, "logits/rejected": -1.3754624128341675, "logps/chosen": -577.0519409179688, "logps/rejected": -751.298828125, "loss": 0.6787, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.432159423828125, "rewards/margins": 0.21748173236846924, "rewards/rejected": -0.649641215801239, "step": 5170 }, { "epoch": 0.38, "learning_rate": 1.5531727748747057e-06, "logits/chosen": -1.9113209247589111, "logits/rejected": -1.4436126947402954, "logps/chosen": -451.560791015625, "logps/rejected": -603.4505615234375, "loss": 0.6768, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3443782925605774, "rewards/margins": 0.17897680401802063, "rewards/rejected": -0.5233551263809204, "step": 5180 }, { "epoch": 0.38, "learning_rate": 1.5510259096794571e-06, "logits/chosen": -2.194047212600708, "logits/rejected": -1.8047571182250977, "logps/chosen": -462.7543029785156, "logps/rejected": -605.2296142578125, "loss": 0.6809, "rewards/accuracies": 0.75, "rewards/chosen": -0.2860252857208252, "rewards/margins": 0.16107816994190216, "rewards/rejected": -0.4471034109592438, "step": 5190 }, { "epoch": 0.38, "learning_rate": 1.5488753912234542e-06, "logits/chosen": -1.6679645776748657, "logits/rejected": -1.1276988983154297, "logps/chosen": -437.58587646484375, "logps/rejected": -626.0628662109375, "loss": 0.674, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3076724410057068, "rewards/margins": 0.23424987494945526, "rewards/rejected": -0.5419222116470337, "step": 5200 }, { "epoch": 0.38, "learning_rate": 1.5467212337644745e-06, "logits/chosen": -1.757349967956543, "logits/rejected": -1.308070421218872, "logps/chosen": -496.41693115234375, "logps/rejected": -665.0636596679688, "loss": 0.6818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.360704630613327, "rewards/margins": 0.20884954929351807, "rewards/rejected": -0.5695542097091675, "step": 5210 }, { "epoch": 0.39, "learning_rate": 1.544563451584422e-06, "logits/chosen": -1.9822053909301758, "logits/rejected": -1.3728872537612915, "logps/chosen": -600.1163330078125, "logps/rejected": -700.0536499023438, "loss": 0.6809, "rewards/accuracies": 0.75, "rewards/chosen": -0.4095671772956848, "rewards/margins": 0.15996333956718445, "rewards/rejected": -0.5695304870605469, "step": 5220 }, { "epoch": 0.39, "learning_rate": 1.5424020589892308e-06, "logits/chosen": -1.9793781042099, "logits/rejected": -1.3829944133758545, "logps/chosen": -590.0093383789062, "logps/rejected": -664.4869384765625, "loss": 0.6805, "rewards/accuracies": 0.625, "rewards/chosen": -0.4029896855354309, "rewards/margins": 0.15095487236976624, "rewards/rejected": -0.5539445877075195, "step": 5230 }, { "epoch": 0.39, "learning_rate": 1.5402370703087737e-06, "logits/chosen": -1.950554609298706, "logits/rejected": -1.5577375888824463, "logps/chosen": -573.10791015625, "logps/rejected": -675.962890625, "loss": 0.6797, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.411384254693985, "rewards/margins": 0.14042767882347107, "rewards/rejected": -0.551811933517456, "step": 5240 }, { "epoch": 0.39, "learning_rate": 1.538068499896764e-06, "logits/chosen": -1.8416516780853271, "logits/rejected": -1.5953738689422607, "logps/chosen": -530.3173828125, "logps/rejected": -659.8794555664062, "loss": 0.6831, "rewards/accuracies": 0.625, "rewards/chosen": -0.3960816264152527, "rewards/margins": 0.12066632509231567, "rewards/rejected": -0.5167479515075684, "step": 5250 }, { "epoch": 0.39, "learning_rate": 1.5358963621306624e-06, "logits/chosen": -2.017129421234131, "logits/rejected": -1.409733772277832, "logps/chosen": -597.0280151367188, "logps/rejected": -747.8365478515625, "loss": 0.6747, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38612258434295654, "rewards/margins": 0.21334418654441833, "rewards/rejected": -0.5994668006896973, "step": 5260 }, { "epoch": 0.39, "learning_rate": 1.5337206714115807e-06, "logits/chosen": -1.942434549331665, "logits/rejected": -1.6046470403671265, "logps/chosen": -516.4931030273438, "logps/rejected": -695.4269409179688, "loss": 0.6822, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.33830127120018005, "rewards/margins": 0.19743837416172028, "rewards/rejected": -0.535739541053772, "step": 5270 }, { "epoch": 0.39, "learning_rate": 1.531541442164186e-06, "logits/chosen": -2.007154941558838, "logits/rejected": -1.475940227508545, "logps/chosen": -554.05126953125, "logps/rejected": -638.6392211914062, "loss": 0.6857, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3700900375843048, "rewards/margins": 0.16532373428344727, "rewards/rejected": -0.5354137420654297, "step": 5280 }, { "epoch": 0.39, "learning_rate": 1.5293586888366057e-06, "logits/chosen": -1.956559181213379, "logits/rejected": -1.6759803295135498, "logps/chosen": -560.2738037109375, "logps/rejected": -663.6712646484375, "loss": 0.6846, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3618046045303345, "rewards/margins": 0.12669149041175842, "rewards/rejected": -0.4884961247444153, "step": 5290 }, { "epoch": 0.39, "learning_rate": 1.5271724259003325e-06, "logits/chosen": -2.1686654090881348, "logits/rejected": -1.5805648565292358, "logps/chosen": -476.68865966796875, "logps/rejected": -652.6936645507812, "loss": 0.676, "rewards/accuracies": 0.75, "rewards/chosen": -0.31859123706817627, "rewards/margins": 0.19163022935390472, "rewards/rejected": -0.5102214217185974, "step": 5300 }, { "epoch": 0.39, "learning_rate": 1.5249826678501267e-06, "logits/chosen": -1.8719730377197266, "logits/rejected": -1.334692358970642, "logps/chosen": -624.6943969726562, "logps/rejected": -739.094970703125, "loss": 0.6816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4694346487522125, "rewards/margins": 0.17502176761627197, "rewards/rejected": -0.6444563865661621, "step": 5310 }, { "epoch": 0.39, "learning_rate": 1.5227894292039208e-06, "logits/chosen": -2.2134275436401367, "logits/rejected": -1.6225459575653076, "logps/chosen": -467.3063049316406, "logps/rejected": -645.9210205078125, "loss": 0.6752, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2953004240989685, "rewards/margins": 0.19018547236919403, "rewards/rejected": -0.4854859411716461, "step": 5320 }, { "epoch": 0.39, "learning_rate": 1.5205927245027242e-06, "logits/chosen": -2.1249210834503174, "logits/rejected": -1.7856996059417725, "logps/chosen": -397.89996337890625, "logps/rejected": -554.6124267578125, "loss": 0.6801, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23781800270080566, "rewards/margins": 0.16789567470550537, "rewards/rejected": -0.4057137072086334, "step": 5330 }, { "epoch": 0.39, "learning_rate": 1.5183925683105251e-06, "logits/chosen": -2.0239202976226807, "logits/rejected": -1.5959135293960571, "logps/chosen": -416.74365234375, "logps/rejected": -598.9381713867188, "loss": 0.6792, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2969916760921478, "rewards/margins": 0.17439952492713928, "rewards/rejected": -0.4713912606239319, "step": 5340 }, { "epoch": 0.39, "learning_rate": 1.5161889752141953e-06, "logits/chosen": -2.02091908454895, "logits/rejected": -1.6813325881958008, "logps/chosen": -471.4469299316406, "logps/rejected": -619.410400390625, "loss": 0.6804, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33810144662857056, "rewards/margins": 0.16796764731407166, "rewards/rejected": -0.5060690641403198, "step": 5350 }, { "epoch": 0.4, "learning_rate": 1.5139819598233934e-06, "logits/chosen": -1.9169718027114868, "logits/rejected": -1.4870550632476807, "logps/chosen": -494.48773193359375, "logps/rejected": -653.6615600585938, "loss": 0.6764, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3573368191719055, "rewards/margins": 0.1991550326347351, "rewards/rejected": -0.5564918518066406, "step": 5360 }, { "epoch": 0.4, "learning_rate": 1.5117715367704662e-06, "logits/chosen": -1.9692615270614624, "logits/rejected": -1.5133672952651978, "logps/chosen": -571.8338623046875, "logps/rejected": -695.9190673828125, "loss": 0.6833, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.40404558181762695, "rewards/margins": 0.18874861299991608, "rewards/rejected": -0.5927942395210266, "step": 5370 }, { "epoch": 0.4, "learning_rate": 1.5095577207103554e-06, "logits/chosen": -1.9273936748504639, "logits/rejected": -1.4560863971710205, "logps/chosen": -539.419921875, "logps/rejected": -749.46337890625, "loss": 0.6763, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.373654842376709, "rewards/margins": 0.22511406242847443, "rewards/rejected": -0.5987690091133118, "step": 5380 }, { "epoch": 0.4, "learning_rate": 1.5073405263204948e-06, "logits/chosen": -1.895892858505249, "logits/rejected": -1.2665776014328003, "logps/chosen": -545.2217407226562, "logps/rejected": -766.6201171875, "loss": 0.6707, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3813478946685791, "rewards/margins": 0.24957890808582306, "rewards/rejected": -0.630926787853241, "step": 5390 }, { "epoch": 0.4, "learning_rate": 1.50511996830072e-06, "logits/chosen": -1.9695810079574585, "logits/rejected": -1.495821475982666, "logps/chosen": -563.3883056640625, "logps/rejected": -688.4684448242188, "loss": 0.6829, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39352867007255554, "rewards/margins": 0.17112888395786285, "rewards/rejected": -0.5646575093269348, "step": 5400 }, { "epoch": 0.4, "learning_rate": 1.502896061373164e-06, "logits/chosen": -1.8675041198730469, "logits/rejected": -1.3415031433105469, "logps/chosen": -577.5606689453125, "logps/rejected": -746.2659912109375, "loss": 0.6789, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4005773961544037, "rewards/margins": 0.23922929167747498, "rewards/rejected": -0.6398066282272339, "step": 5410 }, { "epoch": 0.4, "learning_rate": 1.5006688202821647e-06, "logits/chosen": -1.9099321365356445, "logits/rejected": -1.5384889841079712, "logps/chosen": -724.7630615234375, "logps/rejected": -895.796875, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5991414785385132, "rewards/margins": 0.16683539748191833, "rewards/rejected": -0.7659769058227539, "step": 5420 }, { "epoch": 0.4, "learning_rate": 1.4984382597941646e-06, "logits/chosen": -1.9746736288070679, "logits/rejected": -1.4363503456115723, "logps/chosen": -591.5681762695312, "logps/rejected": -733.8224487304688, "loss": 0.682, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4439503252506256, "rewards/margins": 0.1722889095544815, "rewards/rejected": -0.6162392497062683, "step": 5430 }, { "epoch": 0.4, "learning_rate": 1.4962043946976136e-06, "logits/chosen": -1.9388723373413086, "logits/rejected": -1.4204504489898682, "logps/chosen": -610.668212890625, "logps/rejected": -765.2799072265625, "loss": 0.6772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4811418652534485, "rewards/margins": 0.18396830558776855, "rewards/rejected": -0.6651101112365723, "step": 5440 }, { "epoch": 0.4, "learning_rate": 1.4939672398028713e-06, "logits/chosen": -1.7730228900909424, "logits/rejected": -1.4608489274978638, "logps/chosen": -553.1050415039062, "logps/rejected": -746.1763916015625, "loss": 0.6754, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41683611273765564, "rewards/margins": 0.2271624505519867, "rewards/rejected": -0.6439985632896423, "step": 5450 }, { "epoch": 0.4, "learning_rate": 1.4917268099421075e-06, "logits/chosen": -1.7580642700195312, "logits/rejected": -1.3879187107086182, "logps/chosen": -596.2807006835938, "logps/rejected": -706.5359497070312, "loss": 0.684, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43292635679244995, "rewards/margins": 0.16935476660728455, "rewards/rejected": -0.6022812128067017, "step": 5460 }, { "epoch": 0.4, "learning_rate": 1.4894831199692058e-06, "logits/chosen": -1.6992181539535522, "logits/rejected": -1.3167707920074463, "logps/chosen": -438.9505310058594, "logps/rejected": -625.8517456054688, "loss": 0.6721, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2988505959510803, "rewards/margins": 0.20382995903491974, "rewards/rejected": -0.5026805996894836, "step": 5470 }, { "epoch": 0.4, "learning_rate": 1.4872361847596633e-06, "logits/chosen": -2.269196033477783, "logits/rejected": -1.6576236486434937, "logps/chosen": -404.0931701660156, "logps/rejected": -485.74224853515625, "loss": 0.6854, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23746398091316223, "rewards/margins": 0.14360098540782928, "rewards/rejected": -0.3810649514198303, "step": 5480 }, { "epoch": 0.4, "learning_rate": 1.4849860192104938e-06, "logits/chosen": -1.8039004802703857, "logits/rejected": -1.4741570949554443, "logps/chosen": -432.33306884765625, "logps/rejected": -612.3638305664062, "loss": 0.6742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29069003462791443, "rewards/margins": 0.18383070826530457, "rewards/rejected": -0.4745207726955414, "step": 5490 }, { "epoch": 0.41, "learning_rate": 1.4827326382401267e-06, "logits/chosen": -1.7884862422943115, "logits/rejected": -1.265838861465454, "logps/chosen": -440.08984375, "logps/rejected": -605.2586669921875, "loss": 0.6726, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28611084818840027, "rewards/margins": 0.22203198075294495, "rewards/rejected": -0.5081428289413452, "step": 5500 }, { "epoch": 0.41, "learning_rate": 1.4804760567883103e-06, "logits/chosen": -2.0227761268615723, "logits/rejected": -1.3515312671661377, "logps/chosen": -538.0249633789062, "logps/rejected": -726.3181762695312, "loss": 0.6808, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3369419276714325, "rewards/margins": 0.25320112705230713, "rewards/rejected": -0.590143084526062, "step": 5510 }, { "epoch": 0.41, "learning_rate": 1.4782162898160114e-06, "logits/chosen": -1.9607229232788086, "logits/rejected": -1.3013780117034912, "logps/chosen": -605.4227294921875, "logps/rejected": -808.869384765625, "loss": 0.6719, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4557116627693176, "rewards/margins": 0.246486097574234, "rewards/rejected": -0.7021978497505188, "step": 5520 }, { "epoch": 0.41, "learning_rate": 1.4759533523053172e-06, "logits/chosen": -2.0112054347991943, "logits/rejected": -1.5427964925765991, "logps/chosen": -570.9873657226562, "logps/rejected": -726.5716552734375, "loss": 0.6736, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40037599205970764, "rewards/margins": 0.19984018802642822, "rewards/rejected": -0.6002161502838135, "step": 5530 }, { "epoch": 0.41, "learning_rate": 1.4736872592593346e-06, "logits/chosen": -1.9204181432724, "logits/rejected": -1.3771946430206299, "logps/chosen": -482.2020568847656, "logps/rejected": -675.4815673828125, "loss": 0.6747, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3346666693687439, "rewards/margins": 0.23904109001159668, "rewards/rejected": -0.5737077593803406, "step": 5540 }, { "epoch": 0.41, "learning_rate": 1.4714180257020921e-06, "logits/chosen": -1.996392011642456, "logits/rejected": -1.6018505096435547, "logps/chosen": -432.7384338378906, "logps/rejected": -551.1343994140625, "loss": 0.6782, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2844640612602234, "rewards/margins": 0.15996769070625305, "rewards/rejected": -0.44443172216415405, "step": 5550 }, { "epoch": 0.41, "learning_rate": 1.4691456666784388e-06, "logits/chosen": -1.9391670227050781, "logits/rejected": -1.756392478942871, "logps/chosen": -501.83953857421875, "logps/rejected": -618.6151733398438, "loss": 0.681, "rewards/accuracies": 0.75, "rewards/chosen": -0.3441617786884308, "rewards/margins": 0.11876191198825836, "rewards/rejected": -0.4629236161708832, "step": 5560 }, { "epoch": 0.41, "learning_rate": 1.4668701972539456e-06, "logits/chosen": -2.0398666858673096, "logits/rejected": -1.6541376113891602, "logps/chosen": -443.53302001953125, "logps/rejected": -638.2113037109375, "loss": 0.6739, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2696869373321533, "rewards/margins": 0.2049603909254074, "rewards/rejected": -0.4746473431587219, "step": 5570 }, { "epoch": 0.41, "learning_rate": 1.4645916325148064e-06, "logits/chosen": -2.1519901752471924, "logits/rejected": -1.7204627990722656, "logps/chosen": -456.32586669921875, "logps/rejected": -662.8348388671875, "loss": 0.6717, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.26834937930107117, "rewards/margins": 0.23176249861717224, "rewards/rejected": -0.5001119375228882, "step": 5580 }, { "epoch": 0.41, "learning_rate": 1.462309987567735e-06, "logits/chosen": -2.0160179138183594, "logits/rejected": -1.6904098987579346, "logps/chosen": -492.3265075683594, "logps/rejected": -619.5763549804688, "loss": 0.6802, "rewards/accuracies": 0.625, "rewards/chosen": -0.3206906318664551, "rewards/margins": 0.1302148401737213, "rewards/rejected": -0.4509054720401764, "step": 5590 }, { "epoch": 0.41, "learning_rate": 1.4600252775398676e-06, "logits/chosen": -2.049501895904541, "logits/rejected": -1.40622878074646, "logps/chosen": -573.80126953125, "logps/rejected": -726.6141357421875, "loss": 0.6827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3594181537628174, "rewards/margins": 0.23886315524578094, "rewards/rejected": -0.5982813835144043, "step": 5600 }, { "epoch": 0.41, "learning_rate": 1.4577375175786614e-06, "logits/chosen": -1.9575300216674805, "logits/rejected": -1.5053706169128418, "logps/chosen": -421.0606994628906, "logps/rejected": -574.1912231445312, "loss": 0.6816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28681230545043945, "rewards/margins": 0.20112094283103943, "rewards/rejected": -0.4879332184791565, "step": 5610 }, { "epoch": 0.41, "learning_rate": 1.4554467228517951e-06, "logits/chosen": -2.10115385055542, "logits/rejected": -1.6826986074447632, "logps/chosen": -536.7793579101562, "logps/rejected": -598.1024169921875, "loss": 0.6808, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33600369095802307, "rewards/margins": 0.10951602458953857, "rewards/rejected": -0.44551974534988403, "step": 5620 }, { "epoch": 0.42, "learning_rate": 1.453152908547067e-06, "logits/chosen": -2.100231647491455, "logits/rejected": -1.692291259765625, "logps/chosen": -548.15869140625, "logps/rejected": -735.0494384765625, "loss": 0.6837, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3842836618423462, "rewards/margins": 0.1865202635526657, "rewards/rejected": -0.5708039402961731, "step": 5630 }, { "epoch": 0.42, "learning_rate": 1.450856089872295e-06, "logits/chosen": -2.223653793334961, "logits/rejected": -1.8591362237930298, "logps/chosen": -499.86444091796875, "logps/rejected": -632.41259765625, "loss": 0.6813, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.33917731046676636, "rewards/margins": 0.15343722701072693, "rewards/rejected": -0.4926145076751709, "step": 5640 }, { "epoch": 0.42, "learning_rate": 1.448556282055216e-06, "logits/chosen": -1.9818689823150635, "logits/rejected": -1.5078634023666382, "logps/chosen": -745.6876220703125, "logps/rejected": -864.0311279296875, "loss": 0.6796, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5637558102607727, "rewards/margins": 0.17609409987926483, "rewards/rejected": -0.739849865436554, "step": 5650 }, { "epoch": 0.42, "learning_rate": 1.4462535003433845e-06, "logits/chosen": -1.9804767370224, "logits/rejected": -1.3696779012680054, "logps/chosen": -755.8176879882812, "logps/rejected": -889.3248291015625, "loss": 0.6749, "rewards/accuracies": 0.75, "rewards/chosen": -0.5600562691688538, "rewards/margins": 0.22961854934692383, "rewards/rejected": -0.7896747589111328, "step": 5660 }, { "epoch": 0.42, "learning_rate": 1.4439477600040718e-06, "logits/chosen": -1.962601900100708, "logits/rejected": -1.6618419885635376, "logps/chosen": -634.0560913085938, "logps/rejected": -809.6912231445312, "loss": 0.6828, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4768776297569275, "rewards/margins": 0.18544885516166687, "rewards/rejected": -0.6623265147209167, "step": 5670 }, { "epoch": 0.42, "learning_rate": 1.4416390763241648e-06, "logits/chosen": -1.8963959217071533, "logits/rejected": -1.4339402914047241, "logps/chosen": -598.6373291015625, "logps/rejected": -824.7951049804688, "loss": 0.6672, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.45930927991867065, "rewards/margins": 0.24267706274986267, "rewards/rejected": -0.7019864320755005, "step": 5680 }, { "epoch": 0.42, "learning_rate": 1.4393274646100643e-06, "logits/chosen": -1.7532256841659546, "logits/rejected": -1.1986232995986938, "logps/chosen": -604.2467041015625, "logps/rejected": -751.9915771484375, "loss": 0.6784, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4229431748390198, "rewards/margins": 0.20733948051929474, "rewards/rejected": -0.6302827000617981, "step": 5690 }, { "epoch": 0.42, "learning_rate": 1.4370129401875838e-06, "logits/chosen": -2.202859401702881, "logits/rejected": -1.8486850261688232, "logps/chosen": -486.50067138671875, "logps/rejected": -693.4324951171875, "loss": 0.6767, "rewards/accuracies": 0.75, "rewards/chosen": -0.33568075299263, "rewards/margins": 0.21038010716438293, "rewards/rejected": -0.5460608601570129, "step": 5700 }, { "epoch": 0.42, "learning_rate": 1.4346955184018481e-06, "logits/chosen": -2.042574882507324, "logits/rejected": -1.7677648067474365, "logps/chosen": -516.0814208984375, "logps/rejected": -615.560546875, "loss": 0.6809, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.34114331007003784, "rewards/margins": 0.11936931312084198, "rewards/rejected": -0.460512638092041, "step": 5710 }, { "epoch": 0.42, "learning_rate": 1.4323752146171911e-06, "logits/chosen": -2.1073837280273438, "logits/rejected": -1.5659033060073853, "logps/chosen": -484.8963928222656, "logps/rejected": -687.2247924804688, "loss": 0.6757, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.311156690120697, "rewards/margins": 0.22125396132469177, "rewards/rejected": -0.5324106216430664, "step": 5720 }, { "epoch": 0.42, "learning_rate": 1.430052044217054e-06, "logits/chosen": -1.9267657995224, "logits/rejected": -1.433645486831665, "logps/chosen": -477.9681701660156, "logps/rejected": -599.0147705078125, "loss": 0.6816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31256935000419617, "rewards/margins": 0.18512889742851257, "rewards/rejected": -0.49769824743270874, "step": 5730 }, { "epoch": 0.42, "learning_rate": 1.4277260226038835e-06, "logits/chosen": -2.09680438041687, "logits/rejected": -1.597395658493042, "logps/chosen": -460.64398193359375, "logps/rejected": -605.7073974609375, "loss": 0.6812, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3160422444343567, "rewards/margins": 0.17170074582099915, "rewards/rejected": -0.48774299025535583, "step": 5740 }, { "epoch": 0.42, "learning_rate": 1.4253971651990298e-06, "logits/chosen": -1.8986867666244507, "logits/rejected": -1.5283474922180176, "logps/chosen": -596.755126953125, "logps/rejected": -710.1607666015625, "loss": 0.6801, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40252748131752014, "rewards/margins": 0.1568247675895691, "rewards/rejected": -0.5593522191047668, "step": 5750 }, { "epoch": 0.42, "learning_rate": 1.423065487442644e-06, "logits/chosen": -1.778578758239746, "logits/rejected": -1.3769335746765137, "logps/chosen": -493.33392333984375, "logps/rejected": -694.7250366210938, "loss": 0.6795, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34349411725997925, "rewards/margins": 0.22147352993488312, "rewards/rejected": -0.5649676322937012, "step": 5760 }, { "epoch": 0.43, "learning_rate": 1.420731004793576e-06, "logits/chosen": -1.9070895910263062, "logits/rejected": -1.3747327327728271, "logps/chosen": -569.2225952148438, "logps/rejected": -710.1198120117188, "loss": 0.6769, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40008169412612915, "rewards/margins": 0.19921362400054932, "rewards/rejected": -0.5992952585220337, "step": 5770 }, { "epoch": 0.43, "learning_rate": 1.4183937327292725e-06, "logits/chosen": -1.9281126260757446, "logits/rejected": -1.6213572025299072, "logps/chosen": -686.7996215820312, "logps/rejected": -776.1904907226562, "loss": 0.685, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4961264729499817, "rewards/margins": 0.16589322686195374, "rewards/rejected": -0.662019670009613, "step": 5780 }, { "epoch": 0.43, "learning_rate": 1.4160536867456725e-06, "logits/chosen": -1.9436734914779663, "logits/rejected": -1.6506052017211914, "logps/chosen": -689.914306640625, "logps/rejected": -828.51513671875, "loss": 0.6836, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5143817067146301, "rewards/margins": 0.16885881125926971, "rewards/rejected": -0.6832404732704163, "step": 5790 }, { "epoch": 0.43, "learning_rate": 1.4137108823571072e-06, "logits/chosen": -1.9694902896881104, "logits/rejected": -1.7135379314422607, "logps/chosen": -615.59033203125, "logps/rejected": -800.7882690429688, "loss": 0.6801, "rewards/accuracies": 0.75, "rewards/chosen": -0.44897276163101196, "rewards/margins": 0.153789684176445, "rewards/rejected": -0.6027624607086182, "step": 5800 }, { "epoch": 0.43, "learning_rate": 1.4113653350961947e-06, "logits/chosen": -2.016690492630005, "logits/rejected": -1.4688876867294312, "logps/chosen": -474.17242431640625, "logps/rejected": -652.48876953125, "loss": 0.6739, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31891733407974243, "rewards/margins": 0.22124537825584412, "rewards/rejected": -0.5401627421379089, "step": 5810 }, { "epoch": 0.43, "learning_rate": 1.4090170605137393e-06, "logits/chosen": -2.2318482398986816, "logits/rejected": -1.5148884057998657, "logps/chosen": -460.8369140625, "logps/rejected": -635.6646728515625, "loss": 0.6762, "rewards/accuracies": 0.75, "rewards/chosen": -0.2839355766773224, "rewards/margins": 0.20656192302703857, "rewards/rejected": -0.49049749970436096, "step": 5820 }, { "epoch": 0.43, "learning_rate": 1.4066660741786263e-06, "logits/chosen": -2.272021532058716, "logits/rejected": -1.8575305938720703, "logps/chosen": -570.2350463867188, "logps/rejected": -730.8345947265625, "loss": 0.6813, "rewards/accuracies": 0.75, "rewards/chosen": -0.38693079352378845, "rewards/margins": 0.15883375704288483, "rewards/rejected": -0.5457645654678345, "step": 5830 }, { "epoch": 0.43, "learning_rate": 1.40431239167772e-06, "logits/chosen": -1.8284399509429932, "logits/rejected": -1.463731050491333, "logps/chosen": -613.3795166015625, "logps/rejected": -759.0346069335938, "loss": 0.6799, "rewards/accuracies": 0.875, "rewards/chosen": -0.44081321358680725, "rewards/margins": 0.1804133653640747, "rewards/rejected": -0.6212266087532043, "step": 5840 }, { "epoch": 0.43, "learning_rate": 1.4019560286157606e-06, "logits/chosen": -1.8072166442871094, "logits/rejected": -1.425682783126831, "logps/chosen": -660.7337646484375, "logps/rejected": -803.1480712890625, "loss": 0.6825, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.479152113199234, "rewards/margins": 0.1663302630186081, "rewards/rejected": -0.6454824209213257, "step": 5850 }, { "epoch": 0.43, "learning_rate": 1.399597000615259e-06, "logits/chosen": -1.8538662195205688, "logits/rejected": -1.568634033203125, "logps/chosen": -548.2779541015625, "logps/rejected": -699.1036376953125, "loss": 0.6728, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41243380308151245, "rewards/margins": 0.1998777687549591, "rewards/rejected": -0.6123115420341492, "step": 5860 }, { "epoch": 0.43, "learning_rate": 1.3972353233163959e-06, "logits/chosen": -2.1027262210845947, "logits/rejected": -1.5053484439849854, "logps/chosen": -586.3036499023438, "logps/rejected": -770.5653076171875, "loss": 0.6737, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4243689179420471, "rewards/margins": 0.221284419298172, "rewards/rejected": -0.6456533074378967, "step": 5870 }, { "epoch": 0.43, "learning_rate": 1.3948710123769153e-06, "logits/chosen": -2.103522539138794, "logits/rejected": -1.5769665241241455, "logps/chosen": -527.64208984375, "logps/rejected": -721.6369018554688, "loss": 0.6799, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36479371786117554, "rewards/margins": 0.20637957751750946, "rewards/rejected": -0.5711733102798462, "step": 5880 }, { "epoch": 0.43, "learning_rate": 1.3925040834720234e-06, "logits/chosen": -1.8823391199111938, "logits/rejected": -1.2064802646636963, "logps/chosen": -420.298828125, "logps/rejected": -608.7987060546875, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2624308466911316, "rewards/margins": 0.2471359223127365, "rewards/rejected": -0.5095667839050293, "step": 5890 }, { "epoch": 0.44, "learning_rate": 1.3901345522942821e-06, "logits/chosen": -2.200453996658325, "logits/rejected": -1.5004403591156006, "logps/chosen": -439.07916259765625, "logps/rejected": -564.1845703125, "loss": 0.6782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.26179638504981995, "rewards/margins": 0.1906386762857437, "rewards/rejected": -0.4524350166320801, "step": 5900 }, { "epoch": 0.44, "learning_rate": 1.387762434553507e-06, "logits/chosen": -2.159097909927368, "logits/rejected": -1.5546271800994873, "logps/chosen": -341.0003662109375, "logps/rejected": -496.5472106933594, "loss": 0.678, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20611736178398132, "rewards/margins": 0.1979057937860489, "rewards/rejected": -0.4040231704711914, "step": 5910 }, { "epoch": 0.44, "learning_rate": 1.3853877459766618e-06, "logits/chosen": -2.113694667816162, "logits/rejected": -1.4019198417663574, "logps/chosen": -469.8990783691406, "logps/rejected": -539.2742919921875, "loss": 0.6833, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2615274488925934, "rewards/margins": 0.1536598652601242, "rewards/rejected": -0.4151872992515564, "step": 5920 }, { "epoch": 0.44, "learning_rate": 1.3830105023077554e-06, "logits/chosen": -2.1536202430725098, "logits/rejected": -1.615041971206665, "logps/chosen": -376.25762939453125, "logps/rejected": -487.1012268066406, "loss": 0.6827, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21507656574249268, "rewards/margins": 0.16424132883548737, "rewards/rejected": -0.37931790947914124, "step": 5930 }, { "epoch": 0.44, "learning_rate": 1.3806307193077357e-06, "logits/chosen": -2.2638423442840576, "logits/rejected": -2.0442445278167725, "logps/chosen": -452.62255859375, "logps/rejected": -516.2122802734375, "loss": 0.6831, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.255096971988678, "rewards/margins": 0.09901443868875504, "rewards/rejected": -0.35411137342453003, "step": 5940 }, { "epoch": 0.44, "learning_rate": 1.3782484127543875e-06, "logits/chosen": -2.1454272270202637, "logits/rejected": -1.7814712524414062, "logps/chosen": -429.8060607910156, "logps/rejected": -565.2830810546875, "loss": 0.6854, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2863554060459137, "rewards/margins": 0.10969217121601105, "rewards/rejected": -0.39604753255844116, "step": 5950 }, { "epoch": 0.44, "learning_rate": 1.375863598442225e-06, "logits/chosen": -2.1238183975219727, "logits/rejected": -1.568846583366394, "logps/chosen": -366.221435546875, "logps/rejected": -472.385986328125, "loss": 0.6783, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.2379618138074875, "rewards/margins": 0.1497749537229538, "rewards/rejected": -0.3877367377281189, "step": 5960 }, { "epoch": 0.44, "learning_rate": 1.3734762921823902e-06, "logits/chosen": -1.8821020126342773, "logits/rejected": -1.586446762084961, "logps/chosen": -482.765625, "logps/rejected": -633.6439208984375, "loss": 0.6772, "rewards/accuracies": 0.75, "rewards/chosen": -0.35191452503204346, "rewards/margins": 0.15607963502407074, "rewards/rejected": -0.5079942345619202, "step": 5970 }, { "epoch": 0.44, "learning_rate": 1.371086509802546e-06, "logits/chosen": -2.005577564239502, "logits/rejected": -1.685694694519043, "logps/chosen": -475.3453674316406, "logps/rejected": -593.5455932617188, "loss": 0.6829, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3346400260925293, "rewards/margins": 0.1359376311302185, "rewards/rejected": -0.4705776572227478, "step": 5980 }, { "epoch": 0.44, "learning_rate": 1.3686942671467709e-06, "logits/chosen": -2.0033352375030518, "logits/rejected": -1.7309544086456299, "logps/chosen": -482.9344177246094, "logps/rejected": -595.7747802734375, "loss": 0.6805, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.32413992285728455, "rewards/margins": 0.13286015391349792, "rewards/rejected": -0.45700007677078247, "step": 5990 }, { "epoch": 0.44, "learning_rate": 1.3662995800754561e-06, "logits/chosen": -1.9662678241729736, "logits/rejected": -1.5755388736724854, "logps/chosen": -453.06036376953125, "logps/rejected": -583.8494262695312, "loss": 0.6805, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3091648817062378, "rewards/margins": 0.1618398129940033, "rewards/rejected": -0.4710047245025635, "step": 6000 }, { "epoch": 0.44, "learning_rate": 1.3639024644651988e-06, "logits/chosen": -2.0783851146698, "logits/rejected": -1.605780839920044, "logps/chosen": -544.902587890625, "logps/rejected": -704.4365234375, "loss": 0.68, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4071398675441742, "rewards/margins": 0.1846105456352234, "rewards/rejected": -0.59175044298172, "step": 6010 }, { "epoch": 0.44, "learning_rate": 1.3615029362086965e-06, "logits/chosen": -1.8813356161117554, "logits/rejected": -1.3042521476745605, "logps/chosen": -586.8213500976562, "logps/rejected": -720.1536254882812, "loss": 0.6786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4138125479221344, "rewards/margins": 0.17290091514587402, "rewards/rejected": -0.586713433265686, "step": 6020 }, { "epoch": 0.44, "learning_rate": 1.3591010112146434e-06, "logits/chosen": -2.1586058139801025, "logits/rejected": -1.5369187593460083, "logps/chosen": -523.179931640625, "logps/rejected": -704.6216430664062, "loss": 0.6727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35038304328918457, "rewards/margins": 0.2172560691833496, "rewards/rejected": -0.5676391124725342, "step": 6030 }, { "epoch": 0.45, "learning_rate": 1.3566967054076228e-06, "logits/chosen": -2.2433218955993652, "logits/rejected": -1.619066834449768, "logps/chosen": -565.6881103515625, "logps/rejected": -733.6306762695312, "loss": 0.6734, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.36569905281066895, "rewards/margins": 0.24895429611206055, "rewards/rejected": -0.6146533489227295, "step": 6040 }, { "epoch": 0.45, "learning_rate": 1.3542900347280038e-06, "logits/chosen": -2.0420117378234863, "logits/rejected": -1.6915979385375977, "logps/chosen": -509.38031005859375, "logps/rejected": -625.9391479492188, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3436335027217865, "rewards/margins": 0.11970391124486923, "rewards/rejected": -0.46333742141723633, "step": 6050 }, { "epoch": 0.45, "learning_rate": 1.351881015131833e-06, "logits/chosen": -2.0228798389434814, "logits/rejected": -1.500718116760254, "logps/chosen": -632.2930297851562, "logps/rejected": -731.36962890625, "loss": 0.6817, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4704377055168152, "rewards/margins": 0.1519809365272522, "rewards/rejected": -0.6224186420440674, "step": 6060 }, { "epoch": 0.45, "learning_rate": 1.3494696625907316e-06, "logits/chosen": -2.1017355918884277, "logits/rejected": -1.5148502588272095, "logps/chosen": -598.6015625, "logps/rejected": -784.0311279296875, "loss": 0.6741, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4115746021270752, "rewards/margins": 0.23098953068256378, "rewards/rejected": -0.642564058303833, "step": 6070 }, { "epoch": 0.45, "learning_rate": 1.3470559930917876e-06, "logits/chosen": -1.9456275701522827, "logits/rejected": -1.5212461948394775, "logps/chosen": -549.07568359375, "logps/rejected": -739.7064208984375, "loss": 0.678, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43626147508621216, "rewards/margins": 0.18980272114276886, "rewards/rejected": -0.6260641813278198, "step": 6080 }, { "epoch": 0.45, "learning_rate": 1.3446400226374492e-06, "logits/chosen": -1.9338840246200562, "logits/rejected": -1.663029670715332, "logps/chosen": -556.1005859375, "logps/rejected": -675.072509765625, "loss": 0.6792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4296509623527527, "rewards/margins": 0.14168691635131836, "rewards/rejected": -0.571337878704071, "step": 6090 }, { "epoch": 0.45, "learning_rate": 1.3422217672454216e-06, "logits/chosen": -2.1386399269104004, "logits/rejected": -1.5950251817703247, "logps/chosen": -492.83770751953125, "logps/rejected": -618.4047241210938, "loss": 0.6776, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3088180720806122, "rewards/margins": 0.16717325150966644, "rewards/rejected": -0.47599130868911743, "step": 6100 }, { "epoch": 0.45, "learning_rate": 1.3398012429485572e-06, "logits/chosen": -2.138864040374756, "logits/rejected": -1.5615549087524414, "logps/chosen": -505.00067138671875, "logps/rejected": -620.8656005859375, "loss": 0.6807, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3264150023460388, "rewards/margins": 0.15983109176158905, "rewards/rejected": -0.4862460494041443, "step": 6110 }, { "epoch": 0.45, "learning_rate": 1.3373784657947522e-06, "logits/chosen": -1.9268337488174438, "logits/rejected": -1.579787015914917, "logps/chosen": -434.95849609375, "logps/rejected": -701.113037109375, "loss": 0.677, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30134180188179016, "rewards/margins": 0.2532650828361511, "rewards/rejected": -0.5546067953109741, "step": 6120 }, { "epoch": 0.45, "learning_rate": 1.3349534518468391e-06, "logits/chosen": -2.0919620990753174, "logits/rejected": -1.4542062282562256, "logps/chosen": -587.8707885742188, "logps/rejected": -765.8977661132812, "loss": 0.6778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.37833911180496216, "rewards/margins": 0.2577752470970154, "rewards/rejected": -0.6361143589019775, "step": 6130 }, { "epoch": 0.45, "learning_rate": 1.3325262171824791e-06, "logits/chosen": -1.9984972476959229, "logits/rejected": -1.5068827867507935, "logps/chosen": -606.2144165039062, "logps/rejected": -727.6384887695312, "loss": 0.6809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4422079026699066, "rewards/margins": 0.1667374223470688, "rewards/rejected": -0.6089453101158142, "step": 6140 }, { "epoch": 0.45, "learning_rate": 1.330096777894058e-06, "logits/chosen": -1.95888352394104, "logits/rejected": -1.1844741106033325, "logps/chosen": -546.8136596679688, "logps/rejected": -775.1696166992188, "loss": 0.6727, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.41051191091537476, "rewards/margins": 0.25992804765701294, "rewards/rejected": -0.6704399585723877, "step": 6150 }, { "epoch": 0.45, "learning_rate": 1.3276651500885768e-06, "logits/chosen": -2.056654930114746, "logits/rejected": -1.4428794384002686, "logps/chosen": -648.9431762695312, "logps/rejected": -825.9684448242188, "loss": 0.6753, "rewards/accuracies": 0.75, "rewards/chosen": -0.47783559560775757, "rewards/margins": 0.21049292385578156, "rewards/rejected": -0.6883284449577332, "step": 6160 }, { "epoch": 0.46, "learning_rate": 1.325231349887547e-06, "logits/chosen": -1.970009207725525, "logits/rejected": -1.370725154876709, "logps/chosen": -705.22607421875, "logps/rejected": -873.5283203125, "loss": 0.6781, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5218231081962585, "rewards/margins": 0.20635804533958435, "rewards/rejected": -0.7281811833381653, "step": 6170 }, { "epoch": 0.46, "learning_rate": 1.3227953934268832e-06, "logits/chosen": -1.9422699213027954, "logits/rejected": -1.5812890529632568, "logps/chosen": -624.7861328125, "logps/rejected": -760.4002685546875, "loss": 0.6825, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.48058661818504333, "rewards/margins": 0.1491769254207611, "rewards/rejected": -0.6297634840011597, "step": 6180 }, { "epoch": 0.46, "learning_rate": 1.3203572968567944e-06, "logits/chosen": -1.7663942575454712, "logits/rejected": -1.3293068408966064, "logps/chosen": -660.2886962890625, "logps/rejected": -777.8757934570312, "loss": 0.679, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4856569170951843, "rewards/margins": 0.17902657389640808, "rewards/rejected": -0.66468346118927, "step": 6190 }, { "epoch": 0.46, "learning_rate": 1.3179170763416804e-06, "logits/chosen": -2.0364491939544678, "logits/rejected": -1.488630771636963, "logps/chosen": -660.1956787109375, "logps/rejected": -791.7271728515625, "loss": 0.6813, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.46665510535240173, "rewards/margins": 0.18630675971508026, "rewards/rejected": -0.6529619097709656, "step": 6200 }, { "epoch": 0.46, "learning_rate": 1.3154747480600208e-06, "logits/chosen": -2.0203990936279297, "logits/rejected": -1.7177941799163818, "logps/chosen": -658.2124633789062, "logps/rejected": -775.9713134765625, "loss": 0.6813, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.5018032193183899, "rewards/margins": 0.16150274872779846, "rewards/rejected": -0.6633058786392212, "step": 6210 }, { "epoch": 0.46, "learning_rate": 1.3130303282042703e-06, "logits/chosen": -2.0323503017425537, "logits/rejected": -1.7156431674957275, "logps/chosen": -624.935302734375, "logps/rejected": -743.817626953125, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4513516426086426, "rewards/margins": 0.164527028799057, "rewards/rejected": -0.615878701210022, "step": 6220 }, { "epoch": 0.46, "learning_rate": 1.310583832980751e-06, "logits/chosen": -1.7947067022323608, "logits/rejected": -1.3507503271102905, "logps/chosen": -632.3889770507812, "logps/rejected": -752.5811157226562, "loss": 0.6819, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4916454255580902, "rewards/margins": 0.14238888025283813, "rewards/rejected": -0.6340343356132507, "step": 6230 }, { "epoch": 0.46, "learning_rate": 1.3081352786095434e-06, "logits/chosen": -2.0401968955993652, "logits/rejected": -1.6509777307510376, "logps/chosen": -585.1702880859375, "logps/rejected": -684.5335693359375, "loss": 0.6827, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.45648202300071716, "rewards/margins": 0.12340178340673447, "rewards/rejected": -0.5798837542533875, "step": 6240 }, { "epoch": 0.46, "learning_rate": 1.3056846813243815e-06, "logits/chosen": -1.8307173252105713, "logits/rejected": -1.4943910837173462, "logps/chosen": -608.1192626953125, "logps/rejected": -731.620361328125, "loss": 0.6769, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4275299608707428, "rewards/margins": 0.15836381912231445, "rewards/rejected": -0.5858937501907349, "step": 6250 }, { "epoch": 0.46, "learning_rate": 1.3032320573725422e-06, "logits/chosen": -1.972992181777954, "logits/rejected": -1.7126588821411133, "logps/chosen": -475.3426818847656, "logps/rejected": -605.9652099609375, "loss": 0.6819, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3461971580982208, "rewards/margins": 0.11700750887393951, "rewards/rejected": -0.46320468187332153, "step": 6260 }, { "epoch": 0.46, "learning_rate": 1.30077742301474e-06, "logits/chosen": -2.1744790077209473, "logits/rejected": -1.5530457496643066, "logps/chosen": -474.4832458496094, "logps/rejected": -677.3958129882812, "loss": 0.6728, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31777045130729675, "rewards/margins": 0.23808464407920837, "rewards/rejected": -0.5558550953865051, "step": 6270 }, { "epoch": 0.46, "learning_rate": 1.2983207945250177e-06, "logits/chosen": -2.040515422821045, "logits/rejected": -1.5992939472198486, "logps/chosen": -566.5745849609375, "logps/rejected": -636.104736328125, "loss": 0.6844, "rewards/accuracies": 0.625, "rewards/chosen": -0.3906291425228119, "rewards/margins": 0.12666267156600952, "rewards/rejected": -0.5172918438911438, "step": 6280 }, { "epoch": 0.46, "learning_rate": 1.2958621881906394e-06, "logits/chosen": -2.0320425033569336, "logits/rejected": -1.7175281047821045, "logps/chosen": -600.3284912109375, "logps/rejected": -716.5450439453125, "loss": 0.6801, "rewards/accuracies": 0.75, "rewards/chosen": -0.42791956663131714, "rewards/margins": 0.1510908156633377, "rewards/rejected": -0.5790103673934937, "step": 6290 }, { "epoch": 0.46, "learning_rate": 1.2934016203119823e-06, "logits/chosen": -2.065382719039917, "logits/rejected": -1.5009236335754395, "logps/chosen": -596.4405517578125, "logps/rejected": -845.5657348632812, "loss": 0.6746, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4203696846961975, "rewards/margins": 0.2696375548839569, "rewards/rejected": -0.6900071501731873, "step": 6300 }, { "epoch": 0.47, "learning_rate": 1.2909391072024276e-06, "logits/chosen": -1.8078300952911377, "logits/rejected": -1.3970433473587036, "logps/chosen": -560.9727783203125, "logps/rejected": -697.906982421875, "loss": 0.6792, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4306279122829437, "rewards/margins": 0.13505946099758148, "rewards/rejected": -0.5656872987747192, "step": 6310 }, { "epoch": 0.47, "learning_rate": 1.2884746651882543e-06, "logits/chosen": -2.100630521774292, "logits/rejected": -1.6386215686798096, "logps/chosen": -559.5985107421875, "logps/rejected": -684.25439453125, "loss": 0.6819, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3877354860305786, "rewards/margins": 0.14527356624603271, "rewards/rejected": -0.5330090522766113, "step": 6320 }, { "epoch": 0.47, "learning_rate": 1.2860083106085296e-06, "logits/chosen": -1.9729490280151367, "logits/rejected": -1.373883843421936, "logps/chosen": -647.218994140625, "logps/rejected": -819.5685424804688, "loss": 0.6744, "rewards/accuracies": 0.75, "rewards/chosen": -0.4761982560157776, "rewards/margins": 0.2192036211490631, "rewards/rejected": -0.6954019069671631, "step": 6330 }, { "epoch": 0.47, "learning_rate": 1.2835400598150003e-06, "logits/chosen": -2.067086696624756, "logits/rejected": -1.4320950508117676, "logps/chosen": -612.30908203125, "logps/rejected": -805.825439453125, "loss": 0.6712, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.44070595502853394, "rewards/margins": 0.24399831891059875, "rewards/rejected": -0.6847043037414551, "step": 6340 }, { "epoch": 0.47, "learning_rate": 1.281069929171986e-06, "logits/chosen": -1.8960683345794678, "logits/rejected": -1.3133022785186768, "logps/chosen": -543.4031372070312, "logps/rejected": -751.0497436523438, "loss": 0.6704, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3822365701198578, "rewards/margins": 0.27904000878334045, "rewards/rejected": -0.661276638507843, "step": 6350 }, { "epoch": 0.47, "learning_rate": 1.2785979350562681e-06, "logits/chosen": -2.0440235137939453, "logits/rejected": -1.4590394496917725, "logps/chosen": -552.556884765625, "logps/rejected": -681.4286499023438, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.36493366956710815, "rewards/margins": 0.18902496993541718, "rewards/rejected": -0.5539586544036865, "step": 6360 }, { "epoch": 0.47, "learning_rate": 1.2761240938569839e-06, "logits/chosen": -2.129929780960083, "logits/rejected": -1.391050934791565, "logps/chosen": -528.9986572265625, "logps/rejected": -738.2189331054688, "loss": 0.6777, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3822290897369385, "rewards/margins": 0.2627372145652771, "rewards/rejected": -0.6449663043022156, "step": 6370 }, { "epoch": 0.47, "learning_rate": 1.2736484219755168e-06, "logits/chosen": -1.765912413597107, "logits/rejected": -1.3228662014007568, "logps/chosen": -766.6136474609375, "logps/rejected": -886.5234375, "loss": 0.6834, "rewards/accuracies": 0.75, "rewards/chosen": -0.590294361114502, "rewards/margins": 0.16999807953834534, "rewards/rejected": -0.7602925300598145, "step": 6380 }, { "epoch": 0.47, "learning_rate": 1.2711709358253867e-06, "logits/chosen": -2.018888473510742, "logits/rejected": -1.6018568277359009, "logps/chosen": -575.6990966796875, "logps/rejected": -730.8104248046875, "loss": 0.6752, "rewards/accuracies": 0.625, "rewards/chosen": -0.40960708260536194, "rewards/margins": 0.18516862392425537, "rewards/rejected": -0.5947757363319397, "step": 6390 }, { "epoch": 0.47, "learning_rate": 1.2686916518321422e-06, "logits/chosen": -2.059420347213745, "logits/rejected": -1.5967543125152588, "logps/chosen": -527.1500244140625, "logps/rejected": -694.0020751953125, "loss": 0.6782, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33196625113487244, "rewards/margins": 0.1906048208475113, "rewards/rejected": -0.5225710272789001, "step": 6400 }, { "epoch": 0.47, "learning_rate": 1.266210586433252e-06, "logits/chosen": -1.778092622756958, "logits/rejected": -1.475162386894226, "logps/chosen": -617.8130493164062, "logps/rejected": -742.2760620117188, "loss": 0.6814, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4543870985507965, "rewards/margins": 0.12179821729660034, "rewards/rejected": -0.576185405254364, "step": 6410 }, { "epoch": 0.47, "learning_rate": 1.2637277560779941e-06, "logits/chosen": -1.996260404586792, "logits/rejected": -1.3950804471969604, "logps/chosen": -459.78759765625, "logps/rejected": -570.73291015625, "loss": 0.6807, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.31607139110565186, "rewards/margins": 0.15055465698242188, "rewards/rejected": -0.46662601828575134, "step": 6420 }, { "epoch": 0.47, "learning_rate": 1.26124317722735e-06, "logits/chosen": -1.8809871673583984, "logits/rejected": -1.4337605237960815, "logps/chosen": -534.1637573242188, "logps/rejected": -708.265869140625, "loss": 0.6806, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3995091915130615, "rewards/margins": 0.21556317806243896, "rewards/rejected": -0.6150724291801453, "step": 6430 }, { "epoch": 0.48, "learning_rate": 1.2587568663538916e-06, "logits/chosen": -1.9989843368530273, "logits/rejected": -1.7063862085342407, "logps/chosen": -507.2416076660156, "logps/rejected": -686.0325927734375, "loss": 0.6791, "rewards/accuracies": 0.75, "rewards/chosen": -0.362857848405838, "rewards/margins": 0.186348095536232, "rewards/rejected": -0.5492058992385864, "step": 6440 }, { "epoch": 0.48, "learning_rate": 1.2562688399416752e-06, "logits/chosen": -1.9594495296478271, "logits/rejected": -1.608520746231079, "logps/chosen": -587.7495727539062, "logps/rejected": -752.622314453125, "loss": 0.6764, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4454287886619568, "rewards/margins": 0.17110487818717957, "rewards/rejected": -0.616533637046814, "step": 6450 }, { "epoch": 0.48, "learning_rate": 1.2537791144861306e-06, "logits/chosen": -2.012868881225586, "logits/rejected": -1.7341670989990234, "logps/chosen": -596.1527099609375, "logps/rejected": -686.42578125, "loss": 0.6818, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.453549861907959, "rewards/margins": 0.12084458023309708, "rewards/rejected": -0.5743944644927979, "step": 6460 }, { "epoch": 0.48, "learning_rate": 1.2512877064939526e-06, "logits/chosen": -2.0576839447021484, "logits/rejected": -1.3405307531356812, "logps/chosen": -546.2900390625, "logps/rejected": -760.0919189453125, "loss": 0.6806, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.41873782873153687, "rewards/margins": 0.23287948966026306, "rewards/rejected": -0.6516173481941223, "step": 6470 }, { "epoch": 0.48, "learning_rate": 1.2487946324829903e-06, "logits/chosen": -1.9569212198257446, "logits/rejected": -1.4665840864181519, "logps/chosen": -507.48419189453125, "logps/rejected": -685.4727783203125, "loss": 0.6763, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3782917261123657, "rewards/margins": 0.21471071243286133, "rewards/rejected": -0.593002438545227, "step": 6480 }, { "epoch": 0.48, "learning_rate": 1.2462999089821386e-06, "logits/chosen": -2.194420576095581, "logits/rejected": -1.3900529146194458, "logps/chosen": -465.20965576171875, "logps/rejected": -696.5778198242188, "loss": 0.6773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.28551265597343445, "rewards/margins": 0.29099977016448975, "rewards/rejected": -0.5765124559402466, "step": 6490 }, { "epoch": 0.48, "learning_rate": 1.243803552531229e-06, "logits/chosen": -1.9994758367538452, "logits/rejected": -1.4862468242645264, "logps/chosen": -513.4645385742188, "logps/rejected": -707.9049682617188, "loss": 0.6744, "rewards/accuracies": 0.75, "rewards/chosen": -0.36762186884880066, "rewards/margins": 0.19736014306545258, "rewards/rejected": -0.5649820566177368, "step": 6500 }, { "epoch": 0.48, "learning_rate": 1.2413055796809187e-06, "logits/chosen": -2.0400502681732178, "logits/rejected": -1.688737154006958, "logps/chosen": -493.1700134277344, "logps/rejected": -653.768310546875, "loss": 0.6791, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3435494303703308, "rewards/margins": 0.18467405438423157, "rewards/rejected": -0.5282235145568848, "step": 6510 }, { "epoch": 0.48, "learning_rate": 1.2388060069925815e-06, "logits/chosen": -1.8829008340835571, "logits/rejected": -1.5555776357650757, "logps/chosen": -525.1331176757812, "logps/rejected": -633.1958618164062, "loss": 0.6849, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3896457850933075, "rewards/margins": 0.1405377835035324, "rewards/rejected": -0.5301835536956787, "step": 6520 }, { "epoch": 0.48, "learning_rate": 1.236304851038199e-06, "logits/chosen": -2.071373224258423, "logits/rejected": -1.5028789043426514, "logps/chosen": -538.6671142578125, "logps/rejected": -724.2014770507812, "loss": 0.6769, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35342639684677124, "rewards/margins": 0.22368040680885315, "rewards/rejected": -0.577106773853302, "step": 6530 }, { "epoch": 0.48, "learning_rate": 1.2338021284002475e-06, "logits/chosen": -2.2797493934631348, "logits/rejected": -1.881195068359375, "logps/chosen": -528.4446411132812, "logps/rejected": -529.8223876953125, "loss": 0.6875, "rewards/accuracies": 0.5, "rewards/chosen": -0.3487319350242615, "rewards/margins": 0.06543564051389694, "rewards/rejected": -0.414167582988739, "step": 6540 }, { "epoch": 0.48, "learning_rate": 1.231297855671593e-06, "logits/chosen": -2.1051108837127686, "logits/rejected": -1.911505103111267, "logps/chosen": -396.0923156738281, "logps/rejected": -565.3243408203125, "loss": 0.6826, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2563644051551819, "rewards/margins": 0.15096047520637512, "rewards/rejected": -0.407324880361557, "step": 6550 }, { "epoch": 0.48, "learning_rate": 1.228792049455377e-06, "logits/chosen": -2.167468547821045, "logits/rejected": -1.6618179082870483, "logps/chosen": -501.76898193359375, "logps/rejected": -609.2298583984375, "loss": 0.6828, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30176302790641785, "rewards/margins": 0.18388591706752777, "rewards/rejected": -0.4856489300727844, "step": 6560 }, { "epoch": 0.48, "learning_rate": 1.2262847263649078e-06, "logits/chosen": -1.9011356830596924, "logits/rejected": -1.4950236082077026, "logps/chosen": -560.4290771484375, "logps/rejected": -636.47021484375, "loss": 0.683, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3620612919330597, "rewards/margins": 0.12638024985790253, "rewards/rejected": -0.488441526889801, "step": 6570 }, { "epoch": 0.49, "learning_rate": 1.2237759030235513e-06, "logits/chosen": -1.9690567255020142, "logits/rejected": -1.4544445276260376, "logps/chosen": -497.41961669921875, "logps/rejected": -643.8555908203125, "loss": 0.6809, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3396759629249573, "rewards/margins": 0.17460528016090393, "rewards/rejected": -0.5142812728881836, "step": 6580 }, { "epoch": 0.49, "learning_rate": 1.2212655960646189e-06, "logits/chosen": -1.9392292499542236, "logits/rejected": -1.5736572742462158, "logps/chosen": -643.81982421875, "logps/rejected": -798.4735107421875, "loss": 0.6804, "rewards/accuracies": 0.75, "rewards/chosen": -0.4982185363769531, "rewards/margins": 0.16471776366233826, "rewards/rejected": -0.662936270236969, "step": 6590 }, { "epoch": 0.49, "learning_rate": 1.218753822131259e-06, "logits/chosen": -1.8572442531585693, "logits/rejected": -1.2784183025360107, "logps/chosen": -575.86767578125, "logps/rejected": -778.8348388671875, "loss": 0.6738, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41426700353622437, "rewards/margins": 0.25238311290740967, "rewards/rejected": -0.666650116443634, "step": 6600 }, { "epoch": 0.49, "learning_rate": 1.2162405978763459e-06, "logits/chosen": -2.032919406890869, "logits/rejected": -1.439664602279663, "logps/chosen": -667.5518188476562, "logps/rejected": -825.2806396484375, "loss": 0.6741, "rewards/accuracies": 0.75, "rewards/chosen": -0.45383915305137634, "rewards/margins": 0.2178686112165451, "rewards/rejected": -0.6717076897621155, "step": 6610 }, { "epoch": 0.49, "learning_rate": 1.2137259399623693e-06, "logits/chosen": -2.0464606285095215, "logits/rejected": -1.777965784072876, "logps/chosen": -521.8895263671875, "logps/rejected": -659.5213012695312, "loss": 0.6811, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36563530564308167, "rewards/margins": 0.15401937067508698, "rewards/rejected": -0.5196546316146851, "step": 6620 }, { "epoch": 0.49, "learning_rate": 1.211209865061324e-06, "logits/chosen": -2.1244919300079346, "logits/rejected": -1.5791393518447876, "logps/chosen": -512.9788818359375, "logps/rejected": -656.3749389648438, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.324904203414917, "rewards/margins": 0.19005845487117767, "rewards/rejected": -0.5149626731872559, "step": 6630 }, { "epoch": 0.49, "learning_rate": 1.2086923898545988e-06, "logits/chosen": -1.9532997608184814, "logits/rejected": -1.4492170810699463, "logps/chosen": -490.448974609375, "logps/rejected": -658.2847290039062, "loss": 0.6795, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3218769133090973, "rewards/margins": 0.21906426548957825, "rewards/rejected": -0.5409411191940308, "step": 6640 }, { "epoch": 0.49, "learning_rate": 1.2061735310328678e-06, "logits/chosen": -2.154890537261963, "logits/rejected": -1.863607406616211, "logps/chosen": -528.7138671875, "logps/rejected": -603.8008422851562, "loss": 0.6847, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37687093019485474, "rewards/margins": 0.10462850332260132, "rewards/rejected": -0.48149949312210083, "step": 6650 }, { "epoch": 0.49, "learning_rate": 1.2036533052959763e-06, "logits/chosen": -2.1992573738098145, "logits/rejected": -1.5601474046707153, "logps/chosen": -565.6810302734375, "logps/rejected": -735.594970703125, "loss": 0.6787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3571372330188751, "rewards/margins": 0.22459551692008972, "rewards/rejected": -0.5817327499389648, "step": 6660 }, { "epoch": 0.49, "learning_rate": 1.201131729352834e-06, "logits/chosen": -1.9970417022705078, "logits/rejected": -1.2828041315078735, "logps/chosen": -433.3385314941406, "logps/rejected": -717.0220336914062, "loss": 0.6739, "rewards/accuracies": 0.875, "rewards/chosen": -0.2943017780780792, "rewards/margins": 0.31461119651794434, "rewards/rejected": -0.6089129447937012, "step": 6670 }, { "epoch": 0.49, "learning_rate": 1.1986088199213016e-06, "logits/chosen": -2.2545504570007324, "logits/rejected": -1.6994645595550537, "logps/chosen": -493.75189208984375, "logps/rejected": -621.2471313476562, "loss": 0.6737, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29963093996047974, "rewards/margins": 0.191985622048378, "rewards/rejected": -0.4916165769100189, "step": 6680 }, { "epoch": 0.49, "learning_rate": 1.1960845937280805e-06, "logits/chosen": -2.219055652618408, "logits/rejected": -1.6189359426498413, "logps/chosen": -552.3756713867188, "logps/rejected": -662.29052734375, "loss": 0.6783, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.335493266582489, "rewards/margins": 0.19524462521076202, "rewards/rejected": -0.5307378768920898, "step": 6690 }, { "epoch": 0.49, "learning_rate": 1.193559067508603e-06, "logits/chosen": -2.1299502849578857, "logits/rejected": -1.6706397533416748, "logps/chosen": -638.6701049804688, "logps/rejected": -777.5236206054688, "loss": 0.6761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43202224373817444, "rewards/margins": 0.19856300950050354, "rewards/rejected": -0.630585253238678, "step": 6700 }, { "epoch": 0.49, "learning_rate": 1.1910322580069192e-06, "logits/chosen": -2.038447618484497, "logits/rejected": -1.7339662313461304, "logps/chosen": -565.5980224609375, "logps/rejected": -690.6107177734375, "loss": 0.6814, "rewards/accuracies": 0.75, "rewards/chosen": -0.40603309869766235, "rewards/margins": 0.14836105704307556, "rewards/rejected": -0.5543941259384155, "step": 6710 }, { "epoch": 0.5, "learning_rate": 1.1885041819755884e-06, "logits/chosen": -1.6005750894546509, "logits/rejected": -1.2284924983978271, "logps/chosen": -646.603759765625, "logps/rejected": -746.7529296875, "loss": 0.6839, "rewards/accuracies": 0.75, "rewards/chosen": -0.46997517347335815, "rewards/margins": 0.14106935262680054, "rewards/rejected": -0.6110445261001587, "step": 6720 }, { "epoch": 0.5, "learning_rate": 1.185974856175566e-06, "logits/chosen": -1.9607152938842773, "logits/rejected": -1.5487416982650757, "logps/chosen": -515.3909301757812, "logps/rejected": -656.1471557617188, "loss": 0.6823, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.38197970390319824, "rewards/margins": 0.14993005990982056, "rewards/rejected": -0.5319098234176636, "step": 6730 }, { "epoch": 0.5, "learning_rate": 1.1834442973760943e-06, "logits/chosen": -2.172532081604004, "logits/rejected": -1.5484740734100342, "logps/chosen": -511.90057373046875, "logps/rejected": -688.150146484375, "loss": 0.6771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33634713292121887, "rewards/margins": 0.2057502269744873, "rewards/rejected": -0.5420973300933838, "step": 6740 }, { "epoch": 0.5, "learning_rate": 1.1809125223545895e-06, "logits/chosen": -1.987168312072754, "logits/rejected": -1.5429117679595947, "logps/chosen": -571.2848510742188, "logps/rejected": -691.9042358398438, "loss": 0.6792, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4119444787502289, "rewards/margins": 0.16262158751487732, "rewards/rejected": -0.5745660662651062, "step": 6750 }, { "epoch": 0.5, "learning_rate": 1.1783795478965315e-06, "logits/chosen": -2.1977884769439697, "logits/rejected": -1.7654975652694702, "logps/chosen": -474.2059020996094, "logps/rejected": -639.9047241210938, "loss": 0.6772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3226342499256134, "rewards/margins": 0.20473012328147888, "rewards/rejected": -0.5273643732070923, "step": 6760 }, { "epoch": 0.5, "learning_rate": 1.175845390795352e-06, "logits/chosen": -2.0620803833007812, "logits/rejected": -1.6004393100738525, "logps/chosen": -442.877685546875, "logps/rejected": -608.1854248046875, "loss": 0.6765, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2950608730316162, "rewards/margins": 0.19210489094257355, "rewards/rejected": -0.48716574907302856, "step": 6770 }, { "epoch": 0.5, "learning_rate": 1.1733100678523246e-06, "logits/chosen": -2.0821759700775146, "logits/rejected": -1.7173998355865479, "logps/chosen": -508.9734802246094, "logps/rejected": -606.1417236328125, "loss": 0.6797, "rewards/accuracies": 0.75, "rewards/chosen": -0.3026062548160553, "rewards/margins": 0.16335682570934296, "rewards/rejected": -0.46596306562423706, "step": 6780 }, { "epoch": 0.5, "learning_rate": 1.1707735958764514e-06, "logits/chosen": -2.109788656234741, "logits/rejected": -1.7937109470367432, "logps/chosen": -469.13665771484375, "logps/rejected": -617.9571533203125, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32346269488334656, "rewards/margins": 0.1296866536140442, "rewards/rejected": -0.45314931869506836, "step": 6790 }, { "epoch": 0.5, "learning_rate": 1.1682359916843523e-06, "logits/chosen": -2.1595029830932617, "logits/rejected": -1.6511375904083252, "logps/chosen": -424.64666748046875, "logps/rejected": -529.8653564453125, "loss": 0.6858, "rewards/accuracies": 0.625, "rewards/chosen": -0.27724918723106384, "rewards/margins": 0.13043145835399628, "rewards/rejected": -0.40768060088157654, "step": 6800 }, { "epoch": 0.5, "learning_rate": 1.1656972721001546e-06, "logits/chosen": -2.096097469329834, "logits/rejected": -1.7050952911376953, "logps/chosen": -483.3096618652344, "logps/rejected": -632.2411499023438, "loss": 0.6793, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36271101236343384, "rewards/margins": 0.17200367152690887, "rewards/rejected": -0.5347146987915039, "step": 6810 }, { "epoch": 0.5, "learning_rate": 1.1631574539553798e-06, "logits/chosen": -2.3289947509765625, "logits/rejected": -1.764039397239685, "logps/chosen": -522.4370727539062, "logps/rejected": -689.6656494140625, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -0.336308091878891, "rewards/margins": 0.1876213252544403, "rewards/rejected": -0.5239294767379761, "step": 6820 }, { "epoch": 0.5, "learning_rate": 1.1606165540888335e-06, "logits/chosen": -2.257168769836426, "logits/rejected": -1.406799077987671, "logps/chosen": -460.8558654785156, "logps/rejected": -578.7871704101562, "loss": 0.6796, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25558412075042725, "rewards/margins": 0.1954813152551651, "rewards/rejected": -0.45106539130210876, "step": 6830 }, { "epoch": 0.5, "learning_rate": 1.1580745893464922e-06, "logits/chosen": -2.22340989112854, "logits/rejected": -1.6719459295272827, "logps/chosen": -449.023193359375, "logps/rejected": -649.5020751953125, "loss": 0.679, "rewards/accuracies": 0.75, "rewards/chosen": -0.2843194901943207, "rewards/margins": 0.22148454189300537, "rewards/rejected": -0.5058040618896484, "step": 6840 }, { "epoch": 0.51, "learning_rate": 1.1555315765813934e-06, "logits/chosen": -1.908624291419983, "logits/rejected": -1.6620479822158813, "logps/chosen": -560.8638916015625, "logps/rejected": -675.44677734375, "loss": 0.682, "rewards/accuracies": 0.625, "rewards/chosen": -0.4114800989627838, "rewards/margins": 0.12247387319803238, "rewards/rejected": -0.5339539647102356, "step": 6850 }, { "epoch": 0.51, "learning_rate": 1.1529875326535216e-06, "logits/chosen": -2.100825071334839, "logits/rejected": -1.5739741325378418, "logps/chosen": -544.6141357421875, "logps/rejected": -699.3502807617188, "loss": 0.6745, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3670700192451477, "rewards/margins": 0.21346323192119598, "rewards/rejected": -0.5805332064628601, "step": 6860 }, { "epoch": 0.51, "learning_rate": 1.1504424744296993e-06, "logits/chosen": -2.195791721343994, "logits/rejected": -1.699009656906128, "logps/chosen": -535.3748779296875, "logps/rejected": -670.6187744140625, "loss": 0.6865, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.366873562335968, "rewards/margins": 0.15793319046497345, "rewards/rejected": -0.5248066782951355, "step": 6870 }, { "epoch": 0.51, "learning_rate": 1.1478964187834735e-06, "logits/chosen": -2.2156577110290527, "logits/rejected": -1.6598602533340454, "logps/chosen": -496.2705993652344, "logps/rejected": -643.0625610351562, "loss": 0.681, "rewards/accuracies": 0.75, "rewards/chosen": -0.3119036555290222, "rewards/margins": 0.1867658495903015, "rewards/rejected": -0.4986695349216461, "step": 6880 }, { "epoch": 0.51, "learning_rate": 1.1453493825950022e-06, "logits/chosen": -2.4358723163604736, "logits/rejected": -1.9335870742797852, "logps/chosen": -495.7666931152344, "logps/rejected": -612.8642578125, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2932356894016266, "rewards/margins": 0.1951208859682083, "rewards/rejected": -0.4883565306663513, "step": 6890 }, { "epoch": 0.51, "learning_rate": 1.1428013827509466e-06, "logits/chosen": -2.2152256965637207, "logits/rejected": -1.7287931442260742, "logps/chosen": -515.7564086914062, "logps/rejected": -612.1822509765625, "loss": 0.6829, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31657564640045166, "rewards/margins": 0.14354142546653748, "rewards/rejected": -0.46011704206466675, "step": 6900 }, { "epoch": 0.51, "learning_rate": 1.1402524361443555e-06, "logits/chosen": -2.011786460876465, "logits/rejected": -1.619124174118042, "logps/chosen": -491.5244140625, "logps/rejected": -608.8510131835938, "loss": 0.6824, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33057349920272827, "rewards/margins": 0.15636803209781647, "rewards/rejected": -0.4869415760040283, "step": 6910 }, { "epoch": 0.51, "learning_rate": 1.1377025596745556e-06, "logits/chosen": -2.1605939865112305, "logits/rejected": -1.573787808418274, "logps/chosen": -514.41943359375, "logps/rejected": -654.876220703125, "loss": 0.6768, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3506915271282196, "rewards/margins": 0.1920115202665329, "rewards/rejected": -0.5427030324935913, "step": 6920 }, { "epoch": 0.51, "learning_rate": 1.1351517702470381e-06, "logits/chosen": -1.9281984567642212, "logits/rejected": -1.2289828062057495, "logps/chosen": -422.98785400390625, "logps/rejected": -647.8358764648438, "loss": 0.6743, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2647842764854431, "rewards/margins": 0.29017069935798645, "rewards/rejected": -0.554955005645752, "step": 6930 }, { "epoch": 0.51, "learning_rate": 1.1326000847733464e-06, "logits/chosen": -2.14762282371521, "logits/rejected": -1.4785023927688599, "logps/chosen": -544.8231811523438, "logps/rejected": -707.2084350585938, "loss": 0.6773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3567548394203186, "rewards/margins": 0.20601733028888702, "rewards/rejected": -0.5627721548080444, "step": 6940 }, { "epoch": 0.51, "learning_rate": 1.1300475201709651e-06, "logits/chosen": -2.12109637260437, "logits/rejected": -1.5757226943969727, "logps/chosen": -516.8363037109375, "logps/rejected": -678.6513671875, "loss": 0.677, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3228755593299866, "rewards/margins": 0.22970375418663025, "rewards/rejected": -0.5525792837142944, "step": 6950 }, { "epoch": 0.51, "learning_rate": 1.1274940933632082e-06, "logits/chosen": -1.9370235204696655, "logits/rejected": -1.5177347660064697, "logps/chosen": -493.96893310546875, "logps/rejected": -634.4525756835938, "loss": 0.6781, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3673302233219147, "rewards/margins": 0.16471166908740997, "rewards/rejected": -0.5320419073104858, "step": 6960 }, { "epoch": 0.51, "learning_rate": 1.124939821279105e-06, "logits/chosen": -1.9379634857177734, "logits/rejected": -1.4685571193695068, "logps/chosen": -491.8189392089844, "logps/rejected": -621.76220703125, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -0.33341795206069946, "rewards/margins": 0.16973352432250977, "rewards/rejected": -0.5031514167785645, "step": 6970 }, { "epoch": 0.51, "learning_rate": 1.1223847208532894e-06, "logits/chosen": -2.087827682495117, "logits/rejected": -1.582301378250122, "logps/chosen": -560.5183715820312, "logps/rejected": -625.9332275390625, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.35830578207969666, "rewards/margins": 0.1389695405960083, "rewards/rejected": -0.49727529287338257, "step": 6980 }, { "epoch": 0.52, "learning_rate": 1.1198288090258862e-06, "logits/chosen": -2.1100311279296875, "logits/rejected": -1.2977802753448486, "logps/chosen": -545.86279296875, "logps/rejected": -709.9996337890625, "loss": 0.6782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3649894595146179, "rewards/margins": 0.20886878669261932, "rewards/rejected": -0.5738582015037537, "step": 6990 }, { "epoch": 0.52, "learning_rate": 1.1172721027424018e-06, "logits/chosen": -2.2353432178497314, "logits/rejected": -1.5719621181488037, "logps/chosen": -510.767822265625, "logps/rejected": -777.4364013671875, "loss": 0.6756, "rewards/accuracies": 0.875, "rewards/chosen": -0.26144641637802124, "rewards/margins": 0.3103755712509155, "rewards/rejected": -0.5718220472335815, "step": 7000 }, { "epoch": 0.52, "learning_rate": 1.1147146189536075e-06, "logits/chosen": -1.9847958087921143, "logits/rejected": -1.5548650026321411, "logps/chosen": -449.1758728027344, "logps/rejected": -625.7825927734375, "loss": 0.6777, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3211378753185272, "rewards/margins": 0.1771378219127655, "rewards/rejected": -0.4982756972312927, "step": 7010 }, { "epoch": 0.52, "learning_rate": 1.1121563746154305e-06, "logits/chosen": -2.00468111038208, "logits/rejected": -1.5983264446258545, "logps/chosen": -428.9417419433594, "logps/rejected": -577.589111328125, "loss": 0.6763, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.30887627601623535, "rewards/margins": 0.1682591736316681, "rewards/rejected": -0.47713547945022583, "step": 7020 }, { "epoch": 0.52, "learning_rate": 1.109597386688841e-06, "logits/chosen": -1.9618184566497803, "logits/rejected": -1.4213920831680298, "logps/chosen": -459.1953125, "logps/rejected": -676.6690673828125, "loss": 0.6732, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3114374876022339, "rewards/margins": 0.2295423001050949, "rewards/rejected": -0.5409798622131348, "step": 7030 }, { "epoch": 0.52, "learning_rate": 1.1070376721397372e-06, "logits/chosen": -2.099430561065674, "logits/rejected": -1.3809268474578857, "logps/chosen": -550.3159790039062, "logps/rejected": -709.7271728515625, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3651403486728668, "rewards/margins": 0.23663334548473358, "rewards/rejected": -0.6017736792564392, "step": 7040 }, { "epoch": 0.52, "learning_rate": 1.1044772479388373e-06, "logits/chosen": -2.08833646774292, "logits/rejected": -1.7410602569580078, "logps/chosen": -466.3658142089844, "logps/rejected": -639.8157348632812, "loss": 0.6801, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30931201577186584, "rewards/margins": 0.17786413431167603, "rewards/rejected": -0.48717623949050903, "step": 7050 }, { "epoch": 0.52, "learning_rate": 1.1019161310615623e-06, "logits/chosen": -2.1329710483551025, "logits/rejected": -1.4659491777420044, "logps/chosen": -411.3143615722656, "logps/rejected": -610.5203247070312, "loss": 0.675, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24886736273765564, "rewards/margins": 0.23671062290668488, "rewards/rejected": -0.4855780005455017, "step": 7060 }, { "epoch": 0.52, "learning_rate": 1.0993543384879265e-06, "logits/chosen": -2.1268601417541504, "logits/rejected": -1.5748766660690308, "logps/chosen": -384.02001953125, "logps/rejected": -566.3510131835938, "loss": 0.6742, "rewards/accuracies": 0.75, "rewards/chosen": -0.25085633993148804, "rewards/margins": 0.2218945324420929, "rewards/rejected": -0.4727509021759033, "step": 7070 }, { "epoch": 0.52, "learning_rate": 1.096791887202424e-06, "logits/chosen": -2.083516836166382, "logits/rejected": -1.6232783794403076, "logps/chosen": -475.37982177734375, "logps/rejected": -605.3128662109375, "loss": 0.6806, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.28921976685523987, "rewards/margins": 0.16909350454807281, "rewards/rejected": -0.4583132863044739, "step": 7080 }, { "epoch": 0.52, "learning_rate": 1.0942287941939162e-06, "logits/chosen": -2.132359027862549, "logits/rejected": -1.613216757774353, "logps/chosen": -414.85150146484375, "logps/rejected": -671.4517211914062, "loss": 0.6746, "rewards/accuracies": 0.75, "rewards/chosen": -0.2633386552333832, "rewards/margins": 0.25349220633506775, "rewards/rejected": -0.5168309211730957, "step": 7090 }, { "epoch": 0.52, "learning_rate": 1.0916650764555189e-06, "logits/chosen": -2.1395275592803955, "logits/rejected": -1.761183500289917, "logps/chosen": -403.46173095703125, "logps/rejected": -604.7130126953125, "loss": 0.6794, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.24089500308036804, "rewards/margins": 0.17763091623783112, "rewards/rejected": -0.41852593421936035, "step": 7100 }, { "epoch": 0.52, "learning_rate": 1.0891007509844893e-06, "logits/chosen": -2.2657182216644287, "logits/rejected": -1.8124545812606812, "logps/chosen": -385.18377685546875, "logps/rejected": -532.67919921875, "loss": 0.6832, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23917026817798615, "rewards/margins": 0.1830860823392868, "rewards/rejected": -0.42225632071495056, "step": 7110 }, { "epoch": 0.53, "learning_rate": 1.086535834782115e-06, "logits/chosen": -2.1971447467803955, "logits/rejected": -1.555598258972168, "logps/chosen": -556.7010498046875, "logps/rejected": -726.1436767578125, "loss": 0.679, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3672352433204651, "rewards/margins": 0.1970611810684204, "rewards/rejected": -0.5642964839935303, "step": 7120 }, { "epoch": 0.53, "learning_rate": 1.083970344853599e-06, "logits/chosen": -2.2129383087158203, "logits/rejected": -1.7983310222625732, "logps/chosen": -486.7533264160156, "logps/rejected": -624.1873779296875, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.2838863432407379, "rewards/margins": 0.20917551219463348, "rewards/rejected": -0.4930618405342102, "step": 7130 }, { "epoch": 0.53, "learning_rate": 1.081404298207949e-06, "logits/chosen": -1.8670440912246704, "logits/rejected": -1.4411094188690186, "logps/chosen": -554.7159423828125, "logps/rejected": -709.0608520507812, "loss": 0.6783, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3917040228843689, "rewards/margins": 0.17323587834835052, "rewards/rejected": -0.5649399161338806, "step": 7140 }, { "epoch": 0.53, "learning_rate": 1.0788377118578627e-06, "logits/chosen": -2.2858173847198486, "logits/rejected": -2.0108437538146973, "logps/chosen": -460.38916015625, "logps/rejected": -627.5025634765625, "loss": 0.6779, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3295949399471283, "rewards/margins": 0.17494051158428192, "rewards/rejected": -0.5045354962348938, "step": 7150 }, { "epoch": 0.53, "learning_rate": 1.0762706028196169e-06, "logits/chosen": -1.9077789783477783, "logits/rejected": -1.4968748092651367, "logps/chosen": -450.4775390625, "logps/rejected": -636.2903442382812, "loss": 0.6739, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3059278428554535, "rewards/margins": 0.2069440633058548, "rewards/rejected": -0.5128719210624695, "step": 7160 }, { "epoch": 0.53, "learning_rate": 1.0737029881129527e-06, "logits/chosen": -2.1303255558013916, "logits/rejected": -1.635427474975586, "logps/chosen": -496.37481689453125, "logps/rejected": -666.51025390625, "loss": 0.6815, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33291199803352356, "rewards/margins": 0.1800139844417572, "rewards/rejected": -0.5129259824752808, "step": 7170 }, { "epoch": 0.53, "learning_rate": 1.0711348847609654e-06, "logits/chosen": -2.0684893131256104, "logits/rejected": -1.665270209312439, "logps/chosen": -554.5465087890625, "logps/rejected": -776.6651611328125, "loss": 0.6755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3617570996284485, "rewards/margins": 0.24370384216308594, "rewards/rejected": -0.6054609417915344, "step": 7180 }, { "epoch": 0.53, "learning_rate": 1.0685663097899883e-06, "logits/chosen": -1.9330192804336548, "logits/rejected": -1.546226143836975, "logps/chosen": -673.6510009765625, "logps/rejected": -761.7916259765625, "loss": 0.6819, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4985787868499756, "rewards/margins": 0.13296015560626984, "rewards/rejected": -0.631538987159729, "step": 7190 }, { "epoch": 0.53, "learning_rate": 1.0659972802294825e-06, "logits/chosen": -2.120500326156616, "logits/rejected": -1.6476497650146484, "logps/chosen": -555.645751953125, "logps/rejected": -726.3226318359375, "loss": 0.6756, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3617948889732361, "rewards/margins": 0.1701454371213913, "rewards/rejected": -0.5319402813911438, "step": 7200 }, { "epoch": 0.53, "learning_rate": 1.063427813111923e-06, "logits/chosen": -2.068662405014038, "logits/rejected": -1.3386826515197754, "logps/chosen": -563.1220092773438, "logps/rejected": -746.1171875, "loss": 0.6765, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3670057952404022, "rewards/margins": 0.2521273195743561, "rewards/rejected": -0.6191331148147583, "step": 7210 }, { "epoch": 0.53, "learning_rate": 1.0608579254726844e-06, "logits/chosen": -1.9574649333953857, "logits/rejected": -1.6837431192398071, "logps/chosen": -507.7342834472656, "logps/rejected": -688.670654296875, "loss": 0.6789, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4053799510002136, "rewards/margins": 0.18290381133556366, "rewards/rejected": -0.5882838368415833, "step": 7220 }, { "epoch": 0.53, "learning_rate": 1.0582876343499319e-06, "logits/chosen": -2.0351924896240234, "logits/rejected": -1.5871307849884033, "logps/chosen": -572.7643432617188, "logps/rejected": -757.6729736328125, "loss": 0.677, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.40842705965042114, "rewards/margins": 0.22346167266368866, "rewards/rejected": -0.631888747215271, "step": 7230 }, { "epoch": 0.53, "learning_rate": 1.055716956784503e-06, "logits/chosen": -1.998753309249878, "logits/rejected": -1.5180482864379883, "logps/chosen": -440.18035888671875, "logps/rejected": -593.9848022460938, "loss": 0.6787, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2761276364326477, "rewards/margins": 0.2154371291399002, "rewards/rejected": -0.4915647506713867, "step": 7240 }, { "epoch": 0.53, "learning_rate": 1.0531459098197993e-06, "logits/chosen": -2.053209066390991, "logits/rejected": -1.3793838024139404, "logps/chosen": -490.3388671875, "logps/rejected": -657.9406127929688, "loss": 0.6737, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.3176420331001282, "rewards/margins": 0.22805078327655792, "rewards/rejected": -0.5456928610801697, "step": 7250 }, { "epoch": 0.54, "learning_rate": 1.0505745105016703e-06, "logits/chosen": -2.142179012298584, "logits/rejected": -1.5252223014831543, "logps/chosen": -455.9783630371094, "logps/rejected": -674.1774291992188, "loss": 0.6778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3341664671897888, "rewards/margins": 0.2312694489955902, "rewards/rejected": -0.5654360055923462, "step": 7260 }, { "epoch": 0.54, "learning_rate": 1.0480027758783024e-06, "logits/chosen": -2.2477824687957764, "logits/rejected": -1.7603458166122437, "logps/chosen": -568.7783203125, "logps/rejected": -754.3714599609375, "loss": 0.6765, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3864978551864624, "rewards/margins": 0.20730237662792206, "rewards/rejected": -0.5938001871109009, "step": 7270 }, { "epoch": 0.54, "learning_rate": 1.0454307230001048e-06, "logits/chosen": -2.0879950523376465, "logits/rejected": -1.7965075969696045, "logps/chosen": -636.5643310546875, "logps/rejected": -765.9468994140625, "loss": 0.6851, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4002206325531006, "rewards/margins": 0.15171685814857483, "rewards/rejected": -0.551937460899353, "step": 7280 }, { "epoch": 0.54, "learning_rate": 1.0428583689195957e-06, "logits/chosen": -2.0682201385498047, "logits/rejected": -1.8914836645126343, "logps/chosen": -425.0013732910156, "logps/rejected": -582.059326171875, "loss": 0.6809, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.283445805311203, "rewards/margins": 0.14383098483085632, "rewards/rejected": -0.4272767901420593, "step": 7290 }, { "epoch": 0.54, "learning_rate": 1.040285730691292e-06, "logits/chosen": -2.2483723163604736, "logits/rejected": -1.9081588983535767, "logps/chosen": -502.06494140625, "logps/rejected": -596.4071044921875, "loss": 0.6818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2935526371002197, "rewards/margins": 0.163922518491745, "rewards/rejected": -0.45747512578964233, "step": 7300 }, { "epoch": 0.54, "learning_rate": 1.0377128253715932e-06, "logits/chosen": -2.0993592739105225, "logits/rejected": -1.654163122177124, "logps/chosen": -513.5665283203125, "logps/rejected": -618.477294921875, "loss": 0.6832, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34867578744888306, "rewards/margins": 0.11462261527776718, "rewards/rejected": -0.4632984697818756, "step": 7310 }, { "epoch": 0.54, "learning_rate": 1.0351396700186703e-06, "logits/chosen": -2.059948444366455, "logits/rejected": -1.701128602027893, "logps/chosen": -550.8270874023438, "logps/rejected": -724.6690673828125, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3670697808265686, "rewards/margins": 0.1726648062467575, "rewards/rejected": -0.5397346019744873, "step": 7320 }, { "epoch": 0.54, "learning_rate": 1.0325662816923517e-06, "logits/chosen": -2.0207486152648926, "logits/rejected": -1.4707615375518799, "logps/chosen": -502.605712890625, "logps/rejected": -694.2398681640625, "loss": 0.6766, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32923993468284607, "rewards/margins": 0.2400931417942047, "rewards/rejected": -0.5693330764770508, "step": 7330 }, { "epoch": 0.54, "learning_rate": 1.02999267745401e-06, "logits/chosen": -2.199004650115967, "logits/rejected": -1.6554815769195557, "logps/chosen": -484.93511962890625, "logps/rejected": -655.1716918945312, "loss": 0.6752, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3256527781486511, "rewards/margins": 0.21403315663337708, "rewards/rejected": -0.5396859645843506, "step": 7340 }, { "epoch": 0.54, "learning_rate": 1.0274188743664497e-06, "logits/chosen": -1.9333827495574951, "logits/rejected": -1.5898261070251465, "logps/chosen": -437.130859375, "logps/rejected": -624.3123779296875, "loss": 0.6729, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3014239966869354, "rewards/margins": 0.2181580513715744, "rewards/rejected": -0.5195820331573486, "step": 7350 }, { "epoch": 0.54, "learning_rate": 1.024844889493794e-06, "logits/chosen": -2.1691603660583496, "logits/rejected": -1.5205605030059814, "logps/chosen": -504.45623779296875, "logps/rejected": -732.9961547851562, "loss": 0.6715, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.37193772196769714, "rewards/margins": 0.26419493556022644, "rewards/rejected": -0.6361325979232788, "step": 7360 }, { "epoch": 0.54, "learning_rate": 1.0222707399013707e-06, "logits/chosen": -2.251265048980713, "logits/rejected": -1.7939914464950562, "logps/chosen": -442.19482421875, "logps/rejected": -556.6666870117188, "loss": 0.6828, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2656627297401428, "rewards/margins": 0.14226093888282776, "rewards/rejected": -0.4079236090183258, "step": 7370 }, { "epoch": 0.54, "learning_rate": 1.0196964426556005e-06, "logits/chosen": -2.2378952503204346, "logits/rejected": -1.4849717617034912, "logps/chosen": -411.69580078125, "logps/rejected": -615.6810302734375, "loss": 0.6758, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.23658975958824158, "rewards/margins": 0.24372979998588562, "rewards/rejected": -0.4803195893764496, "step": 7380 }, { "epoch": 0.55, "learning_rate": 1.0171220148238814e-06, "logits/chosen": -2.067894458770752, "logits/rejected": -1.616333246231079, "logps/chosen": -506.3271484375, "logps/rejected": -638.864990234375, "loss": 0.6789, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34923872351646423, "rewards/margins": 0.16366036236286163, "rewards/rejected": -0.5128990411758423, "step": 7390 }, { "epoch": 0.55, "learning_rate": 1.0145474734744794e-06, "logits/chosen": -1.8209835290908813, "logits/rejected": -1.17618727684021, "logps/chosen": -518.3138427734375, "logps/rejected": -681.0667114257812, "loss": 0.6739, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.376911997795105, "rewards/margins": 0.2089867889881134, "rewards/rejected": -0.585898756980896, "step": 7400 }, { "epoch": 0.55, "learning_rate": 1.011972835676411e-06, "logits/chosen": -2.2399604320526123, "logits/rejected": -1.513496994972229, "logps/chosen": -423.9617614746094, "logps/rejected": -650.8936767578125, "loss": 0.6752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.25644707679748535, "rewards/margins": 0.29773297905921936, "rewards/rejected": -0.5541800260543823, "step": 7410 }, { "epoch": 0.55, "learning_rate": 1.0093981184993337e-06, "logits/chosen": -2.1825199127197266, "logits/rejected": -1.7189451456069946, "logps/chosen": -416.1978454589844, "logps/rejected": -541.64697265625, "loss": 0.6844, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2568686008453369, "rewards/margins": 0.15962812304496765, "rewards/rejected": -0.41649675369262695, "step": 7420 }, { "epoch": 0.55, "learning_rate": 1.0068233390134307e-06, "logits/chosen": -2.1325690746307373, "logits/rejected": -1.6511485576629639, "logps/chosen": -381.78057861328125, "logps/rejected": -575.1842041015625, "loss": 0.6776, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2555747330188751, "rewards/margins": 0.21231739223003387, "rewards/rejected": -0.4678920805454254, "step": 7430 }, { "epoch": 0.55, "learning_rate": 1.0042485142892976e-06, "logits/chosen": -2.2823734283447266, "logits/rejected": -1.801641821861267, "logps/chosen": -442.55828857421875, "logps/rejected": -620.2181396484375, "loss": 0.6792, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2573181092739105, "rewards/margins": 0.17728319764137268, "rewards/rejected": -0.4346013069152832, "step": 7440 }, { "epoch": 0.55, "learning_rate": 1.0016736613978314e-06, "logits/chosen": -2.0826942920684814, "logits/rejected": -1.6295521259307861, "logps/chosen": -376.23028564453125, "logps/rejected": -632.2307739257812, "loss": 0.6711, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.21687071025371552, "rewards/margins": 0.272073358297348, "rewards/rejected": -0.48894399404525757, "step": 7450 }, { "epoch": 0.55, "learning_rate": 9.990987974101144e-07, "logits/chosen": -2.2247507572174072, "logits/rejected": -1.813201665878296, "logps/chosen": -412.00341796875, "logps/rejected": -581.5906982421875, "loss": 0.6783, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.23827806115150452, "rewards/margins": 0.192054882645607, "rewards/rejected": -0.4303329586982727, "step": 7460 }, { "epoch": 0.55, "learning_rate": 9.965239393973038e-07, "logits/chosen": -2.081028461456299, "logits/rejected": -1.635382056236267, "logps/chosen": -499.9493103027344, "logps/rejected": -665.76123046875, "loss": 0.6809, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32094982266426086, "rewards/margins": 0.1864783763885498, "rewards/rejected": -0.5074282288551331, "step": 7470 }, { "epoch": 0.55, "learning_rate": 9.939491044305163e-07, "logits/chosen": -2.2581734657287598, "logits/rejected": -1.9229795932769775, "logps/chosen": -423.1853942871094, "logps/rejected": -585.1298217773438, "loss": 0.6768, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2708563208580017, "rewards/margins": 0.187820702791214, "rewards/rejected": -0.4586770534515381, "step": 7480 }, { "epoch": 0.55, "learning_rate": 9.913743095807157e-07, "logits/chosen": -2.1037838459014893, "logits/rejected": -1.6270250082015991, "logps/chosen": -474.70965576171875, "logps/rejected": -613.1885986328125, "loss": 0.684, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3150259256362915, "rewards/margins": 0.16971395909786224, "rewards/rejected": -0.48473992943763733, "step": 7490 }, { "epoch": 0.55, "learning_rate": 9.887995719186002e-07, "logits/chosen": -2.2085089683532715, "logits/rejected": -1.5548410415649414, "logps/chosen": -436.40277099609375, "logps/rejected": -583.61669921875, "loss": 0.6834, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2747476398944855, "rewards/margins": 0.1833493858575821, "rewards/rejected": -0.4580969214439392, "step": 7500 }, { "epoch": 0.55, "learning_rate": 9.862249085144894e-07, "logits/chosen": -2.214167594909668, "logits/rejected": -1.6632585525512695, "logps/chosen": -470.8458557128906, "logps/rejected": -642.3441162109375, "loss": 0.6809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31342509388923645, "rewards/margins": 0.21536509692668915, "rewards/rejected": -0.5287901759147644, "step": 7510 }, { "epoch": 0.55, "learning_rate": 9.836503364382092e-07, "logits/chosen": -2.189967155456543, "logits/rejected": -1.849801778793335, "logps/chosen": -459.47332763671875, "logps/rejected": -576.8540649414062, "loss": 0.6764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2912479043006897, "rewards/margins": 0.15506456792354584, "rewards/rejected": -0.44631248712539673, "step": 7520 }, { "epoch": 0.56, "learning_rate": 9.810758727589813e-07, "logits/chosen": -1.9897617101669312, "logits/rejected": -1.461686611175537, "logps/chosen": -547.1463012695312, "logps/rejected": -696.8236083984375, "loss": 0.6772, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37463638186454773, "rewards/margins": 0.21404524147510529, "rewards/rejected": -0.5886815786361694, "step": 7530 }, { "epoch": 0.56, "learning_rate": 9.785015345453076e-07, "logits/chosen": -2.1562790870666504, "logits/rejected": -1.7163842916488647, "logps/chosen": -537.9956665039062, "logps/rejected": -675.7482299804688, "loss": 0.6788, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3268199563026428, "rewards/margins": 0.16124984622001648, "rewards/rejected": -0.4880698323249817, "step": 7540 }, { "epoch": 0.56, "learning_rate": 9.759273388648593e-07, "logits/chosen": -2.1904211044311523, "logits/rejected": -1.8189483880996704, "logps/chosen": -522.6153564453125, "logps/rejected": -595.0209350585938, "loss": 0.6808, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2989211678504944, "rewards/margins": 0.14431457221508026, "rewards/rejected": -0.4432357847690582, "step": 7550 }, { "epoch": 0.56, "learning_rate": 9.73353302784362e-07, "logits/chosen": -2.2819936275482178, "logits/rejected": -1.8903461694717407, "logps/chosen": -459.2665100097656, "logps/rejected": -543.9547119140625, "loss": 0.6834, "rewards/accuracies": 0.75, "rewards/chosen": -0.2861100137233734, "rewards/margins": 0.13018998503684998, "rewards/rejected": -0.4162999987602234, "step": 7560 }, { "epoch": 0.56, "learning_rate": 9.707794433694833e-07, "logits/chosen": -2.1797239780426025, "logits/rejected": -1.7116367816925049, "logps/chosen": -530.4432373046875, "logps/rejected": -676.8335571289062, "loss": 0.6784, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35446372628211975, "rewards/margins": 0.16574816405773163, "rewards/rejected": -0.5202118754386902, "step": 7570 }, { "epoch": 0.56, "learning_rate": 9.682057776847196e-07, "logits/chosen": -1.9813827276229858, "logits/rejected": -1.5522315502166748, "logps/chosen": -452.7210998535156, "logps/rejected": -577.3336181640625, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2908051311969757, "rewards/margins": 0.1598891168832779, "rewards/rejected": -0.4506942331790924, "step": 7580 }, { "epoch": 0.56, "learning_rate": 9.656323227932824e-07, "logits/chosen": -2.059284210205078, "logits/rejected": -1.2463628053665161, "logps/chosen": -408.991455078125, "logps/rejected": -650.4152221679688, "loss": 0.6713, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.24852529168128967, "rewards/margins": 0.29770779609680176, "rewards/rejected": -0.546233057975769, "step": 7590 }, { "epoch": 0.56, "learning_rate": 9.63059095756986e-07, "logits/chosen": -2.296048164367676, "logits/rejected": -1.842153549194336, "logps/chosen": -453.6302795410156, "logps/rejected": -576.59619140625, "loss": 0.6786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.24667103588581085, "rewards/margins": 0.17874222993850708, "rewards/rejected": -0.42541322112083435, "step": 7600 }, { "epoch": 0.56, "learning_rate": 9.60486113636135e-07, "logits/chosen": -1.9021313190460205, "logits/rejected": -1.7018272876739502, "logps/chosen": -407.9225769042969, "logps/rejected": -502.78289794921875, "loss": 0.6859, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2756204605102539, "rewards/margins": 0.13335542380809784, "rewards/rejected": -0.40897589921951294, "step": 7610 }, { "epoch": 0.56, "learning_rate": 9.579133934894078e-07, "logits/chosen": -2.0996716022491455, "logits/rejected": -1.628588080406189, "logps/chosen": -535.0436401367188, "logps/rejected": -724.3304443359375, "loss": 0.6758, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33897894620895386, "rewards/margins": 0.23890499770641327, "rewards/rejected": -0.5778840184211731, "step": 7620 }, { "epoch": 0.56, "learning_rate": 9.553409523737483e-07, "logits/chosen": -1.8821004629135132, "logits/rejected": -1.5977015495300293, "logps/chosen": -537.4727783203125, "logps/rejected": -688.6104736328125, "loss": 0.6787, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4017679691314697, "rewards/margins": 0.17725667357444763, "rewards/rejected": -0.579024612903595, "step": 7630 }, { "epoch": 0.56, "learning_rate": 9.527688073442494e-07, "logits/chosen": -1.9711759090423584, "logits/rejected": -1.665701150894165, "logps/chosen": -542.7665405273438, "logps/rejected": -667.7518310546875, "loss": 0.6828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38044676184654236, "rewards/margins": 0.1355876624584198, "rewards/rejected": -0.5160343647003174, "step": 7640 }, { "epoch": 0.56, "learning_rate": 9.501969754540415e-07, "logits/chosen": -2.0216217041015625, "logits/rejected": -1.6427929401397705, "logps/chosen": -463.5797424316406, "logps/rejected": -677.499267578125, "loss": 0.6785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3413296341896057, "rewards/margins": 0.20269477367401123, "rewards/rejected": -0.5440243482589722, "step": 7650 }, { "epoch": 0.57, "learning_rate": 9.47625473754178e-07, "logits/chosen": -2.200310707092285, "logits/rejected": -1.7148191928863525, "logps/chosen": -435.7242126464844, "logps/rejected": -551.0423583984375, "loss": 0.6777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.27733972668647766, "rewards/margins": 0.15408551692962646, "rewards/rejected": -0.4314252734184265, "step": 7660 }, { "epoch": 0.57, "learning_rate": 9.45054319293524e-07, "logits/chosen": -2.006230592727661, "logits/rejected": -1.495962381362915, "logps/chosen": -449.54705810546875, "logps/rejected": -558.7694091796875, "loss": 0.678, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28665995597839355, "rewards/margins": 0.1770254224538803, "rewards/rejected": -0.46368536353111267, "step": 7670 }, { "epoch": 0.57, "learning_rate": 9.424835291186423e-07, "logits/chosen": -2.3226377964019775, "logits/rejected": -1.9124386310577393, "logps/chosen": -474.34014892578125, "logps/rejected": -656.3499755859375, "loss": 0.6815, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3054318428039551, "rewards/margins": 0.18947307765483856, "rewards/rejected": -0.49490490555763245, "step": 7680 }, { "epoch": 0.57, "learning_rate": 9.399131202736798e-07, "logits/chosen": -2.1061689853668213, "logits/rejected": -1.4756815433502197, "logps/chosen": -486.81231689453125, "logps/rejected": -692.4720458984375, "loss": 0.6814, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3306121230125427, "rewards/margins": 0.21108195185661316, "rewards/rejected": -0.5416940450668335, "step": 7690 }, { "epoch": 0.57, "learning_rate": 9.37343109800256e-07, "logits/chosen": -1.9775054454803467, "logits/rejected": -1.491363763809204, "logps/chosen": -488.101318359375, "logps/rejected": -693.1671752929688, "loss": 0.6761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31044715642929077, "rewards/margins": 0.22086456418037415, "rewards/rejected": -0.5313116312026978, "step": 7700 }, { "epoch": 0.57, "learning_rate": 9.347735147373495e-07, "logits/chosen": -2.1812281608581543, "logits/rejected": -1.990342378616333, "logps/chosen": -632.6446533203125, "logps/rejected": -695.8546142578125, "loss": 0.6834, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47798389196395874, "rewards/margins": 0.08072348684072495, "rewards/rejected": -0.5587074160575867, "step": 7710 }, { "epoch": 0.57, "learning_rate": 9.322043521211833e-07, "logits/chosen": -1.9690872430801392, "logits/rejected": -1.4018495082855225, "logps/chosen": -628.050048828125, "logps/rejected": -835.7463989257812, "loss": 0.6755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4547399878501892, "rewards/margins": 0.2292158603668213, "rewards/rejected": -0.6839558482170105, "step": 7720 }, { "epoch": 0.57, "learning_rate": 9.296356389851147e-07, "logits/chosen": -2.041583776473999, "logits/rejected": -1.7285511493682861, "logps/chosen": -452.98297119140625, "logps/rejected": -568.7516479492188, "loss": 0.6813, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3357389569282532, "rewards/margins": 0.14191405475139618, "rewards/rejected": -0.4776530861854553, "step": 7730 }, { "epoch": 0.57, "learning_rate": 9.270673923595209e-07, "logits/chosen": -2.1090641021728516, "logits/rejected": -1.6104481220245361, "logps/chosen": -416.66754150390625, "logps/rejected": -640.0941162109375, "loss": 0.6715, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2853562831878662, "rewards/margins": 0.24177269637584686, "rewards/rejected": -0.5271289944648743, "step": 7740 }, { "epoch": 0.57, "learning_rate": 9.244996292716855e-07, "logits/chosen": -1.8525413274765015, "logits/rejected": -1.3863751888275146, "logps/chosen": -486.83209228515625, "logps/rejected": -637.718017578125, "loss": 0.6782, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3408566117286682, "rewards/margins": 0.20128881931304932, "rewards/rejected": -0.5421454310417175, "step": 7750 }, { "epoch": 0.57, "learning_rate": 9.219323667456866e-07, "logits/chosen": -2.429457902908325, "logits/rejected": -1.8178526163101196, "logps/chosen": -502.39215087890625, "logps/rejected": -671.3609619140625, "loss": 0.6767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.28261488676071167, "rewards/margins": 0.23651961982250214, "rewards/rejected": -0.5191345810890198, "step": 7760 }, { "epoch": 0.57, "learning_rate": 9.193656218022835e-07, "logits/chosen": -2.0472922325134277, "logits/rejected": -1.4559301137924194, "logps/chosen": -508.2723693847656, "logps/rejected": -700.1451416015625, "loss": 0.676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3588964641094208, "rewards/margins": 0.2295243740081787, "rewards/rejected": -0.5884208679199219, "step": 7770 }, { "epoch": 0.57, "learning_rate": 9.167994114588043e-07, "logits/chosen": -2.318049669265747, "logits/rejected": -1.9074256420135498, "logps/chosen": -519.6165771484375, "logps/rejected": -630.83056640625, "loss": 0.6804, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3509068787097931, "rewards/margins": 0.1530608832836151, "rewards/rejected": -0.5039677619934082, "step": 7780 }, { "epoch": 0.57, "learning_rate": 9.142337527290324e-07, "logits/chosen": -1.9518425464630127, "logits/rejected": -1.6413418054580688, "logps/chosen": -460.478515625, "logps/rejected": -614.3843994140625, "loss": 0.6804, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2740912437438965, "rewards/margins": 0.1932670623064041, "rewards/rejected": -0.4673582911491394, "step": 7790 }, { "epoch": 0.58, "learning_rate": 9.116686626230939e-07, "logits/chosen": -2.291163682937622, "logits/rejected": -1.7667274475097656, "logps/chosen": -410.030029296875, "logps/rejected": -649.53515625, "loss": 0.6809, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.28005847334861755, "rewards/margins": 0.23983433842658997, "rewards/rejected": -0.5198928117752075, "step": 7800 }, { "epoch": 0.58, "learning_rate": 9.091041581473457e-07, "logits/chosen": -2.188060760498047, "logits/rejected": -1.621809720993042, "logps/chosen": -515.1104125976562, "logps/rejected": -663.5375366210938, "loss": 0.6811, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33936017751693726, "rewards/margins": 0.18403072655200958, "rewards/rejected": -0.5233908891677856, "step": 7810 }, { "epoch": 0.58, "learning_rate": 9.065402563042605e-07, "logits/chosen": -2.2585301399230957, "logits/rejected": -1.7128463983535767, "logps/chosen": -407.33984375, "logps/rejected": -605.4373168945312, "loss": 0.6826, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.22490373253822327, "rewards/margins": 0.22867615520954132, "rewards/rejected": -0.4535799026489258, "step": 7820 }, { "epoch": 0.58, "learning_rate": 9.039769740923182e-07, "logits/chosen": -2.2661023139953613, "logits/rejected": -1.7961671352386475, "logps/chosen": -449.54949951171875, "logps/rejected": -587.9223022460938, "loss": 0.6748, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2665981650352478, "rewards/margins": 0.1783619225025177, "rewards/rejected": -0.4449601173400879, "step": 7830 }, { "epoch": 0.58, "learning_rate": 9.014143285058879e-07, "logits/chosen": -2.0676918029785156, "logits/rejected": -1.5911591053009033, "logps/chosen": -492.6497497558594, "logps/rejected": -730.5531005859375, "loss": 0.6742, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3116580843925476, "rewards/margins": 0.2454153597354889, "rewards/rejected": -0.5570734739303589, "step": 7840 }, { "epoch": 0.58, "learning_rate": 8.988523365351196e-07, "logits/chosen": -2.1170926094055176, "logits/rejected": -1.357446312904358, "logps/chosen": -490.25628662109375, "logps/rejected": -711.0816650390625, "loss": 0.6748, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.32515111565589905, "rewards/margins": 0.2659386694431305, "rewards/rejected": -0.5910897850990295, "step": 7850 }, { "epoch": 0.58, "learning_rate": 8.962910151658288e-07, "logits/chosen": -2.093125820159912, "logits/rejected": -1.5441393852233887, "logps/chosen": -544.5860595703125, "logps/rejected": -600.783935546875, "loss": 0.6811, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.29100656509399414, "rewards/margins": 0.1597594916820526, "rewards/rejected": -0.45076602697372437, "step": 7860 }, { "epoch": 0.58, "learning_rate": 8.937303813793857e-07, "logits/chosen": -2.226034164428711, "logits/rejected": -1.672776460647583, "logps/chosen": -479.85028076171875, "logps/rejected": -638.9367065429688, "loss": 0.6781, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3000134229660034, "rewards/margins": 0.1855669766664505, "rewards/rejected": -0.4855804443359375, "step": 7870 }, { "epoch": 0.58, "learning_rate": 8.91170452152602e-07, "logits/chosen": -2.074187755584717, "logits/rejected": -1.392464280128479, "logps/chosen": -470.930419921875, "logps/rejected": -688.521240234375, "loss": 0.6675, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3165910840034485, "rewards/margins": 0.2549256980419159, "rewards/rejected": -0.5715168118476868, "step": 7880 }, { "epoch": 0.58, "learning_rate": 8.886112444576173e-07, "logits/chosen": -2.329883098602295, "logits/rejected": -1.766724944114685, "logps/chosen": -493.05267333984375, "logps/rejected": -572.3836059570312, "loss": 0.6857, "rewards/accuracies": 0.625, "rewards/chosen": -0.2979585528373718, "rewards/margins": 0.14938543736934662, "rewards/rejected": -0.44734400510787964, "step": 7890 }, { "epoch": 0.58, "learning_rate": 8.860527752617881e-07, "logits/chosen": -2.159447193145752, "logits/rejected": -1.5905570983886719, "logps/chosen": -565.7604370117188, "logps/rejected": -732.3552856445312, "loss": 0.6736, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4078506529331207, "rewards/margins": 0.18509279191493988, "rewards/rejected": -0.5929433703422546, "step": 7900 }, { "epoch": 0.58, "learning_rate": 8.834950615275749e-07, "logits/chosen": -2.239717721939087, "logits/rejected": -1.846816062927246, "logps/chosen": -521.6817626953125, "logps/rejected": -685.7940673828125, "loss": 0.6794, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3363921642303467, "rewards/margins": 0.21023952960968018, "rewards/rejected": -0.5466316938400269, "step": 7910 }, { "epoch": 0.58, "learning_rate": 8.809381202124294e-07, "logits/chosen": -2.115171432495117, "logits/rejected": -1.653999924659729, "logps/chosen": -589.0897827148438, "logps/rejected": -754.7388916015625, "loss": 0.68, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3922828137874603, "rewards/margins": 0.18141032755374908, "rewards/rejected": -0.5736931562423706, "step": 7920 }, { "epoch": 0.58, "learning_rate": 8.783819682686824e-07, "logits/chosen": -1.9068939685821533, "logits/rejected": -1.3181705474853516, "logps/chosen": -561.0936889648438, "logps/rejected": -757.3837280273438, "loss": 0.6793, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4318333566188812, "rewards/margins": 0.21584472060203552, "rewards/rejected": -0.6476780772209167, "step": 7930 }, { "epoch": 0.59, "learning_rate": 8.758266226434303e-07, "logits/chosen": -2.1655139923095703, "logits/rejected": -1.6844466924667358, "logps/chosen": -654.3300170898438, "logps/rejected": -811.7640380859375, "loss": 0.6787, "rewards/accuracies": 0.625, "rewards/chosen": -0.49913960695266724, "rewards/margins": 0.1601150631904602, "rewards/rejected": -0.6592546701431274, "step": 7940 }, { "epoch": 0.59, "learning_rate": 8.732721002784247e-07, "logits/chosen": -1.868781328201294, "logits/rejected": -1.3200758695602417, "logps/chosen": -692.6360473632812, "logps/rejected": -861.0704345703125, "loss": 0.6774, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5370381474494934, "rewards/margins": 0.185677632689476, "rewards/rejected": -0.7227157354354858, "step": 7950 }, { "epoch": 0.59, "learning_rate": 8.707184181099587e-07, "logits/chosen": -2.0907301902770996, "logits/rejected": -1.5912387371063232, "logps/chosen": -596.0416259765625, "logps/rejected": -718.9899291992188, "loss": 0.6842, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.482164204120636, "rewards/margins": 0.15118920803070068, "rewards/rejected": -0.6333533525466919, "step": 7960 }, { "epoch": 0.59, "learning_rate": 8.681655930687549e-07, "logits/chosen": -1.9194139242172241, "logits/rejected": -1.3844929933547974, "logps/chosen": -559.4663696289062, "logps/rejected": -781.9696044921875, "loss": 0.6723, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.42747610807418823, "rewards/margins": 0.24487152695655823, "rewards/rejected": -0.6723476648330688, "step": 7970 }, { "epoch": 0.59, "learning_rate": 8.656136420798532e-07, "logits/chosen": -2.3183705806732178, "logits/rejected": -1.7872848510742188, "logps/chosen": -616.1174926757812, "logps/rejected": -749.3543701171875, "loss": 0.6762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3830505907535553, "rewards/margins": 0.18782643973827362, "rewards/rejected": -0.5708770155906677, "step": 7980 }, { "epoch": 0.59, "learning_rate": 8.630625820624986e-07, "logits/chosen": -2.0517988204956055, "logits/rejected": -1.540780782699585, "logps/chosen": -554.4533081054688, "logps/rejected": -679.123046875, "loss": 0.6829, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4215543270111084, "rewards/margins": 0.1670783907175064, "rewards/rejected": -0.5886327028274536, "step": 7990 }, { "epoch": 0.59, "learning_rate": 8.605124299300289e-07, "logits/chosen": -1.9378564357757568, "logits/rejected": -1.378713846206665, "logps/chosen": -522.533203125, "logps/rejected": -730.0491943359375, "loss": 0.6779, "rewards/accuracies": 0.75, "rewards/chosen": -0.40773048996925354, "rewards/margins": 0.21783597767353058, "rewards/rejected": -0.6255664229393005, "step": 8000 }, { "epoch": 0.59, "learning_rate": 8.579632025897634e-07, "logits/chosen": -2.039006233215332, "logits/rejected": -1.5531352758407593, "logps/chosen": -575.739013671875, "logps/rejected": -729.8787841796875, "loss": 0.6758, "rewards/accuracies": 0.75, "rewards/chosen": -0.43864932656288147, "rewards/margins": 0.16787084937095642, "rewards/rejected": -0.6065202355384827, "step": 8010 }, { "epoch": 0.59, "learning_rate": 8.554149169428892e-07, "logits/chosen": -2.017364978790283, "logits/rejected": -1.4568296670913696, "logps/chosen": -655.77685546875, "logps/rejected": -813.9884033203125, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -0.5079465508460999, "rewards/margins": 0.19929556548595428, "rewards/rejected": -0.7072421312332153, "step": 8020 }, { "epoch": 0.59, "learning_rate": 8.528675898843507e-07, "logits/chosen": -2.1943936347961426, "logits/rejected": -1.5908231735229492, "logps/chosen": -562.1986694335938, "logps/rejected": -729.674072265625, "loss": 0.6797, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39102432131767273, "rewards/margins": 0.20151451230049133, "rewards/rejected": -0.5925388336181641, "step": 8030 }, { "epoch": 0.59, "learning_rate": 8.503212383027362e-07, "logits/chosen": -2.0193402767181396, "logits/rejected": -1.3149906396865845, "logps/chosen": -502.23809814453125, "logps/rejected": -704.9457397460938, "loss": 0.6736, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3383867144584656, "rewards/margins": 0.2528292238712311, "rewards/rejected": -0.591215968132019, "step": 8040 }, { "epoch": 0.59, "learning_rate": 8.477758790801673e-07, "logits/chosen": -2.2514190673828125, "logits/rejected": -1.617310881614685, "logps/chosen": -633.170654296875, "logps/rejected": -772.0711669921875, "loss": 0.6746, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4124244153499603, "rewards/margins": 0.21389015018939972, "rewards/rejected": -0.626314640045166, "step": 8050 }, { "epoch": 0.59, "learning_rate": 8.452315290921864e-07, "logits/chosen": -2.187363862991333, "logits/rejected": -1.81942880153656, "logps/chosen": -487.08074951171875, "logps/rejected": -619.3670654296875, "loss": 0.6824, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3397161662578583, "rewards/margins": 0.14346668124198914, "rewards/rejected": -0.4831829071044922, "step": 8060 }, { "epoch": 0.6, "learning_rate": 8.42688205207644e-07, "logits/chosen": -1.9222972393035889, "logits/rejected": -1.4182207584381104, "logps/chosen": -535.9613647460938, "logps/rejected": -765.598388671875, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.4167323112487793, "rewards/margins": 0.2213503122329712, "rewards/rejected": -0.6380825638771057, "step": 8070 }, { "epoch": 0.6, "learning_rate": 8.40145924288588e-07, "logits/chosen": -2.1452159881591797, "logits/rejected": -1.4765632152557373, "logps/chosen": -529.4765625, "logps/rejected": -696.1754150390625, "loss": 0.676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3434954285621643, "rewards/margins": 0.21109943091869354, "rewards/rejected": -0.5545949339866638, "step": 8080 }, { "epoch": 0.6, "learning_rate": 8.376047031901522e-07, "logits/chosen": -2.367938280105591, "logits/rejected": -1.5650262832641602, "logps/chosen": -533.74560546875, "logps/rejected": -708.0592651367188, "loss": 0.676, "rewards/accuracies": 0.875, "rewards/chosen": -0.3377351462841034, "rewards/margins": 0.24819199740886688, "rewards/rejected": -0.5859271287918091, "step": 8090 }, { "epoch": 0.6, "learning_rate": 8.350645587604431e-07, "logits/chosen": -2.197082042694092, "logits/rejected": -1.729156494140625, "logps/chosen": -451.05413818359375, "logps/rejected": -593.6402587890625, "loss": 0.6803, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3353244364261627, "rewards/margins": 0.16118952631950378, "rewards/rejected": -0.4965139329433441, "step": 8100 }, { "epoch": 0.6, "learning_rate": 8.32525507840429e-07, "logits/chosen": -1.968030571937561, "logits/rejected": -1.4250819683074951, "logps/chosen": -540.2307739257812, "logps/rejected": -695.5989990234375, "loss": 0.6865, "rewards/accuracies": 0.625, "rewards/chosen": -0.3834446668624878, "rewards/margins": 0.1683390736579895, "rewards/rejected": -0.5517838001251221, "step": 8110 }, { "epoch": 0.6, "learning_rate": 8.299875672638283e-07, "logits/chosen": -2.0386221408843994, "logits/rejected": -1.5875909328460693, "logps/chosen": -552.9171752929688, "logps/rejected": -694.0001831054688, "loss": 0.6787, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40761494636535645, "rewards/margins": 0.17114706337451935, "rewards/rejected": -0.5787619352340698, "step": 8120 }, { "epoch": 0.6, "learning_rate": 8.274507538569986e-07, "logits/chosen": -1.8461742401123047, "logits/rejected": -1.3303042650222778, "logps/chosen": -606.8355102539062, "logps/rejected": -780.2529296875, "loss": 0.6761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.43703693151474, "rewards/margins": 0.2086491882801056, "rewards/rejected": -0.645686149597168, "step": 8130 }, { "epoch": 0.6, "learning_rate": 8.249150844388232e-07, "logits/chosen": -2.053286075592041, "logits/rejected": -1.5539348125457764, "logps/chosen": -516.9298706054688, "logps/rejected": -666.6165771484375, "loss": 0.6768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.376163125038147, "rewards/margins": 0.18096207082271576, "rewards/rejected": -0.5571252107620239, "step": 8140 }, { "epoch": 0.6, "learning_rate": 8.223805758206019e-07, "logits/chosen": -1.7628061771392822, "logits/rejected": -1.4413455724716187, "logps/chosen": -480.6449279785156, "logps/rejected": -673.9800415039062, "loss": 0.6765, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3608279526233673, "rewards/margins": 0.21324476599693298, "rewards/rejected": -0.5740727186203003, "step": 8150 }, { "epoch": 0.6, "learning_rate": 8.198472448059383e-07, "logits/chosen": -2.1914093494415283, "logits/rejected": -1.7526919841766357, "logps/chosen": -442.07666015625, "logps/rejected": -606.3175048828125, "loss": 0.6785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3116009831428528, "rewards/margins": 0.1721917688846588, "rewards/rejected": -0.483792781829834, "step": 8160 }, { "epoch": 0.6, "learning_rate": 8.173151081906278e-07, "logits/chosen": -1.977677345275879, "logits/rejected": -1.5701504945755005, "logps/chosen": -539.7857055664062, "logps/rejected": -699.1865844726562, "loss": 0.6829, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3545943796634674, "rewards/margins": 0.19235776364803314, "rewards/rejected": -0.5469521284103394, "step": 8170 }, { "epoch": 0.6, "learning_rate": 8.147841827625485e-07, "logits/chosen": -1.990902304649353, "logits/rejected": -1.5261456966400146, "logps/chosen": -505.59930419921875, "logps/rejected": -666.4114379882812, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3767496645450592, "rewards/margins": 0.1962900459766388, "rewards/rejected": -0.5730396509170532, "step": 8180 }, { "epoch": 0.6, "learning_rate": 8.122544853015469e-07, "logits/chosen": -2.2355000972747803, "logits/rejected": -1.7133270502090454, "logps/chosen": -593.3885498046875, "logps/rejected": -681.4602661132812, "loss": 0.6775, "rewards/accuracies": 0.75, "rewards/chosen": -0.3442772328853607, "rewards/margins": 0.1638968288898468, "rewards/rejected": -0.5081740617752075, "step": 8190 }, { "epoch": 0.6, "learning_rate": 8.097260325793294e-07, "logits/chosen": -2.2519497871398926, "logits/rejected": -1.4795560836791992, "logps/chosen": -464.7041931152344, "logps/rejected": -637.2764892578125, "loss": 0.6783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3216002583503723, "rewards/margins": 0.21077215671539307, "rewards/rejected": -0.5323723554611206, "step": 8200 }, { "epoch": 0.61, "learning_rate": 8.071988413593485e-07, "logits/chosen": -2.0264837741851807, "logits/rejected": -1.6325067281723022, "logps/chosen": -665.4554443359375, "logps/rejected": -731.5895385742188, "loss": 0.6801, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4821698069572449, "rewards/margins": 0.1280933916568756, "rewards/rejected": -0.6102632284164429, "step": 8210 }, { "epoch": 0.61, "learning_rate": 8.046729283966943e-07, "logits/chosen": -2.2196850776672363, "logits/rejected": -1.731212854385376, "logps/chosen": -467.7389221191406, "logps/rejected": -620.0308837890625, "loss": 0.675, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30653297901153564, "rewards/margins": 0.20063868165016174, "rewards/rejected": -0.5071717500686646, "step": 8220 }, { "epoch": 0.61, "learning_rate": 8.021483104379818e-07, "logits/chosen": -2.059840679168701, "logits/rejected": -1.6270462274551392, "logps/chosen": -584.4564208984375, "logps/rejected": -764.6896362304688, "loss": 0.6769, "rewards/accuracies": 0.75, "rewards/chosen": -0.43418771028518677, "rewards/margins": 0.19746635854244232, "rewards/rejected": -0.6316541433334351, "step": 8230 }, { "epoch": 0.61, "learning_rate": 7.996250042212398e-07, "logits/chosen": -1.8237874507904053, "logits/rejected": -1.582890272140503, "logps/chosen": -546.3599243164062, "logps/rejected": -711.2276611328125, "loss": 0.6776, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4105798304080963, "rewards/margins": 0.17162223160266876, "rewards/rejected": -0.5822020769119263, "step": 8240 }, { "epoch": 0.61, "learning_rate": 7.971030264758007e-07, "logits/chosen": -2.117709159851074, "logits/rejected": -1.6131359338760376, "logps/chosen": -453.8822326660156, "logps/rejected": -664.548583984375, "loss": 0.6755, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3155469298362732, "rewards/margins": 0.23456640541553497, "rewards/rejected": -0.550113320350647, "step": 8250 }, { "epoch": 0.61, "learning_rate": 7.94582393922189e-07, "logits/chosen": -1.9141390323638916, "logits/rejected": -1.55727219581604, "logps/chosen": -506.02490234375, "logps/rejected": -659.3465576171875, "loss": 0.6806, "rewards/accuracies": 0.625, "rewards/chosen": -0.36023104190826416, "rewards/margins": 0.1897924393415451, "rewards/rejected": -0.5500234365463257, "step": 8260 }, { "epoch": 0.61, "learning_rate": 7.920631232720111e-07, "logits/chosen": -2.2000651359558105, "logits/rejected": -1.677107572555542, "logps/chosen": -527.6670532226562, "logps/rejected": -666.46337890625, "loss": 0.6776, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3666800856590271, "rewards/margins": 0.18416160345077515, "rewards/rejected": -0.550841748714447, "step": 8270 }, { "epoch": 0.61, "learning_rate": 7.895452312278442e-07, "logits/chosen": -2.073591709136963, "logits/rejected": -1.6107839345932007, "logps/chosen": -385.716796875, "logps/rejected": -542.6244506835938, "loss": 0.677, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.20335707068443298, "rewards/margins": 0.21189677715301514, "rewards/rejected": -0.4152538776397705, "step": 8280 }, { "epoch": 0.61, "learning_rate": 7.870287344831245e-07, "logits/chosen": -2.1374950408935547, "logits/rejected": -1.698887825012207, "logps/chosen": -460.6453552246094, "logps/rejected": -679.6541748046875, "loss": 0.6735, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.321037232875824, "rewards/margins": 0.24038293957710266, "rewards/rejected": -0.5614200830459595, "step": 8290 }, { "epoch": 0.61, "learning_rate": 7.845136497220381e-07, "logits/chosen": -1.9105936288833618, "logits/rejected": -1.3260071277618408, "logps/chosen": -608.5709228515625, "logps/rejected": -676.6468505859375, "loss": 0.6803, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3988291025161743, "rewards/margins": 0.16976319253444672, "rewards/rejected": -0.5685923099517822, "step": 8300 }, { "epoch": 0.61, "learning_rate": 7.819999936194102e-07, "logits/chosen": -1.968349814414978, "logits/rejected": -1.555602788925171, "logps/chosen": -710.207763671875, "logps/rejected": -803.3807373046875, "loss": 0.6832, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5269219875335693, "rewards/margins": 0.13975460827350616, "rewards/rejected": -0.6666765213012695, "step": 8310 }, { "epoch": 0.61, "learning_rate": 7.79487782840593e-07, "logits/chosen": -2.093312978744507, "logits/rejected": -1.3741432428359985, "logps/chosen": -512.59814453125, "logps/rejected": -726.5977783203125, "loss": 0.6737, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.35498982667922974, "rewards/margins": 0.23157374560832977, "rewards/rejected": -0.5865635275840759, "step": 8320 }, { "epoch": 0.61, "learning_rate": 7.769770340413574e-07, "logits/chosen": -2.4317195415496826, "logits/rejected": -1.7523696422576904, "logps/chosen": -498.4124450683594, "logps/rejected": -693.2496337890625, "loss": 0.67, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3245488405227661, "rewards/margins": 0.24395576119422913, "rewards/rejected": -0.5685045719146729, "step": 8330 }, { "epoch": 0.62, "learning_rate": 7.744677638677805e-07, "logits/chosen": -2.1772098541259766, "logits/rejected": -1.680970549583435, "logps/chosen": -492.1055603027344, "logps/rejected": -595.9473876953125, "loss": 0.6793, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3295289874076843, "rewards/margins": 0.13554063439369202, "rewards/rejected": -0.46506962180137634, "step": 8340 }, { "epoch": 0.62, "learning_rate": 7.719599889561368e-07, "logits/chosen": -2.1314282417297363, "logits/rejected": -1.602426528930664, "logps/chosen": -552.0676879882812, "logps/rejected": -705.2737426757812, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4095708429813385, "rewards/margins": 0.19740328192710876, "rewards/rejected": -0.6069741249084473, "step": 8350 }, { "epoch": 0.62, "learning_rate": 7.694537259327876e-07, "logits/chosen": -2.010399580001831, "logits/rejected": -1.5144026279449463, "logps/chosen": -582.93798828125, "logps/rejected": -762.482421875, "loss": 0.6777, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3860655426979065, "rewards/margins": 0.20728985965251923, "rewards/rejected": -0.5933553576469421, "step": 8360 }, { "epoch": 0.62, "learning_rate": 7.6694899141407e-07, "logits/chosen": -2.0907320976257324, "logits/rejected": -1.5521230697631836, "logps/chosen": -537.6409301757812, "logps/rejected": -716.0357055664062, "loss": 0.681, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38915905356407166, "rewards/margins": 0.20285575091838837, "rewards/rejected": -0.5920148491859436, "step": 8370 }, { "epoch": 0.62, "learning_rate": 7.644458020061875e-07, "logits/chosen": -2.188434600830078, "logits/rejected": -1.6081745624542236, "logps/chosen": -564.1373901367188, "logps/rejected": -771.2646484375, "loss": 0.6783, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39271050691604614, "rewards/margins": 0.23232457041740417, "rewards/rejected": -0.6250351071357727, "step": 8380 }, { "epoch": 0.62, "learning_rate": 7.619441743050992e-07, "logits/chosen": -2.0229694843292236, "logits/rejected": -1.4888732433319092, "logps/chosen": -547.3862915039062, "logps/rejected": -720.897705078125, "loss": 0.6794, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4058964252471924, "rewards/margins": 0.2052907943725586, "rewards/rejected": -0.611187219619751, "step": 8390 }, { "epoch": 0.62, "learning_rate": 7.594441248964106e-07, "logits/chosen": -2.1620700359344482, "logits/rejected": -1.6650936603546143, "logps/chosen": -452.0603942871094, "logps/rejected": -590.6874389648438, "loss": 0.6759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29938724637031555, "rewards/margins": 0.19965402781963348, "rewards/rejected": -0.49904125928878784, "step": 8400 }, { "epoch": 0.62, "learning_rate": 7.569456703552635e-07, "logits/chosen": -2.1608829498291016, "logits/rejected": -1.5417696237564087, "logps/chosen": -456.98785400390625, "logps/rejected": -654.5198974609375, "loss": 0.6763, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29214197397232056, "rewards/margins": 0.2567623257637024, "rewards/rejected": -0.548904299736023, "step": 8410 }, { "epoch": 0.62, "learning_rate": 7.544488272462248e-07, "logits/chosen": -2.1347854137420654, "logits/rejected": -1.7564709186553955, "logps/chosen": -535.36279296875, "logps/rejected": -688.6166381835938, "loss": 0.6803, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3837220072746277, "rewards/margins": 0.17627482116222382, "rewards/rejected": -0.5599969029426575, "step": 8420 }, { "epoch": 0.62, "learning_rate": 7.519536121231786e-07, "logits/chosen": -2.188035249710083, "logits/rejected": -1.6748816967010498, "logps/chosen": -511.89501953125, "logps/rejected": -701.8272094726562, "loss": 0.685, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3217531144618988, "rewards/margins": 0.2177658975124359, "rewards/rejected": -0.5395190119743347, "step": 8430 }, { "epoch": 0.62, "learning_rate": 7.494600415292156e-07, "logits/chosen": -2.1104540824890137, "logits/rejected": -1.5475085973739624, "logps/chosen": -521.2024536132812, "logps/rejected": -664.3460693359375, "loss": 0.6802, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3677118420600891, "rewards/margins": 0.19234968721866608, "rewards/rejected": -0.560061514377594, "step": 8440 }, { "epoch": 0.62, "learning_rate": 7.469681319965235e-07, "logits/chosen": -1.9376354217529297, "logits/rejected": -1.5409553050994873, "logps/chosen": -475.6986389160156, "logps/rejected": -606.138671875, "loss": 0.6825, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.36763325333595276, "rewards/margins": 0.13792024552822113, "rewards/rejected": -0.5055534839630127, "step": 8450 }, { "epoch": 0.62, "learning_rate": 7.444779000462763e-07, "logits/chosen": -2.251073122024536, "logits/rejected": -1.603684663772583, "logps/chosen": -529.8258666992188, "logps/rejected": -677.3218994140625, "loss": 0.6784, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3568304777145386, "rewards/margins": 0.21272936463356018, "rewards/rejected": -0.5695598721504211, "step": 8460 }, { "epoch": 0.62, "learning_rate": 7.419893621885267e-07, "logits/chosen": -1.7765016555786133, "logits/rejected": -1.546445608139038, "logps/chosen": -482.62896728515625, "logps/rejected": -620.5966186523438, "loss": 0.6819, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3351251184940338, "rewards/margins": 0.15536092221736908, "rewards/rejected": -0.4904860556125641, "step": 8470 }, { "epoch": 0.63, "learning_rate": 7.395025349220957e-07, "logits/chosen": -2.2052664756774902, "logits/rejected": -1.7501710653305054, "logps/chosen": -437.09423828125, "logps/rejected": -602.45263671875, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29354581236839294, "rewards/margins": 0.16585808992385864, "rewards/rejected": -0.459403932094574, "step": 8480 }, { "epoch": 0.63, "learning_rate": 7.370174347344624e-07, "logits/chosen": -2.0977048873901367, "logits/rejected": -1.4465965032577515, "logps/chosen": -457.1751403808594, "logps/rejected": -604.6024780273438, "loss": 0.6828, "rewards/accuracies": 0.625, "rewards/chosen": -0.2953993082046509, "rewards/margins": 0.1878480464220047, "rewards/rejected": -0.4832473695278168, "step": 8490 }, { "epoch": 0.63, "learning_rate": 7.345340781016562e-07, "logits/chosen": -2.004624128341675, "logits/rejected": -1.5905640125274658, "logps/chosen": -416.42913818359375, "logps/rejected": -615.2310791015625, "loss": 0.6773, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.27361369132995605, "rewards/margins": 0.21810874342918396, "rewards/rejected": -0.4917224049568176, "step": 8500 }, { "epoch": 0.63, "learning_rate": 7.32052481488147e-07, "logits/chosen": -2.1128876209259033, "logits/rejected": -1.7069343328475952, "logps/chosen": -493.61798095703125, "logps/rejected": -645.009033203125, "loss": 0.6789, "rewards/accuracies": 0.75, "rewards/chosen": -0.29455921053886414, "rewards/margins": 0.18263967335224152, "rewards/rejected": -0.47719889879226685, "step": 8510 }, { "epoch": 0.63, "learning_rate": 7.295726613467351e-07, "logits/chosen": -1.9948211908340454, "logits/rejected": -1.5149726867675781, "logps/chosen": -532.598876953125, "logps/rejected": -698.2777099609375, "loss": 0.6754, "rewards/accuracies": 0.75, "rewards/chosen": -0.3663102984428406, "rewards/margins": 0.19079847633838654, "rewards/rejected": -0.5571087598800659, "step": 8520 }, { "epoch": 0.63, "learning_rate": 7.270946341184432e-07, "logits/chosen": -2.007758617401123, "logits/rejected": -1.7146692276000977, "logps/chosen": -531.3564453125, "logps/rejected": -702.7527465820312, "loss": 0.6821, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37035828828811646, "rewards/margins": 0.17738519608974457, "rewards/rejected": -0.5477434396743774, "step": 8530 }, { "epoch": 0.63, "learning_rate": 7.246184162324082e-07, "logits/chosen": -2.241041421890259, "logits/rejected": -1.5949156284332275, "logps/chosen": -465.193603515625, "logps/rejected": -589.6948852539062, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29073622822761536, "rewards/margins": 0.17839983105659485, "rewards/rejected": -0.4691360592842102, "step": 8540 }, { "epoch": 0.63, "learning_rate": 7.221440241057699e-07, "logits/chosen": -2.1380763053894043, "logits/rejected": -1.6570279598236084, "logps/chosen": -451.36865234375, "logps/rejected": -602.4569702148438, "loss": 0.6814, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33328890800476074, "rewards/margins": 0.16088657081127167, "rewards/rejected": -0.49417543411254883, "step": 8550 }, { "epoch": 0.63, "learning_rate": 7.196714741435635e-07, "logits/chosen": -2.2626290321350098, "logits/rejected": -1.8501875400543213, "logps/chosen": -490.504638671875, "logps/rejected": -603.4928588867188, "loss": 0.6854, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3051280081272125, "rewards/margins": 0.16793768107891083, "rewards/rejected": -0.47306567430496216, "step": 8560 }, { "epoch": 0.63, "learning_rate": 7.172007827386116e-07, "logits/chosen": -1.9858976602554321, "logits/rejected": -1.4301769733428955, "logps/chosen": -438.97808837890625, "logps/rejected": -584.3182983398438, "loss": 0.6754, "rewards/accuracies": 0.75, "rewards/chosen": -0.28359970450401306, "rewards/margins": 0.1805255264043808, "rewards/rejected": -0.4641251564025879, "step": 8570 }, { "epoch": 0.63, "learning_rate": 7.147319662714144e-07, "logits/chosen": -2.193336009979248, "logits/rejected": -1.4644556045532227, "logps/chosen": -652.5821533203125, "logps/rejected": -742.8623657226562, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41907253861427307, "rewards/margins": 0.16824768483638763, "rewards/rejected": -0.5873202085494995, "step": 8580 }, { "epoch": 0.63, "learning_rate": 7.122650411100406e-07, "logits/chosen": -2.0596330165863037, "logits/rejected": -1.4012545347213745, "logps/chosen": -630.3573608398438, "logps/rejected": -816.5083618164062, "loss": 0.6777, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4648091793060303, "rewards/margins": 0.1915130317211151, "rewards/rejected": -0.656322181224823, "step": 8590 }, { "epoch": 0.63, "learning_rate": 7.098000236100208e-07, "logits/chosen": -2.2141449451446533, "logits/rejected": -1.7995373010635376, "logps/chosen": -608.6012573242188, "logps/rejected": -775.7775268554688, "loss": 0.6781, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.45448002219200134, "rewards/margins": 0.19960132241249084, "rewards/rejected": -0.6540812849998474, "step": 8600 }, { "epoch": 0.64, "learning_rate": 7.073369301142375e-07, "logits/chosen": -1.9989715814590454, "logits/rejected": -1.2374694347381592, "logps/chosen": -632.7100830078125, "logps/rejected": -781.5244140625, "loss": 0.6775, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.45587247610092163, "rewards/margins": 0.22602996230125427, "rewards/rejected": -0.6819024682044983, "step": 8610 }, { "epoch": 0.64, "learning_rate": 7.048757769528166e-07, "logits/chosen": -1.9669990539550781, "logits/rejected": -1.2890013456344604, "logps/chosen": -554.2747802734375, "logps/rejected": -759.889892578125, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3875698447227478, "rewards/margins": 0.24447545409202576, "rewards/rejected": -0.632045328617096, "step": 8620 }, { "epoch": 0.64, "learning_rate": 7.024165804430215e-07, "logits/chosen": -2.237870693206787, "logits/rejected": -1.591373085975647, "logps/chosen": -596.44970703125, "logps/rejected": -803.5882568359375, "loss": 0.6765, "rewards/accuracies": 0.875, "rewards/chosen": -0.4253896176815033, "rewards/margins": 0.2429129183292389, "rewards/rejected": -0.6683025360107422, "step": 8630 }, { "epoch": 0.64, "learning_rate": 6.999593568891409e-07, "logits/chosen": -2.0108819007873535, "logits/rejected": -1.2091091871261597, "logps/chosen": -603.4434814453125, "logps/rejected": -803.8687744140625, "loss": 0.6752, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4334178566932678, "rewards/margins": 0.2774984538555145, "rewards/rejected": -0.71091628074646, "step": 8640 }, { "epoch": 0.64, "learning_rate": 6.975041225823843e-07, "logits/chosen": -2.1185245513916016, "logits/rejected": -1.417406678199768, "logps/chosen": -589.4439697265625, "logps/rejected": -714.6670532226562, "loss": 0.6764, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3906700611114502, "rewards/margins": 0.20367200672626495, "rewards/rejected": -0.5943421125411987, "step": 8650 }, { "epoch": 0.64, "learning_rate": 6.950508938007729e-07, "logits/chosen": -1.9598004817962646, "logits/rejected": -1.4747965335845947, "logps/chosen": -574.0675048828125, "logps/rejected": -638.275146484375, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -0.36654824018478394, "rewards/margins": 0.14145049452781677, "rewards/rejected": -0.5079987645149231, "step": 8660 }, { "epoch": 0.64, "learning_rate": 6.9259968680903e-07, "logits/chosen": -2.205514669418335, "logits/rejected": -1.6942672729492188, "logps/chosen": -489.58465576171875, "logps/rejected": -662.4784545898438, "loss": 0.6768, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34330517053604126, "rewards/margins": 0.18943773210048676, "rewards/rejected": -0.5327428579330444, "step": 8670 }, { "epoch": 0.64, "learning_rate": 6.901505178584761e-07, "logits/chosen": -1.9675791263580322, "logits/rejected": -1.5814982652664185, "logps/chosen": -463.96630859375, "logps/rejected": -590.6701049804688, "loss": 0.6778, "rewards/accuracies": 0.75, "rewards/chosen": -0.3161087930202484, "rewards/margins": 0.15269628167152405, "rewards/rejected": -0.46880507469177246, "step": 8680 }, { "epoch": 0.64, "learning_rate": 6.877034031869184e-07, "logits/chosen": -2.178333044052124, "logits/rejected": -1.8980128765106201, "logps/chosen": -455.6803283691406, "logps/rejected": -609.7166748046875, "loss": 0.6784, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3055882155895233, "rewards/margins": 0.16015908122062683, "rewards/rejected": -0.46574729681015015, "step": 8690 }, { "epoch": 0.64, "learning_rate": 6.852583590185454e-07, "logits/chosen": -2.2264513969421387, "logits/rejected": -1.6571474075317383, "logps/chosen": -498.14971923828125, "logps/rejected": -654.5874633789062, "loss": 0.6735, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3194701075553894, "rewards/margins": 0.2166365087032318, "rewards/rejected": -0.5361066460609436, "step": 8700 }, { "epoch": 0.64, "learning_rate": 6.828154015638175e-07, "logits/chosen": -2.172942876815796, "logits/rejected": -1.6762641668319702, "logps/chosen": -530.8294677734375, "logps/rejected": -674.2601318359375, "loss": 0.6779, "rewards/accuracies": 0.75, "rewards/chosen": -0.36105436086654663, "rewards/margins": 0.18913230299949646, "rewards/rejected": -0.5501866936683655, "step": 8710 }, { "epoch": 0.64, "learning_rate": 6.803745470193613e-07, "logits/chosen": -2.12288498878479, "logits/rejected": -1.5167837142944336, "logps/chosen": -498.7975158691406, "logps/rejected": -695.2388916015625, "loss": 0.6768, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.33556562662124634, "rewards/margins": 0.2506266236305237, "rewards/rejected": -0.58619225025177, "step": 8720 }, { "epoch": 0.64, "learning_rate": 6.779358115678606e-07, "logits/chosen": -1.97334885597229, "logits/rejected": -1.8157390356063843, "logps/chosen": -478.4043884277344, "logps/rejected": -586.539794921875, "loss": 0.6873, "rewards/accuracies": 0.5, "rewards/chosen": -0.34921741485595703, "rewards/margins": 0.12266378104686737, "rewards/rejected": -0.4718811511993408, "step": 8730 }, { "epoch": 0.64, "learning_rate": 6.754992113779495e-07, "logits/chosen": -2.264493942260742, "logits/rejected": -1.7711353302001953, "logps/chosen": -499.84710693359375, "logps/rejected": -666.3617553710938, "loss": 0.6773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32303962111473083, "rewards/margins": 0.19050443172454834, "rewards/rejected": -0.5135440826416016, "step": 8740 }, { "epoch": 0.65, "learning_rate": 6.730647626041059e-07, "logits/chosen": -1.881373405456543, "logits/rejected": -1.3674567937850952, "logps/chosen": -501.51361083984375, "logps/rejected": -646.7977905273438, "loss": 0.6793, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3534238934516907, "rewards/margins": 0.19772689044475555, "rewards/rejected": -0.5511507987976074, "step": 8750 }, { "epoch": 0.65, "learning_rate": 6.706324813865444e-07, "logits/chosen": -1.9871037006378174, "logits/rejected": -1.5873684883117676, "logps/chosen": -545.7581176757812, "logps/rejected": -703.1762084960938, "loss": 0.6766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39776888489723206, "rewards/margins": 0.1901630461215973, "rewards/rejected": -0.5879319906234741, "step": 8760 }, { "epoch": 0.65, "learning_rate": 6.682023838511077e-07, "logits/chosen": -2.1984500885009766, "logits/rejected": -1.7476098537445068, "logps/chosen": -570.0735473632812, "logps/rejected": -670.9730224609375, "loss": 0.6812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41139131784439087, "rewards/margins": 0.13981643319129944, "rewards/rejected": -0.5512077212333679, "step": 8770 }, { "epoch": 0.65, "learning_rate": 6.657744861091618e-07, "logits/chosen": -2.1141536235809326, "logits/rejected": -1.564929723739624, "logps/chosen": -475.33807373046875, "logps/rejected": -603.91015625, "loss": 0.6825, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.31958717107772827, "rewards/margins": 0.16837835311889648, "rewards/rejected": -0.48796549439430237, "step": 8780 }, { "epoch": 0.65, "learning_rate": 6.633488042574881e-07, "logits/chosen": -2.1187691688537598, "logits/rejected": -1.8091751337051392, "logps/chosen": -563.8350219726562, "logps/rejected": -718.5631103515625, "loss": 0.6778, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3924596905708313, "rewards/margins": 0.1674821674823761, "rewards/rejected": -0.5599418878555298, "step": 8790 }, { "epoch": 0.65, "learning_rate": 6.609253543781765e-07, "logits/chosen": -2.0494179725646973, "logits/rejected": -1.326453447341919, "logps/chosen": -563.4547119140625, "logps/rejected": -808.7349853515625, "loss": 0.6801, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.41484108567237854, "rewards/margins": 0.28922489285469055, "rewards/rejected": -0.7040659189224243, "step": 8800 }, { "epoch": 0.65, "learning_rate": 6.585041525385192e-07, "logits/chosen": -2.035639762878418, "logits/rejected": -1.449599027633667, "logps/chosen": -670.4212646484375, "logps/rejected": -798.1891479492188, "loss": 0.6729, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.41462773084640503, "rewards/margins": 0.227179616689682, "rewards/rejected": -0.6418074369430542, "step": 8810 }, { "epoch": 0.65, "learning_rate": 6.560852147909044e-07, "logits/chosen": -2.204272508621216, "logits/rejected": -1.8315681219100952, "logps/chosen": -495.1904296875, "logps/rejected": -646.8665161132812, "loss": 0.6776, "rewards/accuracies": 0.75, "rewards/chosen": -0.35131943225860596, "rewards/margins": 0.17027410864830017, "rewards/rejected": -0.5215935111045837, "step": 8820 }, { "epoch": 0.65, "learning_rate": 6.536685571727092e-07, "logits/chosen": -2.016833543777466, "logits/rejected": -1.5951346158981323, "logps/chosen": -589.6416625976562, "logps/rejected": -785.7286376953125, "loss": 0.6831, "rewards/accuracies": 0.75, "rewards/chosen": -0.4027079939842224, "rewards/margins": 0.21667513251304626, "rewards/rejected": -0.6193832159042358, "step": 8830 }, { "epoch": 0.65, "learning_rate": 6.512541957061935e-07, "logits/chosen": -1.9630151987075806, "logits/rejected": -1.811151146888733, "logps/chosen": -505.5951232910156, "logps/rejected": -653.0667114257812, "loss": 0.6773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.365755558013916, "rewards/margins": 0.14945665001869202, "rewards/rejected": -0.5152121782302856, "step": 8840 }, { "epoch": 0.65, "learning_rate": 6.488421463983939e-07, "logits/chosen": -2.2271392345428467, "logits/rejected": -1.9624595642089844, "logps/chosen": -469.2462463378906, "logps/rejected": -676.4608154296875, "loss": 0.6739, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30044814944267273, "rewards/margins": 0.23764672875404358, "rewards/rejected": -0.5380948781967163, "step": 8850 }, { "epoch": 0.65, "learning_rate": 6.464324252410183e-07, "logits/chosen": -2.0944252014160156, "logits/rejected": -1.5543146133422852, "logps/chosen": -545.1600341796875, "logps/rejected": -760.9669799804688, "loss": 0.6698, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40626198053359985, "rewards/margins": 0.24924275279045105, "rewards/rejected": -0.6555047631263733, "step": 8860 }, { "epoch": 0.65, "learning_rate": 6.44025048210338e-07, "logits/chosen": -2.230743169784546, "logits/rejected": -1.5395474433898926, "logps/chosen": -509.63623046875, "logps/rejected": -686.8866577148438, "loss": 0.6793, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.309037983417511, "rewards/margins": 0.2182329148054123, "rewards/rejected": -0.5272709131240845, "step": 8870 }, { "epoch": 0.66, "learning_rate": 6.416200312670837e-07, "logits/chosen": -2.26332426071167, "logits/rejected": -1.614126443862915, "logps/chosen": -547.9263305664062, "logps/rejected": -741.86181640625, "loss": 0.6751, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3817203938961029, "rewards/margins": 0.2192268669605255, "rewards/rejected": -0.6009472608566284, "step": 8880 }, { "epoch": 0.66, "learning_rate": 6.392173903563384e-07, "logits/chosen": -2.16848087310791, "logits/rejected": -1.7151235342025757, "logps/chosen": -423.0066833496094, "logps/rejected": -518.5053100585938, "loss": 0.6839, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.28693413734436035, "rewards/margins": 0.1417335569858551, "rewards/rejected": -0.4286676347255707, "step": 8890 }, { "epoch": 0.66, "learning_rate": 6.368171414074332e-07, "logits/chosen": -2.1284852027893066, "logits/rejected": -1.5401533842086792, "logps/chosen": -532.2103271484375, "logps/rejected": -732.8078002929688, "loss": 0.6756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.33287328481674194, "rewards/margins": 0.2488528937101364, "rewards/rejected": -0.5817262530326843, "step": 8900 }, { "epoch": 0.66, "learning_rate": 6.344193003338391e-07, "logits/chosen": -2.0477490425109863, "logits/rejected": -1.6370426416397095, "logps/chosen": -525.9423828125, "logps/rejected": -662.4857788085938, "loss": 0.6817, "rewards/accuracies": 0.75, "rewards/chosen": -0.3880094885826111, "rewards/margins": 0.1583056002855301, "rewards/rejected": -0.54631507396698, "step": 8910 }, { "epoch": 0.66, "learning_rate": 6.320238830330643e-07, "logits/chosen": -2.0930967330932617, "logits/rejected": -1.7215226888656616, "logps/chosen": -481.0933532714844, "logps/rejected": -668.4310302734375, "loss": 0.6756, "rewards/accuracies": 0.875, "rewards/chosen": -0.31642958521842957, "rewards/margins": 0.22864632308483124, "rewards/rejected": -0.5450758934020996, "step": 8920 }, { "epoch": 0.66, "learning_rate": 6.296309053865471e-07, "logits/chosen": -2.06174898147583, "logits/rejected": -1.6604783535003662, "logps/chosen": -487.88836669921875, "logps/rejected": -651.1668090820312, "loss": 0.6828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3148845136165619, "rewards/margins": 0.18394410610198975, "rewards/rejected": -0.49882858991622925, "step": 8930 }, { "epoch": 0.66, "learning_rate": 6.272403832595512e-07, "logits/chosen": -2.0618515014648438, "logits/rejected": -1.6610298156738281, "logps/chosen": -528.3184814453125, "logps/rejected": -660.8770751953125, "loss": 0.6799, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34951621294021606, "rewards/margins": 0.15900449454784393, "rewards/rejected": -0.5085207223892212, "step": 8940 }, { "epoch": 0.66, "learning_rate": 6.248523325010599e-07, "logits/chosen": -2.0800747871398926, "logits/rejected": -1.5042893886566162, "logps/chosen": -434.1739196777344, "logps/rejected": -618.7532348632812, "loss": 0.6765, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.255264014005661, "rewards/margins": 0.2302398383617401, "rewards/rejected": -0.4855038523674011, "step": 8950 }, { "epoch": 0.66, "learning_rate": 6.224667689436724e-07, "logits/chosen": -2.0024657249450684, "logits/rejected": -1.534622311592102, "logps/chosen": -538.9070434570312, "logps/rejected": -703.4763793945312, "loss": 0.6772, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.37612468004226685, "rewards/margins": 0.20299585163593292, "rewards/rejected": -0.5791205167770386, "step": 8960 }, { "epoch": 0.66, "learning_rate": 6.200837084034974e-07, "logits/chosen": -2.1722073554992676, "logits/rejected": -1.6398814916610718, "logps/chosen": -547.4281005859375, "logps/rejected": -695.0169067382812, "loss": 0.6744, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3846130967140198, "rewards/margins": 0.20440444350242615, "rewards/rejected": -0.5890175104141235, "step": 8970 }, { "epoch": 0.66, "learning_rate": 6.177031666800483e-07, "logits/chosen": -1.9309791326522827, "logits/rejected": -1.5629724264144897, "logps/chosen": -463.06683349609375, "logps/rejected": -662.7598876953125, "loss": 0.6744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2936914563179016, "rewards/margins": 0.2245577871799469, "rewards/rejected": -0.5182492136955261, "step": 8980 }, { "epoch": 0.66, "learning_rate": 6.153251595561401e-07, "logits/chosen": -2.2038369178771973, "logits/rejected": -1.6485569477081299, "logps/chosen": -581.6858520507812, "logps/rejected": -704.2618408203125, "loss": 0.6836, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38185879588127136, "rewards/margins": 0.1722816526889801, "rewards/rejected": -0.5541403889656067, "step": 8990 }, { "epoch": 0.66, "learning_rate": 6.129497027977828e-07, "logits/chosen": -2.2019457817077637, "logits/rejected": -1.8824411630630493, "logps/chosen": -573.4754638671875, "logps/rejected": -674.7088623046875, "loss": 0.6826, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37790244817733765, "rewards/margins": 0.12784059345722198, "rewards/rejected": -0.5057430267333984, "step": 9000 }, { "epoch": 0.66, "learning_rate": 6.105768121540779e-07, "logits/chosen": -1.9827836751937866, "logits/rejected": -1.5754587650299072, "logps/chosen": -453.67169189453125, "logps/rejected": -583.4707641601562, "loss": 0.6813, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2983633875846863, "rewards/margins": 0.16575363278388977, "rewards/rejected": -0.4641169607639313, "step": 9010 }, { "epoch": 0.67, "learning_rate": 6.082065033571138e-07, "logits/chosen": -1.8731601238250732, "logits/rejected": -1.6108825206756592, "logps/chosen": -529.3875732421875, "logps/rejected": -690.6495361328125, "loss": 0.678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3702021539211273, "rewards/margins": 0.17188677191734314, "rewards/rejected": -0.5420888662338257, "step": 9020 }, { "epoch": 0.67, "learning_rate": 6.05838792121861e-07, "logits/chosen": -2.2774651050567627, "logits/rejected": -1.390021562576294, "logps/chosen": -537.3579711914062, "logps/rejected": -696.9887084960938, "loss": 0.6778, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3357030749320984, "rewards/margins": 0.2566124200820923, "rewards/rejected": -0.5923154950141907, "step": 9030 }, { "epoch": 0.67, "learning_rate": 6.034736941460687e-07, "logits/chosen": -1.9491256475448608, "logits/rejected": -1.5716960430145264, "logps/chosen": -510.7643127441406, "logps/rejected": -675.9869384765625, "loss": 0.6792, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34260040521621704, "rewards/margins": 0.182336688041687, "rewards/rejected": -0.5249370336532593, "step": 9040 }, { "epoch": 0.67, "learning_rate": 6.011112251101608e-07, "logits/chosen": -2.0846261978149414, "logits/rejected": -1.5779807567596436, "logps/chosen": -482.4671325683594, "logps/rejected": -637.4402465820312, "loss": 0.6839, "rewards/accuracies": 0.75, "rewards/chosen": -0.3113196790218353, "rewards/margins": 0.1773715317249298, "rewards/rejected": -0.4886912405490875, "step": 9050 }, { "epoch": 0.67, "learning_rate": 5.987514006771305e-07, "logits/chosen": -2.266287326812744, "logits/rejected": -1.7985140085220337, "logps/chosen": -502.3783264160156, "logps/rejected": -689.8365478515625, "loss": 0.6762, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3357495963573456, "rewards/margins": 0.20791392028331757, "rewards/rejected": -0.5436635613441467, "step": 9060 }, { "epoch": 0.67, "learning_rate": 5.963942364924386e-07, "logits/chosen": -2.311539888381958, "logits/rejected": -1.643498420715332, "logps/chosen": -501.54229736328125, "logps/rejected": -633.5213012695312, "loss": 0.6767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.273994117975235, "rewards/margins": 0.22471758723258972, "rewards/rejected": -0.4987116754055023, "step": 9070 }, { "epoch": 0.67, "learning_rate": 5.940397481839082e-07, "logits/chosen": -2.1529247760772705, "logits/rejected": -1.706277847290039, "logps/chosen": -473.641357421875, "logps/rejected": -609.03857421875, "loss": 0.6815, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.30332303047180176, "rewards/margins": 0.18798920512199402, "rewards/rejected": -0.49131232500076294, "step": 9080 }, { "epoch": 0.67, "learning_rate": 5.916879513616211e-07, "logits/chosen": -2.052159547805786, "logits/rejected": -1.8957736492156982, "logps/chosen": -486.60205078125, "logps/rejected": -598.6563110351562, "loss": 0.6823, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35168179869651794, "rewards/margins": 0.1156105175614357, "rewards/rejected": -0.46729230880737305, "step": 9090 }, { "epoch": 0.67, "learning_rate": 5.893388616178153e-07, "logits/chosen": -1.998430609703064, "logits/rejected": -1.680422067642212, "logps/chosen": -520.6930541992188, "logps/rejected": -670.364501953125, "loss": 0.6828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3615911900997162, "rewards/margins": 0.13611218333244324, "rewards/rejected": -0.4977033734321594, "step": 9100 }, { "epoch": 0.67, "learning_rate": 5.869924945267812e-07, "logits/chosen": -2.147000551223755, "logits/rejected": -1.4857038259506226, "logps/chosen": -474.12762451171875, "logps/rejected": -641.673095703125, "loss": 0.6811, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2723160982131958, "rewards/margins": 0.236432746052742, "rewards/rejected": -0.5087488293647766, "step": 9110 }, { "epoch": 0.67, "learning_rate": 5.846488656447572e-07, "logits/chosen": -2.257093906402588, "logits/rejected": -1.8363409042358398, "logps/chosen": -465.05535888671875, "logps/rejected": -574.6848754882812, "loss": 0.6788, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.29565146565437317, "rewards/margins": 0.16145199537277222, "rewards/rejected": -0.457103431224823, "step": 9120 }, { "epoch": 0.67, "learning_rate": 5.823079905098289e-07, "logits/chosen": -2.1500298976898193, "logits/rejected": -1.6581920385360718, "logps/chosen": -507.32843017578125, "logps/rejected": -677.3688354492188, "loss": 0.6799, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3689576983451843, "rewards/margins": 0.20642109215259552, "rewards/rejected": -0.5753787755966187, "step": 9130 }, { "epoch": 0.67, "learning_rate": 5.799698846418237e-07, "logits/chosen": -2.0766143798828125, "logits/rejected": -1.4388694763183594, "logps/chosen": -433.40911865234375, "logps/rejected": -637.729736328125, "loss": 0.6752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.257847398519516, "rewards/margins": 0.25164341926574707, "rewards/rejected": -0.5094908475875854, "step": 9140 }, { "epoch": 0.67, "learning_rate": 5.776345635422095e-07, "logits/chosen": -1.8036826848983765, "logits/rejected": -1.280479907989502, "logps/chosen": -543.4500732421875, "logps/rejected": -641.3834228515625, "loss": 0.6807, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3745245933532715, "rewards/margins": 0.1593792587518692, "rewards/rejected": -0.5339038372039795, "step": 9150 }, { "epoch": 0.68, "learning_rate": 5.753020426939921e-07, "logits/chosen": -2.139051914215088, "logits/rejected": -1.619826316833496, "logps/chosen": -587.3509521484375, "logps/rejected": -780.3133544921875, "loss": 0.6807, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40542712807655334, "rewards/margins": 0.22161488234996796, "rewards/rejected": -0.6270419359207153, "step": 9160 }, { "epoch": 0.68, "learning_rate": 5.729723375616102e-07, "logits/chosen": -2.071340560913086, "logits/rejected": -1.8839218616485596, "logps/chosen": -538.0545043945312, "logps/rejected": -673.9176635742188, "loss": 0.6865, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3941883146762848, "rewards/margins": 0.10999884456396103, "rewards/rejected": -0.5041872262954712, "step": 9170 }, { "epoch": 0.68, "learning_rate": 5.706454635908353e-07, "logits/chosen": -2.1010186672210693, "logits/rejected": -1.4439436197280884, "logps/chosen": -519.3521728515625, "logps/rejected": -639.6570434570312, "loss": 0.681, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3349483907222748, "rewards/margins": 0.17985376715660095, "rewards/rejected": -0.514802098274231, "step": 9180 }, { "epoch": 0.68, "learning_rate": 5.683214362086694e-07, "logits/chosen": -2.1606087684631348, "logits/rejected": -1.9430246353149414, "logps/chosen": -592.0208129882812, "logps/rejected": -654.18505859375, "loss": 0.6836, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4397679269313812, "rewards/margins": 0.09065257012844086, "rewards/rejected": -0.5304204225540161, "step": 9190 }, { "epoch": 0.68, "learning_rate": 5.660002708232403e-07, "logits/chosen": -2.222266435623169, "logits/rejected": -1.5575048923492432, "logps/chosen": -634.2406005859375, "logps/rejected": -790.259765625, "loss": 0.6765, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.4413851201534271, "rewards/margins": 0.2227572202682495, "rewards/rejected": -0.6641424298286438, "step": 9200 }, { "epoch": 0.68, "learning_rate": 5.636819828237016e-07, "logits/chosen": -2.0306620597839355, "logits/rejected": -1.210021734237671, "logps/chosen": -541.8827514648438, "logps/rejected": -764.8106079101562, "loss": 0.6761, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3643910586833954, "rewards/margins": 0.27470746636390686, "rewards/rejected": -0.6390984654426575, "step": 9210 }, { "epoch": 0.68, "learning_rate": 5.613665875801307e-07, "logits/chosen": -1.9672247171401978, "logits/rejected": -1.4180262088775635, "logps/chosen": -534.8362426757812, "logps/rejected": -753.1040649414062, "loss": 0.6799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3642098605632782, "rewards/margins": 0.22978202998638153, "rewards/rejected": -0.5939918756484985, "step": 9220 }, { "epoch": 0.68, "learning_rate": 5.590541004434254e-07, "logits/chosen": -1.945367455482483, "logits/rejected": -1.6564737558364868, "logps/chosen": -540.9659423828125, "logps/rejected": -701.5950927734375, "loss": 0.6785, "rewards/accuracies": 0.875, "rewards/chosen": -0.37948331236839294, "rewards/margins": 0.17993657290935516, "rewards/rejected": -0.5594198703765869, "step": 9230 }, { "epoch": 0.68, "learning_rate": 5.567445367452028e-07, "logits/chosen": -2.001260280609131, "logits/rejected": -1.4520026445388794, "logps/chosen": -591.41943359375, "logps/rejected": -811.8400268554688, "loss": 0.6668, "rewards/accuracies": 0.875, "rewards/chosen": -0.4310988783836365, "rewards/margins": 0.26121434569358826, "rewards/rejected": -0.6923132538795471, "step": 9240 }, { "epoch": 0.68, "learning_rate": 5.544379117976986e-07, "logits/chosen": -2.0538206100463867, "logits/rejected": -1.3885562419891357, "logps/chosen": -586.2344360351562, "logps/rejected": -760.7596435546875, "loss": 0.6789, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.44843941926956177, "rewards/margins": 0.21429653465747833, "rewards/rejected": -0.6627359390258789, "step": 9250 }, { "epoch": 0.68, "learning_rate": 5.52134240893665e-07, "logits/chosen": -1.9667079448699951, "logits/rejected": -1.2844576835632324, "logps/chosen": -545.4508056640625, "logps/rejected": -751.7620849609375, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4024244248867035, "rewards/margins": 0.2454407960176468, "rewards/rejected": -0.6478651762008667, "step": 9260 }, { "epoch": 0.68, "learning_rate": 5.498335393062681e-07, "logits/chosen": -2.1263253688812256, "logits/rejected": -1.435465693473816, "logps/chosen": -551.1356201171875, "logps/rejected": -741.4070434570312, "loss": 0.6731, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.3717099130153656, "rewards/margins": 0.23671138286590576, "rewards/rejected": -0.6084213256835938, "step": 9270 }, { "epoch": 0.68, "learning_rate": 5.475358222889876e-07, "logits/chosen": -2.0343384742736816, "logits/rejected": -1.4286859035491943, "logps/chosen": -544.8865356445312, "logps/rejected": -702.6204833984375, "loss": 0.6784, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40050190687179565, "rewards/margins": 0.20019221305847168, "rewards/rejected": -0.6006941199302673, "step": 9280 }, { "epoch": 0.69, "learning_rate": 5.452411050755172e-07, "logits/chosen": -2.1753745079040527, "logits/rejected": -1.460546612739563, "logps/chosen": -588.1976318359375, "logps/rejected": -738.5289306640625, "loss": 0.6768, "rewards/accuracies": 0.75, "rewards/chosen": -0.3974655866622925, "rewards/margins": 0.22477655112743378, "rewards/rejected": -0.6222421526908875, "step": 9290 }, { "epoch": 0.69, "learning_rate": 5.429494028796608e-07, "logits/chosen": -2.228734254837036, "logits/rejected": -1.7535803318023682, "logps/chosen": -544.4983520507812, "logps/rejected": -611.3958740234375, "loss": 0.6814, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3660920560359955, "rewards/margins": 0.12050594389438629, "rewards/rejected": -0.48659801483154297, "step": 9300 }, { "epoch": 0.69, "learning_rate": 5.406607308952329e-07, "logits/chosen": -2.026541233062744, "logits/rejected": -1.8556312322616577, "logps/chosen": -504.58551025390625, "logps/rejected": -653.7550048828125, "loss": 0.6838, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3706248700618744, "rewards/margins": 0.13319511711597443, "rewards/rejected": -0.50382000207901, "step": 9310 }, { "epoch": 0.69, "learning_rate": 5.383751042959591e-07, "logits/chosen": -2.259148120880127, "logits/rejected": -1.5731675624847412, "logps/chosen": -568.8267822265625, "logps/rejected": -765.98486328125, "loss": 0.6771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4114048480987549, "rewards/margins": 0.22743578255176544, "rewards/rejected": -0.6388406157493591, "step": 9320 }, { "epoch": 0.69, "learning_rate": 5.360925382353727e-07, "logits/chosen": -2.023803234100342, "logits/rejected": -1.3969571590423584, "logps/chosen": -527.1331176757812, "logps/rejected": -759.8905029296875, "loss": 0.6717, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.38940930366516113, "rewards/margins": 0.24831035733222961, "rewards/rejected": -0.6377196311950684, "step": 9330 }, { "epoch": 0.69, "learning_rate": 5.338130478467177e-07, "logits/chosen": -2.083513021469116, "logits/rejected": -1.7814117670059204, "logps/chosen": -607.920654296875, "logps/rejected": -713.4910888671875, "loss": 0.6847, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.44132524728775024, "rewards/margins": 0.11885972321033478, "rewards/rejected": -0.5601849555969238, "step": 9340 }, { "epoch": 0.69, "learning_rate": 5.315366482428447e-07, "logits/chosen": -1.977543830871582, "logits/rejected": -1.3159198760986328, "logps/chosen": -566.0277709960938, "logps/rejected": -766.1151733398438, "loss": 0.6758, "rewards/accuracies": 0.75, "rewards/chosen": -0.42397719621658325, "rewards/margins": 0.2222256362438202, "rewards/rejected": -0.6462028622627258, "step": 9350 }, { "epoch": 0.69, "learning_rate": 5.292633545161145e-07, "logits/chosen": -2.181307315826416, "logits/rejected": -1.7215207815170288, "logps/chosen": -628.178955078125, "logps/rejected": -766.2592163085938, "loss": 0.6779, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4475773870944977, "rewards/margins": 0.18180425465106964, "rewards/rejected": -0.6293816566467285, "step": 9360 }, { "epoch": 0.69, "learning_rate": 5.269931817382949e-07, "logits/chosen": -1.96614670753479, "logits/rejected": -1.5855939388275146, "logps/chosen": -640.4093627929688, "logps/rejected": -802.9412841796875, "loss": 0.6807, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5090634226799011, "rewards/margins": 0.17171618342399597, "rewards/rejected": -0.6807795763015747, "step": 9370 }, { "epoch": 0.69, "learning_rate": 5.247261449604619e-07, "logits/chosen": -1.807882308959961, "logits/rejected": -1.3497228622436523, "logps/chosen": -625.1055908203125, "logps/rejected": -783.5462646484375, "loss": 0.679, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4633842408657074, "rewards/margins": 0.18062087893486023, "rewards/rejected": -0.6440051794052124, "step": 9380 }, { "epoch": 0.69, "learning_rate": 5.224622592129011e-07, "logits/chosen": -2.065951108932495, "logits/rejected": -1.6637073755264282, "logps/chosen": -633.1612548828125, "logps/rejected": -785.4385375976562, "loss": 0.6764, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4799092411994934, "rewards/margins": 0.17553207278251648, "rewards/rejected": -0.6554413437843323, "step": 9390 }, { "epoch": 0.69, "learning_rate": 5.202015395050065e-07, "logits/chosen": -2.192063093185425, "logits/rejected": -1.6178420782089233, "logps/chosen": -593.8875732421875, "logps/rejected": -712.0078125, "loss": 0.682, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.40299543738365173, "rewards/margins": 0.18264421820640564, "rewards/rejected": -0.5856396555900574, "step": 9400 }, { "epoch": 0.69, "learning_rate": 5.179440008251808e-07, "logits/chosen": -1.9190728664398193, "logits/rejected": -1.5003230571746826, "logps/chosen": -550.6044921875, "logps/rejected": -711.3189697265625, "loss": 0.6808, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3831252455711365, "rewards/margins": 0.2054707258939743, "rewards/rejected": -0.588595986366272, "step": 9410 }, { "epoch": 0.69, "learning_rate": 5.156896581407378e-07, "logits/chosen": -2.2535195350646973, "logits/rejected": -1.9248249530792236, "logps/chosen": -450.8780212402344, "logps/rejected": -605.2540893554688, "loss": 0.6828, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.306772381067276, "rewards/margins": 0.16693904995918274, "rewards/rejected": -0.47371143102645874, "step": 9420 }, { "epoch": 0.7, "learning_rate": 5.13438526397802e-07, "logits/chosen": -2.18597412109375, "logits/rejected": -1.6641626358032227, "logps/chosen": -518.019287109375, "logps/rejected": -718.30224609375, "loss": 0.6777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3225598931312561, "rewards/margins": 0.19713439047336578, "rewards/rejected": -0.5196942687034607, "step": 9430 }, { "epoch": 0.7, "learning_rate": 5.111906205212088e-07, "logits/chosen": -2.3465638160705566, "logits/rejected": -1.4837565422058105, "logps/chosen": -556.496337890625, "logps/rejected": -661.9947509765625, "loss": 0.6771, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31445637345314026, "rewards/margins": 0.20153887569904327, "rewards/rejected": -0.5159952640533447, "step": 9440 }, { "epoch": 0.7, "learning_rate": 5.089459554144066e-07, "logits/chosen": -1.984344482421875, "logits/rejected": -1.475520133972168, "logps/chosen": -492.66632080078125, "logps/rejected": -650.9781494140625, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36213093996047974, "rewards/margins": 0.1809392273426056, "rewards/rejected": -0.5430701375007629, "step": 9450 }, { "epoch": 0.7, "learning_rate": 5.067045459593585e-07, "logits/chosen": -1.9771225452423096, "logits/rejected": -1.5036323070526123, "logps/chosen": -500.56329345703125, "logps/rejected": -744.9017944335938, "loss": 0.6754, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.34881311655044556, "rewards/margins": 0.25303196907043457, "rewards/rejected": -0.6018451452255249, "step": 9460 }, { "epoch": 0.7, "learning_rate": 5.044664070164421e-07, "logits/chosen": -2.098114490509033, "logits/rejected": -1.5661617517471313, "logps/chosen": -484.1845703125, "logps/rejected": -667.0091552734375, "loss": 0.6769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3309270739555359, "rewards/margins": 0.22507551312446594, "rewards/rejected": -0.5560026168823242, "step": 9470 }, { "epoch": 0.7, "learning_rate": 5.022315534243514e-07, "logits/chosen": -1.8905315399169922, "logits/rejected": -1.3472979068756104, "logps/chosen": -504.87811279296875, "logps/rejected": -684.2596435546875, "loss": 0.6738, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.352977454662323, "rewards/margins": 0.2375892847776413, "rewards/rejected": -0.5905667543411255, "step": 9480 }, { "epoch": 0.7, "learning_rate": 5.000000000000002e-07, "logits/chosen": -2.193150281906128, "logits/rejected": -1.873315453529358, "logps/chosen": -481.3467712402344, "logps/rejected": -595.0586547851562, "loss": 0.6777, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3159124553203583, "rewards/margins": 0.16407866775989532, "rewards/rejected": -0.47999104857444763, "step": 9490 }, { "epoch": 0.7, "learning_rate": 4.97771761538421e-07, "logits/chosen": -2.3275744915008545, "logits/rejected": -1.7983367443084717, "logps/chosen": -540.0834350585938, "logps/rejected": -697.2486572265625, "loss": 0.6752, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34584707021713257, "rewards/margins": 0.22663120925426483, "rewards/rejected": -0.5724782943725586, "step": 9500 }, { "epoch": 0.7, "learning_rate": 4.955468528126684e-07, "logits/chosen": -1.8869571685791016, "logits/rejected": -1.6714521646499634, "logps/chosen": -501.0758361816406, "logps/rejected": -597.2171630859375, "loss": 0.6813, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.340907484292984, "rewards/margins": 0.1292223185300827, "rewards/rejected": -0.4701297879219055, "step": 9510 }, { "epoch": 0.7, "learning_rate": 4.93325288573723e-07, "logits/chosen": -1.9418214559555054, "logits/rejected": -1.4538252353668213, "logps/chosen": -558.3392333984375, "logps/rejected": -721.8717041015625, "loss": 0.6773, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3895326256752014, "rewards/margins": 0.2053803950548172, "rewards/rejected": -0.5949130058288574, "step": 9520 }, { "epoch": 0.7, "learning_rate": 4.911070835503894e-07, "logits/chosen": -2.1666717529296875, "logits/rejected": -1.1871980428695679, "logps/chosen": -601.115234375, "logps/rejected": -710.0198364257812, "loss": 0.6756, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4160861074924469, "rewards/margins": 0.19512124359607697, "rewards/rejected": -0.6112073659896851, "step": 9530 }, { "epoch": 0.7, "learning_rate": 4.88892252449202e-07, "logits/chosen": -2.3219592571258545, "logits/rejected": -1.835222601890564, "logps/chosen": -526.9072875976562, "logps/rejected": -666.6302490234375, "loss": 0.6823, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3459222912788391, "rewards/margins": 0.18886172771453857, "rewards/rejected": -0.5347839593887329, "step": 9540 }, { "epoch": 0.7, "learning_rate": 4.866808099543253e-07, "logits/chosen": -2.12263822555542, "logits/rejected": -1.502110242843628, "logps/chosen": -493.2935485839844, "logps/rejected": -708.0488891601562, "loss": 0.6756, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3464152216911316, "rewards/margins": 0.25983577966690063, "rewards/rejected": -0.6062510013580322, "step": 9550 }, { "epoch": 0.71, "learning_rate": 4.844727707274597e-07, "logits/chosen": -2.143028736114502, "logits/rejected": -1.5317144393920898, "logps/chosen": -464.8404235839844, "logps/rejected": -691.7532958984375, "loss": 0.6773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27119794487953186, "rewards/margins": 0.2564341723918915, "rewards/rejected": -0.5276320576667786, "step": 9560 }, { "epoch": 0.71, "learning_rate": 4.822681494077401e-07, "logits/chosen": -2.1818642616271973, "logits/rejected": -1.7664775848388672, "logps/chosen": -502.638671875, "logps/rejected": -655.87109375, "loss": 0.6839, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2997915744781494, "rewards/margins": 0.1945614069700241, "rewards/rejected": -0.49435296654701233, "step": 9570 }, { "epoch": 0.71, "learning_rate": 4.800669606116414e-07, "logits/chosen": -2.431400775909424, "logits/rejected": -1.569248080253601, "logps/chosen": -423.0960998535156, "logps/rejected": -612.9295654296875, "loss": 0.6732, "rewards/accuracies": 0.875, "rewards/chosen": -0.2559960186481476, "rewards/margins": 0.22658273577690125, "rewards/rejected": -0.4825788140296936, "step": 9580 }, { "epoch": 0.71, "learning_rate": 4.778692189328823e-07, "logits/chosen": -1.9021327495574951, "logits/rejected": -1.3510273694992065, "logps/chosen": -484.2850036621094, "logps/rejected": -714.0293579101562, "loss": 0.6763, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3032609224319458, "rewards/margins": 0.23368963599205017, "rewards/rejected": -0.5369505882263184, "step": 9590 }, { "epoch": 0.71, "learning_rate": 4.7567493894232557e-07, "logits/chosen": -2.252789258956909, "logits/rejected": -1.7787981033325195, "logps/chosen": -520.1695556640625, "logps/rejected": -639.205810546875, "loss": 0.6833, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.36973002552986145, "rewards/margins": 0.15882790088653564, "rewards/rejected": -0.5285578966140747, "step": 9600 }, { "epoch": 0.71, "learning_rate": 4.7348413518788477e-07, "logits/chosen": -2.011453151702881, "logits/rejected": -1.4394270181655884, "logps/chosen": -504.681396484375, "logps/rejected": -647.1278076171875, "loss": 0.6797, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33554479479789734, "rewards/margins": 0.1766767054796219, "rewards/rejected": -0.5122215151786804, "step": 9610 }, { "epoch": 0.71, "learning_rate": 4.712968221944258e-07, "logits/chosen": -1.9913861751556396, "logits/rejected": -1.532707929611206, "logps/chosen": -458.3871154785156, "logps/rejected": -566.8685302734375, "loss": 0.6802, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3244626820087433, "rewards/margins": 0.15273983776569366, "rewards/rejected": -0.47720250487327576, "step": 9620 }, { "epoch": 0.71, "learning_rate": 4.691130144636707e-07, "logits/chosen": -2.2073659896850586, "logits/rejected": -1.6289714574813843, "logps/chosen": -469.6513671875, "logps/rejected": -685.0128784179688, "loss": 0.6808, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2985898554325104, "rewards/margins": 0.23113255202770233, "rewards/rejected": -0.5297223925590515, "step": 9630 }, { "epoch": 0.71, "learning_rate": 4.6693272647410176e-07, "logits/chosen": -1.9315173625946045, "logits/rejected": -1.6658427715301514, "logps/chosen": -534.6717529296875, "logps/rejected": -676.7167358398438, "loss": 0.6804, "rewards/accuracies": 0.75, "rewards/chosen": -0.40315741300582886, "rewards/margins": 0.14599233865737915, "rewards/rejected": -0.5491498112678528, "step": 9640 }, { "epoch": 0.71, "learning_rate": 4.647559726808667e-07, "logits/chosen": -2.178328275680542, "logits/rejected": -1.6684545278549194, "logps/chosen": -467.2462463378906, "logps/rejected": -645.5849609375, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2852398157119751, "rewards/margins": 0.21434848010540009, "rewards/rejected": -0.4995882511138916, "step": 9650 }, { "epoch": 0.71, "learning_rate": 4.6258276751568095e-07, "logits/chosen": -2.032576560974121, "logits/rejected": -1.5622961521148682, "logps/chosen": -563.609130859375, "logps/rejected": -709.7816772460938, "loss": 0.6795, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3773963451385498, "rewards/margins": 0.18319571018218994, "rewards/rejected": -0.5605920553207397, "step": 9660 }, { "epoch": 0.71, "learning_rate": 4.604131253867326e-07, "logits/chosen": -1.968664526939392, "logits/rejected": -1.5848255157470703, "logps/chosen": -507.0882873535156, "logps/rejected": -655.5386962890625, "loss": 0.6788, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3765379786491394, "rewards/margins": 0.1665494441986084, "rewards/rejected": -0.543087363243103, "step": 9670 }, { "epoch": 0.71, "learning_rate": 4.582470606785872e-07, "logits/chosen": -1.9793317317962646, "logits/rejected": -1.4772590398788452, "logps/chosen": -490.95770263671875, "logps/rejected": -652.4213256835938, "loss": 0.6777, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36146271228790283, "rewards/margins": 0.161770761013031, "rewards/rejected": -0.5232334136962891, "step": 9680 }, { "epoch": 0.71, "learning_rate": 4.560845877520929e-07, "logits/chosen": -2.1720499992370605, "logits/rejected": -1.5776737928390503, "logps/chosen": -489.02899169921875, "logps/rejected": -624.7460327148438, "loss": 0.6775, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3324565291404724, "rewards/margins": 0.19724415242671967, "rewards/rejected": -0.5297006368637085, "step": 9690 }, { "epoch": 0.72, "learning_rate": 4.539257209442846e-07, "logits/chosen": -1.9431686401367188, "logits/rejected": -1.504027009010315, "logps/chosen": -583.3151245117188, "logps/rejected": -704.68408203125, "loss": 0.6723, "rewards/accuracies": 0.75, "rewards/chosen": -0.4097047448158264, "rewards/margins": 0.19589026272296906, "rewards/rejected": -0.6055949926376343, "step": 9700 }, { "epoch": 0.72, "learning_rate": 4.5177047456828764e-07, "logits/chosen": -2.1520960330963135, "logits/rejected": -1.5831284523010254, "logps/chosen": -548.4464721679688, "logps/rejected": -713.2514038085938, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4118117690086365, "rewards/margins": 0.1867634505033493, "rewards/rejected": -0.5985752940177917, "step": 9710 }, { "epoch": 0.72, "learning_rate": 4.4961886291322595e-07, "logits/chosen": -2.074911117553711, "logits/rejected": -1.637450933456421, "logps/chosen": -516.6661987304688, "logps/rejected": -669.8375244140625, "loss": 0.6785, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34829381108283997, "rewards/margins": 0.15751291811466217, "rewards/rejected": -0.5058066844940186, "step": 9720 }, { "epoch": 0.72, "learning_rate": 4.4747090024412395e-07, "logits/chosen": -2.1621227264404297, "logits/rejected": -1.7314279079437256, "logps/chosen": -497.71978759765625, "logps/rejected": -686.595703125, "loss": 0.6798, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32298746705055237, "rewards/margins": 0.21728789806365967, "rewards/rejected": -0.5402753353118896, "step": 9730 }, { "epoch": 0.72, "learning_rate": 4.4532660080181407e-07, "logits/chosen": -2.146744728088379, "logits/rejected": -1.7227427959442139, "logps/chosen": -572.8685302734375, "logps/rejected": -735.389892578125, "loss": 0.6823, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3745848536491394, "rewards/margins": 0.19395750761032104, "rewards/rejected": -0.5685423612594604, "step": 9740 }, { "epoch": 0.72, "learning_rate": 4.431859788028426e-07, "logits/chosen": -1.9117978811264038, "logits/rejected": -1.4891576766967773, "logps/chosen": -537.763916015625, "logps/rejected": -649.5765991210938, "loss": 0.6833, "rewards/accuracies": 0.625, "rewards/chosen": -0.3738517165184021, "rewards/margins": 0.14836902916431427, "rewards/rejected": -0.5222207307815552, "step": 9750 }, { "epoch": 0.72, "learning_rate": 4.410490484393736e-07, "logits/chosen": -2.144265651702881, "logits/rejected": -1.3817533254623413, "logps/chosen": -527.6256103515625, "logps/rejected": -729.5147094726562, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.353760689496994, "rewards/margins": 0.2446446716785431, "rewards/rejected": -0.5984053015708923, "step": 9760 }, { "epoch": 0.72, "learning_rate": 4.389158238790961e-07, "logits/chosen": -2.2255442142486572, "logits/rejected": -1.5639233589172363, "logps/chosen": -471.4994201660156, "logps/rejected": -617.951904296875, "loss": 0.6758, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.283263236284256, "rewards/margins": 0.22138628363609314, "rewards/rejected": -0.5046495199203491, "step": 9770 }, { "epoch": 0.72, "learning_rate": 4.3678631926513023e-07, "logits/chosen": -1.9843499660491943, "logits/rejected": -1.6338847875595093, "logps/chosen": -534.9288330078125, "logps/rejected": -642.3228759765625, "loss": 0.675, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.325134813785553, "rewards/margins": 0.17326359450817108, "rewards/rejected": -0.49839845299720764, "step": 9780 }, { "epoch": 0.72, "learning_rate": 4.346605487159335e-07, "logits/chosen": -2.253140449523926, "logits/rejected": -1.969976782798767, "logps/chosen": -514.1753540039062, "logps/rejected": -674.3165283203125, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34713101387023926, "rewards/margins": 0.15280404686927795, "rewards/rejected": -0.49993500113487244, "step": 9790 }, { "epoch": 0.72, "learning_rate": 4.3253852632520605e-07, "logits/chosen": -1.9724416732788086, "logits/rejected": -1.3734538555145264, "logps/chosen": -527.4244384765625, "logps/rejected": -689.0968017578125, "loss": 0.6765, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37632444500923157, "rewards/margins": 0.19215068221092224, "rewards/rejected": -0.5684751272201538, "step": 9800 }, { "epoch": 0.72, "learning_rate": 4.3042026616179793e-07, "logits/chosen": -2.0220792293548584, "logits/rejected": -1.5964064598083496, "logps/chosen": -499.825439453125, "logps/rejected": -651.4201049804688, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34608715772628784, "rewards/margins": 0.17296965420246124, "rewards/rejected": -0.5190567970275879, "step": 9810 }, { "epoch": 0.72, "learning_rate": 4.28305782269617e-07, "logits/chosen": -2.1049816608428955, "logits/rejected": -1.4289195537567139, "logps/chosen": -476.4275817871094, "logps/rejected": -633.3851928710938, "loss": 0.6749, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2782338857650757, "rewards/margins": 0.22359418869018555, "rewards/rejected": -0.5018280744552612, "step": 9820 }, { "epoch": 0.73, "learning_rate": 4.261950886675337e-07, "logits/chosen": -2.011436939239502, "logits/rejected": -1.5123939514160156, "logps/chosen": -499.72723388671875, "logps/rejected": -695.7979736328125, "loss": 0.6777, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31048688292503357, "rewards/margins": 0.22834916412830353, "rewards/rejected": -0.5388360023498535, "step": 9830 }, { "epoch": 0.73, "learning_rate": 4.240881993492892e-07, "logits/chosen": -2.198657989501953, "logits/rejected": -1.5294244289398193, "logps/chosen": -502.138916015625, "logps/rejected": -664.9395751953125, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3501697778701782, "rewards/margins": 0.2185172736644745, "rewards/rejected": -0.5686870813369751, "step": 9840 }, { "epoch": 0.73, "learning_rate": 4.219851282834034e-07, "logits/chosen": -1.9225136041641235, "logits/rejected": -1.5311378240585327, "logps/chosen": -443.62969970703125, "logps/rejected": -575.9142456054688, "loss": 0.681, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2692713141441345, "rewards/margins": 0.17030654847621918, "rewards/rejected": -0.4395778179168701, "step": 9850 }, { "epoch": 0.73, "learning_rate": 4.1988588941308045e-07, "logits/chosen": -2.0425291061401367, "logits/rejected": -1.3951623439788818, "logps/chosen": -467.38226318359375, "logps/rejected": -648.6127319335938, "loss": 0.6743, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29221391677856445, "rewards/margins": 0.24495765566825867, "rewards/rejected": -0.5371716618537903, "step": 9860 }, { "epoch": 0.73, "learning_rate": 4.1779049665611753e-07, "logits/chosen": -2.1270036697387695, "logits/rejected": -1.5874102115631104, "logps/chosen": -504.91778564453125, "logps/rejected": -668.3750610351562, "loss": 0.6753, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2967272400856018, "rewards/margins": 0.2042432576417923, "rewards/rejected": -0.5009704828262329, "step": 9870 }, { "epoch": 0.73, "learning_rate": 4.15698963904813e-07, "logits/chosen": -2.0068886280059814, "logits/rejected": -1.4726722240447998, "logps/chosen": -529.3925170898438, "logps/rejected": -711.1356811523438, "loss": 0.6699, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3764432966709137, "rewards/margins": 0.22341588139533997, "rewards/rejected": -0.5998591184616089, "step": 9880 }, { "epoch": 0.73, "learning_rate": 4.136113050258735e-07, "logits/chosen": -1.936300277709961, "logits/rejected": -1.1553906202316284, "logps/chosen": -518.72900390625, "logps/rejected": -712.3388061523438, "loss": 0.6776, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3423531651496887, "rewards/margins": 0.26637616753578186, "rewards/rejected": -0.6087293028831482, "step": 9890 }, { "epoch": 0.73, "learning_rate": 4.115275338603217e-07, "logits/chosen": -2.1329102516174316, "logits/rejected": -1.4794225692749023, "logps/chosen": -481.8656311035156, "logps/rejected": -644.4656982421875, "loss": 0.6766, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3086708188056946, "rewards/margins": 0.21558892726898193, "rewards/rejected": -0.5242597460746765, "step": 9900 }, { "epoch": 0.73, "learning_rate": 4.0944766422340494e-07, "logits/chosen": -2.216930866241455, "logits/rejected": -1.869200348854065, "logps/chosen": -457.87493896484375, "logps/rejected": -604.038330078125, "loss": 0.6773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.29115691781044006, "rewards/margins": 0.1603890210390091, "rewards/rejected": -0.45154595375061035, "step": 9910 }, { "epoch": 0.73, "learning_rate": 4.0737170990450464e-07, "logits/chosen": -2.2720820903778076, "logits/rejected": -1.5482591390609741, "logps/chosen": -480.0818786621094, "logps/rejected": -631.7645874023438, "loss": 0.6798, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3087753653526306, "rewards/margins": 0.20217499136924744, "rewards/rejected": -0.5109502673149109, "step": 9920 }, { "epoch": 0.73, "learning_rate": 4.0529968466704313e-07, "logits/chosen": -1.9711172580718994, "logits/rejected": -1.3619768619537354, "logps/chosen": -520.8509521484375, "logps/rejected": -734.1829833984375, "loss": 0.6791, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36997565627098083, "rewards/margins": 0.2498759925365448, "rewards/rejected": -0.6198517084121704, "step": 9930 }, { "epoch": 0.73, "learning_rate": 4.0323160224839303e-07, "logits/chosen": -1.9044392108917236, "logits/rejected": -1.5409678220748901, "logps/chosen": -541.0486450195312, "logps/rejected": -738.4863891601562, "loss": 0.6735, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3731314539909363, "rewards/margins": 0.22962895035743713, "rewards/rejected": -0.6027604341506958, "step": 9940 }, { "epoch": 0.73, "learning_rate": 4.0116747635978756e-07, "logits/chosen": -2.2521965503692627, "logits/rejected": -1.7144057750701904, "logps/chosen": -516.4979248046875, "logps/rejected": -662.603759765625, "loss": 0.6777, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3305986225605011, "rewards/margins": 0.1995689868927002, "rewards/rejected": -0.5301675796508789, "step": 9950 }, { "epoch": 0.73, "learning_rate": 3.9910732068622686e-07, "logits/chosen": -2.124319553375244, "logits/rejected": -1.5332378149032593, "logps/chosen": -574.2708740234375, "logps/rejected": -740.5064697265625, "loss": 0.6758, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40282249450683594, "rewards/margins": 0.1872037649154663, "rewards/rejected": -0.5900262594223022, "step": 9960 }, { "epoch": 0.74, "learning_rate": 3.9705114888639055e-07, "logits/chosen": -1.8897336721420288, "logits/rejected": -1.5890296697616577, "logps/chosen": -549.8443603515625, "logps/rejected": -687.631591796875, "loss": 0.6808, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.409562885761261, "rewards/margins": 0.15095026791095734, "rewards/rejected": -0.5605131387710571, "step": 9970 }, { "epoch": 0.74, "learning_rate": 3.949989745925437e-07, "logits/chosen": -2.1396822929382324, "logits/rejected": -1.5380314588546753, "logps/chosen": -577.7100830078125, "logps/rejected": -767.6787719726562, "loss": 0.6761, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4053594172000885, "rewards/margins": 0.23427338898181915, "rewards/rejected": -0.6396327614784241, "step": 9980 }, { "epoch": 0.74, "learning_rate": 3.9295081141044994e-07, "logits/chosen": -2.0814425945281982, "logits/rejected": -1.5792334079742432, "logps/chosen": -520.4976806640625, "logps/rejected": -701.6133422851562, "loss": 0.673, "rewards/accuracies": 0.75, "rewards/chosen": -0.3870624601840973, "rewards/margins": 0.2134360373020172, "rewards/rejected": -0.6004984974861145, "step": 9990 }, { "epoch": 0.74, "learning_rate": 3.9090667291927813e-07, "logits/chosen": -2.0123207569122314, "logits/rejected": -1.6771981716156006, "logps/chosen": -562.020263671875, "logps/rejected": -738.2708129882812, "loss": 0.6744, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.40578967332839966, "rewards/margins": 0.17790724337100983, "rewards/rejected": -0.5836969017982483, "step": 10000 }, { "epoch": 0.74, "learning_rate": 3.8886657267151413e-07, "logits/chosen": -2.0726065635681152, "logits/rejected": -1.4090607166290283, "logps/chosen": -513.8897705078125, "logps/rejected": -716.2015380859375, "loss": 0.6775, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34703773260116577, "rewards/margins": 0.2590465843677521, "rewards/rejected": -0.6060842275619507, "step": 10010 }, { "epoch": 0.74, "learning_rate": 3.868305241928711e-07, "logits/chosen": -1.8365532159805298, "logits/rejected": -1.4728257656097412, "logps/chosen": -544.7128295898438, "logps/rejected": -694.3578491210938, "loss": 0.6773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33691835403442383, "rewards/margins": 0.18811661005020142, "rewards/rejected": -0.5250349640846252, "step": 10020 }, { "epoch": 0.74, "learning_rate": 3.8479854098219876e-07, "logits/chosen": -2.0951552391052246, "logits/rejected": -1.4392368793487549, "logps/chosen": -642.8432006835938, "logps/rejected": -822.8040161132812, "loss": 0.6774, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4517860412597656, "rewards/margins": 0.2443142682313919, "rewards/rejected": -0.6961004137992859, "step": 10030 }, { "epoch": 0.74, "learning_rate": 3.827706365113942e-07, "logits/chosen": -1.9967619180679321, "logits/rejected": -1.4442343711853027, "logps/chosen": -499.4161071777344, "logps/rejected": -676.8135986328125, "loss": 0.6792, "rewards/accuracies": 0.75, "rewards/chosen": -0.32226401567459106, "rewards/margins": 0.23093490302562714, "rewards/rejected": -0.5531989336013794, "step": 10040 }, { "epoch": 0.74, "learning_rate": 3.8074682422531313e-07, "logits/chosen": -1.8466475009918213, "logits/rejected": -1.3138294219970703, "logps/chosen": -537.3638305664062, "logps/rejected": -704.8922729492188, "loss": 0.6795, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3983299136161804, "rewards/margins": 0.2061508595943451, "rewards/rejected": -0.6044807434082031, "step": 10050 }, { "epoch": 0.74, "learning_rate": 3.7872711754168083e-07, "logits/chosen": -2.2161247730255127, "logits/rejected": -1.5660630464553833, "logps/chosen": -631.221435546875, "logps/rejected": -825.1295166015625, "loss": 0.6751, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4280322194099426, "rewards/margins": 0.23600144684314728, "rewards/rejected": -0.6640336513519287, "step": 10060 }, { "epoch": 0.74, "learning_rate": 3.7671152985100196e-07, "logits/chosen": -1.9518520832061768, "logits/rejected": -1.5343437194824219, "logps/chosen": -618.0982055664062, "logps/rejected": -783.3131713867188, "loss": 0.6749, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4240972101688385, "rewards/margins": 0.1998126208782196, "rewards/rejected": -0.6239098310470581, "step": 10070 }, { "epoch": 0.74, "learning_rate": 3.7470007451647245e-07, "logits/chosen": -2.0119428634643555, "logits/rejected": -1.6578304767608643, "logps/chosen": -593.2794799804688, "logps/rejected": -676.4530029296875, "loss": 0.6854, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3971095681190491, "rewards/margins": 0.1416260153055191, "rewards/rejected": -0.5387355089187622, "step": 10080 }, { "epoch": 0.74, "learning_rate": 3.72692764873892e-07, "logits/chosen": -2.2142138481140137, "logits/rejected": -1.7009025812149048, "logps/chosen": -591.6778564453125, "logps/rejected": -772.4219360351562, "loss": 0.6791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4272890090942383, "rewards/margins": 0.21378150582313538, "rewards/rejected": -0.641070544719696, "step": 10090 }, { "epoch": 0.75, "learning_rate": 3.7068961423157385e-07, "logits/chosen": -1.9172048568725586, "logits/rejected": -1.5587607622146606, "logps/chosen": -592.3276977539062, "logps/rejected": -780.3473510742188, "loss": 0.6791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4274671673774719, "rewards/margins": 0.1940421760082245, "rewards/rejected": -0.621509313583374, "step": 10100 }, { "epoch": 0.75, "learning_rate": 3.686906358702572e-07, "logits/chosen": -2.280921459197998, "logits/rejected": -1.5009701251983643, "logps/chosen": -529.0114135742188, "logps/rejected": -738.3413696289062, "loss": 0.6759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.36350077390670776, "rewards/margins": 0.2589018940925598, "rewards/rejected": -0.6224027276039124, "step": 10110 }, { "epoch": 0.75, "learning_rate": 3.6669584304302036e-07, "logits/chosen": -2.0633370876312256, "logits/rejected": -1.5686485767364502, "logps/chosen": -473.98858642578125, "logps/rejected": -675.5977783203125, "loss": 0.6723, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33583658933639526, "rewards/margins": 0.20583102107048035, "rewards/rejected": -0.5416675806045532, "step": 10120 }, { "epoch": 0.75, "learning_rate": 3.6470524897519084e-07, "logits/chosen": -2.153020143508911, "logits/rejected": -1.4381616115570068, "logps/chosen": -510.75311279296875, "logps/rejected": -642.9497680664062, "loss": 0.682, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3218676745891571, "rewards/margins": 0.17238202691078186, "rewards/rejected": -0.49424976110458374, "step": 10130 }, { "epoch": 0.75, "learning_rate": 3.6271886686425945e-07, "logits/chosen": -2.0793919563293457, "logits/rejected": -1.6573820114135742, "logps/chosen": -564.9239501953125, "logps/rejected": -709.6806640625, "loss": 0.6805, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.39696937799453735, "rewards/margins": 0.14774182438850403, "rewards/rejected": -0.5447112321853638, "step": 10140 }, { "epoch": 0.75, "learning_rate": 3.607367098797913e-07, "logits/chosen": -2.072359085083008, "logits/rejected": -1.508797287940979, "logps/chosen": -489.96087646484375, "logps/rejected": -672.0217895507812, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.3402118682861328, "rewards/margins": 0.24030666053295135, "rewards/rejected": -0.5805186033248901, "step": 10150 }, { "epoch": 0.75, "learning_rate": 3.5875879116334006e-07, "logits/chosen": -2.0286965370178223, "logits/rejected": -1.6657094955444336, "logps/chosen": -499.6023864746094, "logps/rejected": -668.9901123046875, "loss": 0.676, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.30784904956817627, "rewards/margins": 0.21051296591758728, "rewards/rejected": -0.5183620452880859, "step": 10160 }, { "epoch": 0.75, "learning_rate": 3.5678512382835936e-07, "logits/chosen": -2.265909194946289, "logits/rejected": -1.513696312904358, "logps/chosen": -423.525390625, "logps/rejected": -621.083740234375, "loss": 0.6759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.25671789050102234, "rewards/margins": 0.22159385681152344, "rewards/rejected": -0.4783117175102234, "step": 10170 }, { "epoch": 0.75, "learning_rate": 3.548157209601164e-07, "logits/chosen": -1.9818347692489624, "logits/rejected": -1.506451964378357, "logps/chosen": -610.1216430664062, "logps/rejected": -772.5660400390625, "loss": 0.6769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4762237071990967, "rewards/margins": 0.1986144781112671, "rewards/rejected": -0.6748381853103638, "step": 10180 }, { "epoch": 0.75, "learning_rate": 3.5285059561560594e-07, "logits/chosen": -2.1727778911590576, "logits/rejected": -1.5819756984710693, "logps/chosen": -502.29486083984375, "logps/rejected": -693.85205078125, "loss": 0.6761, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.37279435992240906, "rewards/margins": 0.22346679866313934, "rewards/rejected": -0.596261203289032, "step": 10190 }, { "epoch": 0.75, "learning_rate": 3.508897608234627e-07, "logits/chosen": -1.8888990879058838, "logits/rejected": -1.643821120262146, "logps/chosen": -567.8800048828125, "logps/rejected": -675.1685180664062, "loss": 0.687, "rewards/accuracies": 0.75, "rewards/chosen": -0.4185306429862976, "rewards/margins": 0.11955422163009644, "rewards/rejected": -0.538084864616394, "step": 10200 }, { "epoch": 0.75, "learning_rate": 3.4893322958387474e-07, "logits/chosen": -2.1356120109558105, "logits/rejected": -1.5174757242202759, "logps/chosen": -548.5902099609375, "logps/rejected": -682.8270263671875, "loss": 0.6771, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36398130655288696, "rewards/margins": 0.2043704092502594, "rewards/rejected": -0.5683517456054688, "step": 10210 }, { "epoch": 0.75, "learning_rate": 3.4698101486849906e-07, "logits/chosen": -2.2638707160949707, "logits/rejected": -1.8391008377075195, "logps/chosen": -581.7598266601562, "logps/rejected": -705.9014892578125, "loss": 0.6833, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.38574424386024475, "rewards/margins": 0.1485583782196045, "rewards/rejected": -0.5343025922775269, "step": 10220 }, { "epoch": 0.75, "learning_rate": 3.45033129620374e-07, "logits/chosen": -2.281609058380127, "logits/rejected": -1.5965584516525269, "logps/chosen": -530.3639526367188, "logps/rejected": -681.22216796875, "loss": 0.6732, "rewards/accuracies": 0.875, "rewards/chosen": -0.33471783995628357, "rewards/margins": 0.19648605585098267, "rewards/rejected": -0.5312038660049438, "step": 10230 }, { "epoch": 0.76, "learning_rate": 3.430895867538337e-07, "logits/chosen": -2.2170422077178955, "logits/rejected": -1.7179912328720093, "logps/chosen": -508.55938720703125, "logps/rejected": -652.34130859375, "loss": 0.6731, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34720906615257263, "rewards/margins": 0.20262762904167175, "rewards/rejected": -0.5498366951942444, "step": 10240 }, { "epoch": 0.76, "learning_rate": 3.41150399154422e-07, "logits/chosen": -2.18412446975708, "logits/rejected": -1.61135733127594, "logps/chosen": -559.9265747070312, "logps/rejected": -679.9569091796875, "loss": 0.6828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35869941115379333, "rewards/margins": 0.18918801844120026, "rewards/rejected": -0.5478874444961548, "step": 10250 }, { "epoch": 0.76, "learning_rate": 3.392155796788091e-07, "logits/chosen": -2.3216662406921387, "logits/rejected": -1.5976135730743408, "logps/chosen": -589.1294555664062, "logps/rejected": -824.6995849609375, "loss": 0.6744, "rewards/accuracies": 0.875, "rewards/chosen": -0.39965206384658813, "rewards/margins": 0.2553795576095581, "rewards/rejected": -0.6550316214561462, "step": 10260 }, { "epoch": 0.76, "learning_rate": 3.3728514115470377e-07, "logits/chosen": -2.0783989429473877, "logits/rejected": -1.641135811805725, "logps/chosen": -570.7056884765625, "logps/rejected": -773.15869140625, "loss": 0.6793, "rewards/accuracies": 0.75, "rewards/chosen": -0.4221855700016022, "rewards/margins": 0.18113453686237335, "rewards/rejected": -0.6033200621604919, "step": 10270 }, { "epoch": 0.76, "learning_rate": 3.353590963807693e-07, "logits/chosen": -1.9756866693496704, "logits/rejected": -1.3407076597213745, "logps/chosen": -576.3007202148438, "logps/rejected": -741.7489624023438, "loss": 0.6766, "rewards/accuracies": 0.75, "rewards/chosen": -0.4183483123779297, "rewards/margins": 0.2052718847990036, "rewards/rejected": -0.6236202120780945, "step": 10280 }, { "epoch": 0.76, "learning_rate": 3.334374581265399e-07, "logits/chosen": -2.0405640602111816, "logits/rejected": -1.5418832302093506, "logps/chosen": -579.9664306640625, "logps/rejected": -681.4684448242188, "loss": 0.6799, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41939201951026917, "rewards/margins": 0.1532829850912094, "rewards/rejected": -0.5726749300956726, "step": 10290 }, { "epoch": 0.76, "learning_rate": 3.315202391323336e-07, "logits/chosen": -2.0898852348327637, "logits/rejected": -1.572394609451294, "logps/chosen": -575.5235595703125, "logps/rejected": -784.1859130859375, "loss": 0.6779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40783554315567017, "rewards/margins": 0.2087715119123459, "rewards/rejected": -0.6166070699691772, "step": 10300 }, { "epoch": 0.76, "learning_rate": 3.2960745210916907e-07, "logits/chosen": -1.9215528964996338, "logits/rejected": -1.4039552211761475, "logps/chosen": -588.45849609375, "logps/rejected": -733.9788818359375, "loss": 0.6815, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4130760729312897, "rewards/margins": 0.19491925835609436, "rewards/rejected": -0.607995331287384, "step": 10310 }, { "epoch": 0.76, "learning_rate": 3.276991097386831e-07, "logits/chosen": -1.9391483068466187, "logits/rejected": -1.3062031269073486, "logps/chosen": -619.6441650390625, "logps/rejected": -845.3654174804688, "loss": 0.6689, "rewards/accuracies": 0.875, "rewards/chosen": -0.4251784384250641, "rewards/margins": 0.2749096155166626, "rewards/rejected": -0.7000880837440491, "step": 10320 }, { "epoch": 0.76, "learning_rate": 3.25795224673043e-07, "logits/chosen": -2.136043071746826, "logits/rejected": -1.6826469898223877, "logps/chosen": -656.7913818359375, "logps/rejected": -801.34912109375, "loss": 0.6815, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4846767783164978, "rewards/margins": 0.1762482225894928, "rewards/rejected": -0.6609249711036682, "step": 10330 }, { "epoch": 0.76, "learning_rate": 3.2389580953486475e-07, "logits/chosen": -1.7782888412475586, "logits/rejected": -1.4503881931304932, "logps/chosen": -574.9546508789062, "logps/rejected": -734.10546875, "loss": 0.6811, "rewards/accuracies": 0.625, "rewards/chosen": -0.44209399819374084, "rewards/margins": 0.14138133823871613, "rewards/rejected": -0.5834753513336182, "step": 10340 }, { "epoch": 0.76, "learning_rate": 3.2200087691712916e-07, "logits/chosen": -2.0401883125305176, "logits/rejected": -1.6733863353729248, "logps/chosen": -554.4951782226562, "logps/rejected": -725.464599609375, "loss": 0.6773, "rewards/accuracies": 0.75, "rewards/chosen": -0.4123150706291199, "rewards/margins": 0.17339856922626495, "rewards/rejected": -0.5857136845588684, "step": 10350 }, { "epoch": 0.76, "learning_rate": 3.201104393830991e-07, "logits/chosen": -2.0009820461273193, "logits/rejected": -1.601406455039978, "logps/chosen": -643.0230712890625, "logps/rejected": -778.6748046875, "loss": 0.6814, "rewards/accuracies": 0.75, "rewards/chosen": -0.41987401247024536, "rewards/margins": 0.17443355917930603, "rewards/rejected": -0.5943076014518738, "step": 10360 }, { "epoch": 0.76, "learning_rate": 3.1822450946623414e-07, "logits/chosen": -2.093895435333252, "logits/rejected": -1.787461519241333, "logps/chosen": -511.2699279785156, "logps/rejected": -650.6312255859375, "loss": 0.6826, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3609655499458313, "rewards/margins": 0.15878081321716309, "rewards/rejected": -0.5197464227676392, "step": 10370 }, { "epoch": 0.77, "learning_rate": 3.1634309967010894e-07, "logits/chosen": -2.184480905532837, "logits/rejected": -1.7474029064178467, "logps/chosen": -518.6658325195312, "logps/rejected": -638.1878662109375, "loss": 0.6853, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3620983958244324, "rewards/margins": 0.13626161217689514, "rewards/rejected": -0.4983599781990051, "step": 10380 }, { "epoch": 0.77, "learning_rate": 3.1446622246833123e-07, "logits/chosen": -2.0745065212249756, "logits/rejected": -1.5417364835739136, "logps/chosen": -585.2289428710938, "logps/rejected": -759.5642700195312, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -0.42149320244789124, "rewards/margins": 0.17287272214889526, "rewards/rejected": -0.5943659543991089, "step": 10390 }, { "epoch": 0.77, "learning_rate": 3.1259389030445614e-07, "logits/chosen": -2.2500557899475098, "logits/rejected": -1.680467963218689, "logps/chosen": -599.4012451171875, "logps/rejected": -727.5234985351562, "loss": 0.6797, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3957217335700989, "rewards/margins": 0.180878683924675, "rewards/rejected": -0.5766004323959351, "step": 10400 }, { "epoch": 0.77, "learning_rate": 3.1072611559190707e-07, "logits/chosen": -2.1246416568756104, "logits/rejected": -1.5858956575393677, "logps/chosen": -517.2442626953125, "logps/rejected": -684.1715087890625, "loss": 0.6788, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39085203409194946, "rewards/margins": 0.1931288093328476, "rewards/rejected": -0.5839808583259583, "step": 10410 }, { "epoch": 0.77, "learning_rate": 3.088629107138916e-07, "logits/chosen": -2.064831018447876, "logits/rejected": -1.6966928243637085, "logps/chosen": -523.6608276367188, "logps/rejected": -693.6393432617188, "loss": 0.6771, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.33710962533950806, "rewards/margins": 0.2137034684419632, "rewards/rejected": -0.5508130788803101, "step": 10420 }, { "epoch": 0.77, "learning_rate": 3.070042880233189e-07, "logits/chosen": -2.171900749206543, "logits/rejected": -1.8328006267547607, "logps/chosen": -501.54522705078125, "logps/rejected": -644.6825561523438, "loss": 0.6809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3496556878089905, "rewards/margins": 0.15000221133232117, "rewards/rejected": -0.49965786933898926, "step": 10430 }, { "epoch": 0.77, "learning_rate": 3.051502598427187e-07, "logits/chosen": -1.9382143020629883, "logits/rejected": -1.5396708250045776, "logps/chosen": -499.06103515625, "logps/rejected": -657.9193115234375, "loss": 0.6736, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3396189212799072, "rewards/margins": 0.20015518367290497, "rewards/rejected": -0.5397740602493286, "step": 10440 }, { "epoch": 0.77, "learning_rate": 3.0330083846416055e-07, "logits/chosen": -2.1952919960021973, "logits/rejected": -1.5463773012161255, "logps/chosen": -537.152099609375, "logps/rejected": -696.9277954101562, "loss": 0.6808, "rewards/accuracies": 0.75, "rewards/chosen": -0.3444993495941162, "rewards/margins": 0.22508642077445984, "rewards/rejected": -0.5695858001708984, "step": 10450 }, { "epoch": 0.77, "learning_rate": 3.0145603614917015e-07, "logits/chosen": -1.9473129510879517, "logits/rejected": -1.587754726409912, "logps/chosen": -455.41748046875, "logps/rejected": -677.6329956054688, "loss": 0.6727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.33350884914398193, "rewards/margins": 0.22765998542308807, "rewards/rejected": -0.5611687898635864, "step": 10460 }, { "epoch": 0.77, "learning_rate": 2.9961586512864944e-07, "logits/chosen": -2.108482837677002, "logits/rejected": -1.6033843755722046, "logps/chosen": -635.1039428710938, "logps/rejected": -771.2107543945312, "loss": 0.6758, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.44512230157852173, "rewards/margins": 0.18214602768421173, "rewards/rejected": -0.627268373966217, "step": 10470 }, { "epoch": 0.77, "learning_rate": 2.977803376027951e-07, "logits/chosen": -2.117605447769165, "logits/rejected": -1.7619565725326538, "logps/chosen": -660.804931640625, "logps/rejected": -786.6671142578125, "loss": 0.6805, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4646502137184143, "rewards/margins": 0.14719609916210175, "rewards/rejected": -0.6118463277816772, "step": 10480 }, { "epoch": 0.77, "learning_rate": 2.95949465741018e-07, "logits/chosen": -2.022155523300171, "logits/rejected": -1.5110101699829102, "logps/chosen": -506.2291564941406, "logps/rejected": -703.7017211914062, "loss": 0.68, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3606299161911011, "rewards/margins": 0.22152552008628845, "rewards/rejected": -0.5821554064750671, "step": 10490 }, { "epoch": 0.77, "learning_rate": 2.9412326168186286e-07, "logits/chosen": -2.186763286590576, "logits/rejected": -1.5946972370147705, "logps/chosen": -511.380615234375, "logps/rejected": -657.2965087890625, "loss": 0.6811, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3725585341453552, "rewards/margins": 0.19504386186599731, "rewards/rejected": -0.5676023960113525, "step": 10500 }, { "epoch": 0.78, "learning_rate": 2.9230173753292575e-07, "logits/chosen": -2.050874948501587, "logits/rejected": -1.6147810220718384, "logps/chosen": -515.0924682617188, "logps/rejected": -685.6243896484375, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3640030026435852, "rewards/margins": 0.20895524322986603, "rewards/rejected": -0.5729581713676453, "step": 10510 }, { "epoch": 0.78, "learning_rate": 2.9048490537077697e-07, "logits/chosen": -2.3163323402404785, "logits/rejected": -2.0036444664001465, "logps/chosen": -523.0878295898438, "logps/rejected": -666.3346557617188, "loss": 0.6813, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35183772444725037, "rewards/margins": 0.18088234961032867, "rewards/rejected": -0.5327199697494507, "step": 10520 }, { "epoch": 0.78, "learning_rate": 2.8867277724087814e-07, "logits/chosen": -2.1048450469970703, "logits/rejected": -1.5720534324645996, "logps/chosen": -570.2147216796875, "logps/rejected": -735.7281494140625, "loss": 0.6725, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4201534390449524, "rewards/margins": 0.2204832136631012, "rewards/rejected": -0.640636682510376, "step": 10530 }, { "epoch": 0.78, "learning_rate": 2.868653651575035e-07, "logits/chosen": -2.1716561317443848, "logits/rejected": -1.4495906829833984, "logps/chosen": -629.9263916015625, "logps/rejected": -758.86181640625, "loss": 0.6791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41328898072242737, "rewards/margins": 0.21519549190998077, "rewards/rejected": -0.6284844875335693, "step": 10540 }, { "epoch": 0.78, "learning_rate": 2.850626811036612e-07, "logits/chosen": -2.2908730506896973, "logits/rejected": -1.9335434436798096, "logps/chosen": -483.810302734375, "logps/rejected": -613.1980590820312, "loss": 0.6818, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3245709240436554, "rewards/margins": 0.1458684206008911, "rewards/rejected": -0.47043928503990173, "step": 10550 }, { "epoch": 0.78, "learning_rate": 2.832647370310118e-07, "logits/chosen": -1.9891103506088257, "logits/rejected": -1.6775586605072021, "logps/chosen": -546.0992431640625, "logps/rejected": -706.2098388671875, "loss": 0.6776, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38032522797584534, "rewards/margins": 0.17626838386058807, "rewards/rejected": -0.5565935969352722, "step": 10560 }, { "epoch": 0.78, "learning_rate": 2.8147154485979073e-07, "logits/chosen": -2.144073963165283, "logits/rejected": -1.7529805898666382, "logps/chosen": -558.131591796875, "logps/rejected": -685.4902954101562, "loss": 0.6759, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3822546899318695, "rewards/margins": 0.1836271584033966, "rewards/rejected": -0.5658819079399109, "step": 10570 }, { "epoch": 0.78, "learning_rate": 2.7968311647872857e-07, "logits/chosen": -2.000150203704834, "logits/rejected": -1.5107579231262207, "logps/chosen": -529.5408325195312, "logps/rejected": -747.8154296875, "loss": 0.669, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.37845689058303833, "rewards/margins": 0.2340497076511383, "rewards/rejected": -0.6125065684318542, "step": 10580 }, { "epoch": 0.78, "learning_rate": 2.7789946374497276e-07, "logits/chosen": -1.9456812143325806, "logits/rejected": -1.5503953695297241, "logps/chosen": -479.95166015625, "logps/rejected": -662.1163330078125, "loss": 0.6762, "rewards/accuracies": 0.75, "rewards/chosen": -0.3551887571811676, "rewards/margins": 0.21338209509849548, "rewards/rejected": -0.5685708522796631, "step": 10590 }, { "epoch": 0.78, "learning_rate": 2.761205984840078e-07, "logits/chosen": -2.1441287994384766, "logits/rejected": -1.4046849012374878, "logps/chosen": -521.8428955078125, "logps/rejected": -688.54150390625, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3625255525112152, "rewards/margins": 0.21373891830444336, "rewards/rejected": -0.576264500617981, "step": 10600 }, { "epoch": 0.78, "learning_rate": 2.743465324895775e-07, "logits/chosen": -2.193976879119873, "logits/rejected": -1.7303205728530884, "logps/chosen": -539.5777587890625, "logps/rejected": -703.30029296875, "loss": 0.674, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36469537019729614, "rewards/margins": 0.22154657542705536, "rewards/rejected": -0.5862419605255127, "step": 10610 }, { "epoch": 0.78, "learning_rate": 2.725772775236081e-07, "logits/chosen": -2.0336716175079346, "logits/rejected": -1.7612769603729248, "logps/chosen": -466.799072265625, "logps/rejected": -623.3970947265625, "loss": 0.6862, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3272768557071686, "rewards/margins": 0.1337151825428009, "rewards/rejected": -0.4609920382499695, "step": 10620 }, { "epoch": 0.78, "learning_rate": 2.708128453161279e-07, "logits/chosen": -1.892351746559143, "logits/rejected": -1.5013676881790161, "logps/chosen": -493.01513671875, "logps/rejected": -677.9485473632812, "loss": 0.6785, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35529211163520813, "rewards/margins": 0.18309606611728668, "rewards/rejected": -0.5383881330490112, "step": 10630 }, { "epoch": 0.78, "learning_rate": 2.690532475651909e-07, "logits/chosen": -2.1886696815490723, "logits/rejected": -1.6499555110931396, "logps/chosen": -509.1919860839844, "logps/rejected": -679.2091064453125, "loss": 0.6774, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2929200828075409, "rewards/margins": 0.23404213786125183, "rewards/rejected": -0.5269622802734375, "step": 10640 }, { "epoch": 0.79, "learning_rate": 2.6729849593679945e-07, "logits/chosen": -2.360426664352417, "logits/rejected": -1.686313271522522, "logps/chosen": -549.1649780273438, "logps/rejected": -646.7726440429688, "loss": 0.6786, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3691824972629547, "rewards/margins": 0.17812438309192657, "rewards/rejected": -0.5473068952560425, "step": 10650 }, { "epoch": 0.79, "learning_rate": 2.655486020648262e-07, "logits/chosen": -2.056715488433838, "logits/rejected": -1.7340837717056274, "logps/chosen": -558.26611328125, "logps/rejected": -693.526123046875, "loss": 0.6793, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3931505084037781, "rewards/margins": 0.1679086685180664, "rewards/rejected": -0.5610591769218445, "step": 10660 }, { "epoch": 0.79, "learning_rate": 2.6380357755093683e-07, "logits/chosen": -2.0560123920440674, "logits/rejected": -1.6119024753570557, "logps/chosen": -639.6138916015625, "logps/rejected": -764.7164306640625, "loss": 0.6791, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.45962581038475037, "rewards/margins": 0.15371228754520416, "rewards/rejected": -0.6133381724357605, "step": 10670 }, { "epoch": 0.79, "learning_rate": 2.6206343396451426e-07, "logits/chosen": -1.9950430393218994, "logits/rejected": -1.3809174299240112, "logps/chosen": -461.9769592285156, "logps/rejected": -673.701416015625, "loss": 0.6713, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35400134325027466, "rewards/margins": 0.24448266625404358, "rewards/rejected": -0.5984839200973511, "step": 10680 }, { "epoch": 0.79, "learning_rate": 2.603281828425812e-07, "logits/chosen": -2.1688809394836426, "logits/rejected": -1.4829763174057007, "logps/chosen": -454.86376953125, "logps/rejected": -698.8355712890625, "loss": 0.6738, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.27301931381225586, "rewards/margins": 0.2838878929615021, "rewards/rejected": -0.5569071769714355, "step": 10690 }, { "epoch": 0.79, "learning_rate": 2.585978356897229e-07, "logits/chosen": -2.0652523040771484, "logits/rejected": -1.5851044654846191, "logps/chosen": -513.6170043945312, "logps/rejected": -659.014892578125, "loss": 0.6791, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32184848189353943, "rewards/margins": 0.2077127993106842, "rewards/rejected": -0.5295613408088684, "step": 10700 }, { "epoch": 0.79, "learning_rate": 2.568724039780122e-07, "logits/chosen": -2.0851826667785645, "logits/rejected": -1.5598808526992798, "logps/chosen": -486.662353515625, "logps/rejected": -689.19677734375, "loss": 0.6735, "rewards/accuracies": 0.75, "rewards/chosen": -0.32306092977523804, "rewards/margins": 0.22072815895080566, "rewards/rejected": -0.5437890291213989, "step": 10710 }, { "epoch": 0.79, "learning_rate": 2.5515189914693323e-07, "logits/chosen": -2.1234490871429443, "logits/rejected": -1.6652425527572632, "logps/chosen": -506.23565673828125, "logps/rejected": -656.379150390625, "loss": 0.6788, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.34732329845428467, "rewards/margins": 0.16897304356098175, "rewards/rejected": -0.5162962675094604, "step": 10720 }, { "epoch": 0.79, "learning_rate": 2.534363326033048e-07, "logits/chosen": -2.3087897300720215, "logits/rejected": -1.6239168643951416, "logps/chosen": -532.6551513671875, "logps/rejected": -705.4374389648438, "loss": 0.6779, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3329441249370575, "rewards/margins": 0.23473021388053894, "rewards/rejected": -0.5676743388175964, "step": 10730 }, { "epoch": 0.79, "learning_rate": 2.5172571572120484e-07, "logits/chosen": -1.893275499343872, "logits/rejected": -1.372240424156189, "logps/chosen": -542.9639892578125, "logps/rejected": -793.4429321289062, "loss": 0.6732, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3965885043144226, "rewards/margins": 0.2541755735874176, "rewards/rejected": -0.6507641077041626, "step": 10740 }, { "epoch": 0.79, "learning_rate": 2.5002005984189655e-07, "logits/chosen": -2.0689899921417236, "logits/rejected": -1.6377427577972412, "logps/chosen": -545.60986328125, "logps/rejected": -627.2432861328125, "loss": 0.6816, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35989484190940857, "rewards/margins": 0.15491078794002533, "rewards/rejected": -0.5148055553436279, "step": 10750 }, { "epoch": 0.79, "learning_rate": 2.48319376273751e-07, "logits/chosen": -2.2028651237487793, "logits/rejected": -1.4643477201461792, "logps/chosen": -577.0232543945312, "logps/rejected": -699.4783325195312, "loss": 0.6777, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3890058696269989, "rewards/margins": 0.19969801604747772, "rewards/rejected": -0.5887039303779602, "step": 10760 }, { "epoch": 0.79, "learning_rate": 2.466236762921742e-07, "logits/chosen": -2.2632381916046143, "logits/rejected": -1.83908212184906, "logps/chosen": -530.7381591796875, "logps/rejected": -624.7786865234375, "loss": 0.6803, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37680983543395996, "rewards/margins": 0.13502205908298492, "rewards/rejected": -0.5118318796157837, "step": 10770 }, { "epoch": 0.8, "learning_rate": 2.449329711395299e-07, "logits/chosen": -2.306734561920166, "logits/rejected": -1.6053659915924072, "logps/chosen": -513.4888916015625, "logps/rejected": -669.3111572265625, "loss": 0.678, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3716445565223694, "rewards/margins": 0.18246564269065857, "rewards/rejected": -0.5541102290153503, "step": 10780 }, { "epoch": 0.8, "learning_rate": 2.432472720250682e-07, "logits/chosen": -2.0206589698791504, "logits/rejected": -1.5303064584732056, "logps/chosen": -570.9443969726562, "logps/rejected": -695.1116333007812, "loss": 0.6772, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.376811683177948, "rewards/margins": 0.2026720941066742, "rewards/rejected": -0.5794838070869446, "step": 10790 }, { "epoch": 0.8, "learning_rate": 2.415665901248485e-07, "logits/chosen": -2.1157279014587402, "logits/rejected": -1.3440406322479248, "logps/chosen": -481.9535217285156, "logps/rejected": -675.973388671875, "loss": 0.6746, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3256687521934509, "rewards/margins": 0.2585730254650116, "rewards/rejected": -0.5842418074607849, "step": 10800 }, { "epoch": 0.8, "learning_rate": 2.398909365816662e-07, "logits/chosen": -2.0803186893463135, "logits/rejected": -1.752389907836914, "logps/chosen": -563.4825439453125, "logps/rejected": -751.8348388671875, "loss": 0.6761, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3895424008369446, "rewards/margins": 0.194087952375412, "rewards/rejected": -0.583630383014679, "step": 10810 }, { "epoch": 0.8, "learning_rate": 2.3822032250498003e-07, "logits/chosen": -1.8449316024780273, "logits/rejected": -1.4013381004333496, "logps/chosen": -506.70281982421875, "logps/rejected": -704.1387329101562, "loss": 0.6745, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.38750454783439636, "rewards/margins": 0.21737447381019592, "rewards/rejected": -0.6048790216445923, "step": 10820 }, { "epoch": 0.8, "learning_rate": 2.3655475897083688e-07, "logits/chosen": -2.1290407180786133, "logits/rejected": -1.7081708908081055, "logps/chosen": -521.71044921875, "logps/rejected": -651.2029418945312, "loss": 0.6773, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3622135519981384, "rewards/margins": 0.19280828535556793, "rewards/rejected": -0.5550217628479004, "step": 10830 }, { "epoch": 0.8, "learning_rate": 2.3489425702179865e-07, "logits/chosen": -1.9179744720458984, "logits/rejected": -1.5480151176452637, "logps/chosen": -512.5335083007812, "logps/rejected": -674.140380859375, "loss": 0.6767, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3705032765865326, "rewards/margins": 0.18422213196754456, "rewards/rejected": -0.5547254681587219, "step": 10840 }, { "epoch": 0.8, "learning_rate": 2.3323882766686975e-07, "logits/chosen": -2.1862170696258545, "logits/rejected": -1.8159459829330444, "logps/chosen": -570.6233520507812, "logps/rejected": -706.8641357421875, "loss": 0.6808, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4055134654045105, "rewards/margins": 0.15210683643817902, "rewards/rejected": -0.5576204061508179, "step": 10850 }, { "epoch": 0.8, "learning_rate": 2.3158848188142388e-07, "logits/chosen": -2.111546516418457, "logits/rejected": -1.7774909734725952, "logps/chosen": -486.26422119140625, "logps/rejected": -610.6021118164062, "loss": 0.6841, "rewards/accuracies": 0.625, "rewards/chosen": -0.3105432093143463, "rewards/margins": 0.14615751802921295, "rewards/rejected": -0.45670074224472046, "step": 10860 }, { "epoch": 0.8, "learning_rate": 2.299432306071306e-07, "logits/chosen": -1.9830353260040283, "logits/rejected": -1.4597135782241821, "logps/chosen": -492.3953552246094, "logps/rejected": -680.1231079101562, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38669583201408386, "rewards/margins": 0.20478415489196777, "rewards/rejected": -0.5914799571037292, "step": 10870 }, { "epoch": 0.8, "learning_rate": 2.2830308475188287e-07, "logits/chosen": -2.2889063358306885, "logits/rejected": -1.6898342370986938, "logps/chosen": -469.3197326660156, "logps/rejected": -644.8192138671875, "loss": 0.6773, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3269376754760742, "rewards/margins": 0.2115563154220581, "rewards/rejected": -0.5384939908981323, "step": 10880 }, { "epoch": 0.8, "learning_rate": 2.2666805518972632e-07, "logits/chosen": -2.0307421684265137, "logits/rejected": -1.5409694910049438, "logps/chosen": -591.9019775390625, "logps/rejected": -701.6461181640625, "loss": 0.6812, "rewards/accuracies": 0.625, "rewards/chosen": -0.36903706192970276, "rewards/margins": 0.18706443905830383, "rewards/rejected": -0.5561014413833618, "step": 10890 }, { "epoch": 0.8, "learning_rate": 2.2503815276078475e-07, "logits/chosen": -2.1525652408599854, "logits/rejected": -1.674991250038147, "logps/chosen": -525.7415161132812, "logps/rejected": -696.43359375, "loss": 0.6767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35493582487106323, "rewards/margins": 0.21227797865867615, "rewards/rejected": -0.567213773727417, "step": 10900 }, { "epoch": 0.8, "learning_rate": 2.2341338827118982e-07, "logits/chosen": -2.0912506580352783, "logits/rejected": -1.597157597541809, "logps/chosen": -531.5753784179688, "logps/rejected": -773.052734375, "loss": 0.6759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3262995183467865, "rewards/margins": 0.27241984009742737, "rewards/rejected": -0.5987193584442139, "step": 10910 }, { "epoch": 0.81, "learning_rate": 2.2179377249300957e-07, "logits/chosen": -2.0986053943634033, "logits/rejected": -1.5119253396987915, "logps/chosen": -521.8085327148438, "logps/rejected": -690.8663940429688, "loss": 0.6774, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3727436065673828, "rewards/margins": 0.21108171343803406, "rewards/rejected": -0.5838252902030945, "step": 10920 }, { "epoch": 0.81, "learning_rate": 2.201793161641754e-07, "logits/chosen": -2.085059642791748, "logits/rejected": -1.7490705251693726, "logps/chosen": -551.7567138671875, "logps/rejected": -723.2818603515625, "loss": 0.6777, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3586350083351135, "rewards/margins": 0.19101367890834808, "rewards/rejected": -0.549648642539978, "step": 10930 }, { "epoch": 0.81, "learning_rate": 2.1857002998841334e-07, "logits/chosen": -2.17722749710083, "logits/rejected": -1.7010624408721924, "logps/chosen": -486.0692443847656, "logps/rejected": -555.6473999023438, "loss": 0.6845, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29386553168296814, "rewards/margins": 0.12873950600624084, "rewards/rejected": -0.4226049482822418, "step": 10940 }, { "epoch": 0.81, "learning_rate": 2.1696592463517016e-07, "logits/chosen": -2.069153308868408, "logits/rejected": -1.543979525566101, "logps/chosen": -556.296142578125, "logps/rejected": -689.0145263671875, "loss": 0.6799, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4105522632598877, "rewards/margins": 0.18179269134998322, "rewards/rejected": -0.5923448801040649, "step": 10950 }, { "epoch": 0.81, "learning_rate": 2.1536701073954556e-07, "logits/chosen": -2.17238187789917, "logits/rejected": -1.5424482822418213, "logps/chosen": -590.9041748046875, "logps/rejected": -725.8909301757812, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4255344867706299, "rewards/margins": 0.15771031379699707, "rewards/rejected": -0.5832447409629822, "step": 10960 }, { "epoch": 0.81, "learning_rate": 2.137732989022193e-07, "logits/chosen": -2.245425224304199, "logits/rejected": -1.5637383460998535, "logps/chosen": -500.72296142578125, "logps/rejected": -684.111328125, "loss": 0.6759, "rewards/accuracies": 0.75, "rewards/chosen": -0.3180897831916809, "rewards/margins": 0.23746657371520996, "rewards/rejected": -0.5555562973022461, "step": 10970 }, { "epoch": 0.81, "learning_rate": 2.1218479968938185e-07, "logits/chosen": -2.176104784011841, "logits/rejected": -1.6241388320922852, "logps/chosen": -540.0192260742188, "logps/rejected": -761.0203857421875, "loss": 0.6759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35575196146965027, "rewards/margins": 0.24659201502799988, "rewards/rejected": -0.6023439168930054, "step": 10980 }, { "epoch": 0.81, "learning_rate": 2.106015236326655e-07, "logits/chosen": -2.039689540863037, "logits/rejected": -1.5349855422973633, "logps/chosen": -589.9346923828125, "logps/rejected": -738.235107421875, "loss": 0.6747, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.38452786207199097, "rewards/margins": 0.19346585869789124, "rewards/rejected": -0.5779936909675598, "step": 10990 }, { "epoch": 0.81, "learning_rate": 2.090234812290721e-07, "logits/chosen": -2.2033839225769043, "logits/rejected": -1.5826890468597412, "logps/chosen": -529.233642578125, "logps/rejected": -761.4420166015625, "loss": 0.6708, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35690170526504517, "rewards/margins": 0.25006797909736633, "rewards/rejected": -0.6069697141647339, "step": 11000 }, { "epoch": 0.81, "learning_rate": 2.0745068294090517e-07, "logits/chosen": -1.8588390350341797, "logits/rejected": -1.5646635293960571, "logps/chosen": -467.3592224121094, "logps/rejected": -627.1018676757812, "loss": 0.68, "rewards/accuracies": 0.75, "rewards/chosen": -0.3085804283618927, "rewards/margins": 0.18440012633800507, "rewards/rejected": -0.49298062920570374, "step": 11010 }, { "epoch": 0.81, "learning_rate": 2.0588313919570055e-07, "logits/chosen": -2.0810747146606445, "logits/rejected": -1.408111333847046, "logps/chosen": -466.6900329589844, "logps/rejected": -673.1016235351562, "loss": 0.6746, "rewards/accuracies": 0.75, "rewards/chosen": -0.32904472947120667, "rewards/margins": 0.23681005835533142, "rewards/rejected": -0.5658548474311829, "step": 11020 }, { "epoch": 0.81, "learning_rate": 2.043208603861567e-07, "logits/chosen": -1.9701420068740845, "logits/rejected": -1.5445826053619385, "logps/chosen": -477.5081481933594, "logps/rejected": -686.8162841796875, "loss": 0.682, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32325586676597595, "rewards/margins": 0.20145726203918457, "rewards/rejected": -0.5247131586074829, "step": 11030 }, { "epoch": 0.81, "learning_rate": 2.0276385687006558e-07, "logits/chosen": -2.1076884269714355, "logits/rejected": -1.6466995477676392, "logps/chosen": -517.8886108398438, "logps/rejected": -651.1657104492188, "loss": 0.6774, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35351067781448364, "rewards/margins": 0.18619707226753235, "rewards/rejected": -0.5397077202796936, "step": 11040 }, { "epoch": 0.82, "learning_rate": 2.0121213897024446e-07, "logits/chosen": -2.203386068344116, "logits/rejected": -1.7619549036026, "logps/chosen": -534.5306396484375, "logps/rejected": -721.2672729492188, "loss": 0.679, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35543617606163025, "rewards/margins": 0.18918003141880035, "rewards/rejected": -0.5446162223815918, "step": 11050 }, { "epoch": 0.82, "learning_rate": 1.9966571697446777e-07, "logits/chosen": -2.1693358421325684, "logits/rejected": -1.5129002332687378, "logps/chosen": -466.4637145996094, "logps/rejected": -627.1818237304688, "loss": 0.6771, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.32646816968917847, "rewards/margins": 0.2007295787334442, "rewards/rejected": -0.5271977186203003, "step": 11060 }, { "epoch": 0.82, "learning_rate": 1.9812460113539808e-07, "logits/chosen": -2.010284185409546, "logits/rejected": -1.5282354354858398, "logps/chosen": -552.5551147460938, "logps/rejected": -740.6582641601562, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.379190057516098, "rewards/margins": 0.242404505610466, "rewards/rejected": -0.6215946078300476, "step": 11070 }, { "epoch": 0.82, "learning_rate": 1.9658880167051828e-07, "logits/chosen": -2.117943286895752, "logits/rejected": -1.448421597480774, "logps/chosen": -486.53033447265625, "logps/rejected": -657.52978515625, "loss": 0.6767, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33636724948883057, "rewards/margins": 0.20785737037658691, "rewards/rejected": -0.5442246198654175, "step": 11080 }, { "epoch": 0.82, "learning_rate": 1.950583287620652e-07, "logits/chosen": -2.0691158771514893, "logits/rejected": -1.6485700607299805, "logps/chosen": -471.4639587402344, "logps/rejected": -635.6940307617188, "loss": 0.6789, "rewards/accuracies": 0.75, "rewards/chosen": -0.31615012884140015, "rewards/margins": 0.18050697445869446, "rewards/rejected": -0.496657133102417, "step": 11090 }, { "epoch": 0.82, "learning_rate": 1.9353319255695987e-07, "logits/chosen": -2.0407967567443848, "logits/rejected": -1.590834617614746, "logps/chosen": -500.94940185546875, "logps/rejected": -654.8148193359375, "loss": 0.681, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.31715118885040283, "rewards/margins": 0.17893825471401215, "rewards/rejected": -0.49608945846557617, "step": 11100 }, { "epoch": 0.82, "learning_rate": 1.9201340316674118e-07, "logits/chosen": -2.180812120437622, "logits/rejected": -1.7350797653198242, "logps/chosen": -472.43292236328125, "logps/rejected": -690.92724609375, "loss": 0.6749, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31833207607269287, "rewards/margins": 0.22675812244415283, "rewards/rejected": -0.5450900793075562, "step": 11110 }, { "epoch": 0.82, "learning_rate": 1.9049897066750086e-07, "logits/chosen": -1.9958397150039673, "logits/rejected": -1.3506852388381958, "logps/chosen": -458.17120361328125, "logps/rejected": -646.0404052734375, "loss": 0.6757, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31187617778778076, "rewards/margins": 0.23398855328559875, "rewards/rejected": -0.5458647012710571, "step": 11120 }, { "epoch": 0.82, "learning_rate": 1.8898990509981304e-07, "logits/chosen": -2.2154998779296875, "logits/rejected": -1.7998969554901123, "logps/chosen": -546.8018188476562, "logps/rejected": -691.36181640625, "loss": 0.6749, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.382901668548584, "rewards/margins": 0.18915510177612305, "rewards/rejected": -0.5720568299293518, "step": 11130 }, { "epoch": 0.82, "learning_rate": 1.874862164686697e-07, "logits/chosen": -2.318312406539917, "logits/rejected": -1.6733108758926392, "logps/chosen": -515.534912109375, "logps/rejected": -658.1876220703125, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.365068256855011, "rewards/margins": 0.19886387884616852, "rewards/rejected": -0.5639321208000183, "step": 11140 }, { "epoch": 0.82, "learning_rate": 1.8598791474341513e-07, "logits/chosen": -2.2325592041015625, "logits/rejected": -1.8393135070800781, "logps/chosen": -522.1558837890625, "logps/rejected": -683.9580688476562, "loss": 0.6766, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3981925845146179, "rewards/margins": 0.17568477988243103, "rewards/rejected": -0.5738773345947266, "step": 11150 }, { "epoch": 0.82, "learning_rate": 1.8449500985767797e-07, "logits/chosen": -2.0550386905670166, "logits/rejected": -1.7208935022354126, "logps/chosen": -515.6741333007812, "logps/rejected": -682.4834594726562, "loss": 0.6766, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3454609513282776, "rewards/margins": 0.1756027787923813, "rewards/rejected": -0.5210637450218201, "step": 11160 }, { "epoch": 0.82, "learning_rate": 1.830075117093064e-07, "logits/chosen": -2.1245198249816895, "logits/rejected": -1.6286752223968506, "logps/chosen": -522.698486328125, "logps/rejected": -742.0823364257812, "loss": 0.6728, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.39660799503326416, "rewards/margins": 0.2427835911512375, "rewards/rejected": -0.6393916010856628, "step": 11170 }, { "epoch": 0.82, "learning_rate": 1.8152543016030207e-07, "logits/chosen": -2.0065226554870605, "logits/rejected": -1.3822864294052124, "logps/chosen": -514.8363037109375, "logps/rejected": -706.56298828125, "loss": 0.675, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34601300954818726, "rewards/margins": 0.22399237751960754, "rewards/rejected": -0.5700053572654724, "step": 11180 }, { "epoch": 0.83, "learning_rate": 1.8004877503675597e-07, "logits/chosen": -2.173508405685425, "logits/rejected": -1.5496495962142944, "logps/chosen": -549.7410888671875, "logps/rejected": -729.1505737304688, "loss": 0.6736, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3554348051548004, "rewards/margins": 0.2492203712463379, "rewards/rejected": -0.6046552062034607, "step": 11190 }, { "epoch": 0.83, "learning_rate": 1.7857755612878101e-07, "logits/chosen": -2.214113235473633, "logits/rejected": -1.747985601425171, "logps/chosen": -439.21990966796875, "logps/rejected": -640.9788818359375, "loss": 0.678, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29404741525650024, "rewards/margins": 0.2010693997144699, "rewards/rejected": -0.4951168894767761, "step": 11200 }, { "epoch": 0.83, "learning_rate": 1.7711178319044928e-07, "logits/chosen": -2.262014865875244, "logits/rejected": -1.5814998149871826, "logps/chosen": -419.3548278808594, "logps/rejected": -636.02734375, "loss": 0.6749, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.23753714561462402, "rewards/margins": 0.2587575912475586, "rewards/rejected": -0.4962947964668274, "step": 11210 }, { "epoch": 0.83, "learning_rate": 1.7565146593972658e-07, "logits/chosen": -2.145596981048584, "logits/rejected": -1.4776171445846558, "logps/chosen": -441.65863037109375, "logps/rejected": -667.8380126953125, "loss": 0.6792, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3123208284378052, "rewards/margins": 0.24159188568592072, "rewards/rejected": -0.5539126992225647, "step": 11220 }, { "epoch": 0.83, "learning_rate": 1.7419661405840714e-07, "logits/chosen": -1.879442572593689, "logits/rejected": -1.6890602111816406, "logps/chosen": -521.6060791015625, "logps/rejected": -712.320068359375, "loss": 0.6742, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.35336145758628845, "rewards/margins": 0.19200098514556885, "rewards/rejected": -0.5453624129295349, "step": 11230 }, { "epoch": 0.83, "learning_rate": 1.7274723719205042e-07, "logits/chosen": -1.9975048303604126, "logits/rejected": -1.5508085489273071, "logps/chosen": -539.3282470703125, "logps/rejected": -679.6025390625, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3547186255455017, "rewards/margins": 0.17232701182365417, "rewards/rejected": -0.5270456075668335, "step": 11240 }, { "epoch": 0.83, "learning_rate": 1.713033449499177e-07, "logits/chosen": -1.8922897577285767, "logits/rejected": -1.4738727807998657, "logps/chosen": -534.0403442382812, "logps/rejected": -703.4894409179688, "loss": 0.6779, "rewards/accuracies": 0.75, "rewards/chosen": -0.34626370668411255, "rewards/margins": 0.2069902867078781, "rewards/rejected": -0.5532540082931519, "step": 11250 }, { "epoch": 0.83, "learning_rate": 1.6986494690490672e-07, "logits/chosen": -2.209655523300171, "logits/rejected": -1.8722198009490967, "logps/chosen": -541.1258544921875, "logps/rejected": -585.009765625, "loss": 0.6859, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35546523332595825, "rewards/margins": 0.09770401567220688, "rewards/rejected": -0.4531692564487457, "step": 11260 }, { "epoch": 0.83, "learning_rate": 1.6843205259348913e-07, "logits/chosen": -2.1242384910583496, "logits/rejected": -1.585843801498413, "logps/chosen": -402.3747253417969, "logps/rejected": -570.5761108398438, "loss": 0.679, "rewards/accuracies": 0.625, "rewards/chosen": -0.23818814754486084, "rewards/margins": 0.18978819251060486, "rewards/rejected": -0.4279763698577881, "step": 11270 }, { "epoch": 0.83, "learning_rate": 1.6700467151564812e-07, "logits/chosen": -2.080387830734253, "logits/rejected": -1.8800522089004517, "logps/chosen": -508.39703369140625, "logps/rejected": -652.1477661132812, "loss": 0.6765, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35491079092025757, "rewards/margins": 0.13959527015686035, "rewards/rejected": -0.4945060610771179, "step": 11280 }, { "epoch": 0.83, "learning_rate": 1.6558281313481336e-07, "logits/chosen": -2.086334228515625, "logits/rejected": -1.7285445928573608, "logps/chosen": -439.4105529785156, "logps/rejected": -648.2574462890625, "loss": 0.6783, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.31938764452934265, "rewards/margins": 0.21171119809150696, "rewards/rejected": -0.5310988426208496, "step": 11290 }, { "epoch": 0.83, "learning_rate": 1.6416648687780066e-07, "logits/chosen": -2.17122220993042, "logits/rejected": -1.6196234226226807, "logps/chosen": -493.2779235839844, "logps/rejected": -708.6973876953125, "loss": 0.6687, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33853742480278015, "rewards/margins": 0.25859734416007996, "rewards/rejected": -0.5971347093582153, "step": 11300 }, { "epoch": 0.83, "learning_rate": 1.627557021347471e-07, "logits/chosen": -2.2199857234954834, "logits/rejected": -1.605706810951233, "logps/chosen": -550.2164306640625, "logps/rejected": -713.4415893554688, "loss": 0.6775, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.39293330907821655, "rewards/margins": 0.1979748159646988, "rewards/rejected": -0.5909081697463989, "step": 11310 }, { "epoch": 0.83, "learning_rate": 1.6135046825905097e-07, "logits/chosen": -2.059091567993164, "logits/rejected": -1.6327292919158936, "logps/chosen": -522.040283203125, "logps/rejected": -707.2333374023438, "loss": 0.6796, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3427750766277313, "rewards/margins": 0.21159076690673828, "rewards/rejected": -0.554365873336792, "step": 11320 }, { "epoch": 0.84, "learning_rate": 1.59950794567308e-07, "logits/chosen": -1.9733203649520874, "logits/rejected": -1.301887035369873, "logps/chosen": -454.64605712890625, "logps/rejected": -668.7889404296875, "loss": 0.6667, "rewards/accuracies": 0.875, "rewards/chosen": -0.285489946603775, "rewards/margins": 0.26248130202293396, "rewards/rejected": -0.547971248626709, "step": 11330 }, { "epoch": 0.84, "learning_rate": 1.5855669033925024e-07, "logits/chosen": -2.1244359016418457, "logits/rejected": -1.6503021717071533, "logps/chosen": -447.8352966308594, "logps/rejected": -606.2278442382812, "loss": 0.6783, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.29755404591560364, "rewards/margins": 0.18223240971565247, "rewards/rejected": -0.4797864854335785, "step": 11340 }, { "epoch": 0.84, "learning_rate": 1.5716816481768557e-07, "logits/chosen": -2.2158656120300293, "logits/rejected": -1.7112407684326172, "logps/chosen": -460.07855224609375, "logps/rejected": -585.0291137695312, "loss": 0.6739, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2920977473258972, "rewards/margins": 0.17933326959609985, "rewards/rejected": -0.47143101692199707, "step": 11350 }, { "epoch": 0.84, "learning_rate": 1.5578522720843444e-07, "logits/chosen": -2.1187593936920166, "logits/rejected": -1.5937222242355347, "logps/chosen": -492.04742431640625, "logps/rejected": -697.5565185546875, "loss": 0.6764, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3116902709007263, "rewards/margins": 0.23768413066864014, "rewards/rejected": -0.5493744015693665, "step": 11360 }, { "epoch": 0.84, "learning_rate": 1.5440788668027005e-07, "logits/chosen": -1.9890190362930298, "logits/rejected": -1.56052565574646, "logps/chosen": -561.7776489257812, "logps/rejected": -743.3853759765625, "loss": 0.6779, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3992674946784973, "rewards/margins": 0.2233458012342453, "rewards/rejected": -0.6226133704185486, "step": 11370 }, { "epoch": 0.84, "learning_rate": 1.530361523648578e-07, "logits/chosen": -1.9865038394927979, "logits/rejected": -1.6104322671890259, "logps/chosen": -491.42047119140625, "logps/rejected": -682.6951904296875, "loss": 0.6741, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3276445269584656, "rewards/margins": 0.20479269325733185, "rewards/rejected": -0.5324372053146362, "step": 11380 }, { "epoch": 0.84, "learning_rate": 1.5167003335669425e-07, "logits/chosen": -1.907299280166626, "logits/rejected": -1.8059295415878296, "logps/chosen": -564.5491333007812, "logps/rejected": -688.0845947265625, "loss": 0.6833, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42394429445266724, "rewards/margins": 0.14541706442832947, "rewards/rejected": -0.5693613886833191, "step": 11390 }, { "epoch": 0.84, "learning_rate": 1.5030953871304652e-07, "logits/chosen": -1.8709131479263306, "logits/rejected": -1.3899725675582886, "logps/chosen": -466.7942810058594, "logps/rejected": -684.3262329101562, "loss": 0.678, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.317802369594574, "rewards/margins": 0.25755149126052856, "rewards/rejected": -0.5753538608551025, "step": 11400 }, { "epoch": 0.84, "learning_rate": 1.4895467745389258e-07, "logits/chosen": -1.8801774978637695, "logits/rejected": -1.4526864290237427, "logps/chosen": -477.912109375, "logps/rejected": -705.2786865234375, "loss": 0.6715, "rewards/accuracies": 0.75, "rewards/chosen": -0.33740004897117615, "rewards/margins": 0.2454081028699875, "rewards/rejected": -0.5828081369400024, "step": 11410 }, { "epoch": 0.84, "learning_rate": 1.4760545856186202e-07, "logits/chosen": -2.0727765560150146, "logits/rejected": -1.556194543838501, "logps/chosen": -441.41973876953125, "logps/rejected": -622.4970703125, "loss": 0.6785, "rewards/accuracies": 0.75, "rewards/chosen": -0.28738194704055786, "rewards/margins": 0.2108961045742035, "rewards/rejected": -0.49827808141708374, "step": 11420 }, { "epoch": 0.84, "learning_rate": 1.4626189098217567e-07, "logits/chosen": -2.12617564201355, "logits/rejected": -1.6395431756973267, "logps/chosen": -526.80224609375, "logps/rejected": -634.3231201171875, "loss": 0.6844, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.33399686217308044, "rewards/margins": 0.12497571855783463, "rewards/rejected": -0.45897260308265686, "step": 11430 }, { "epoch": 0.84, "learning_rate": 1.4492398362258608e-07, "logits/chosen": -2.155522108078003, "logits/rejected": -1.5506956577301025, "logps/chosen": -564.40673828125, "logps/rejected": -706.6734619140625, "loss": 0.6786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37609583139419556, "rewards/margins": 0.20465174317359924, "rewards/rejected": -0.5807475447654724, "step": 11440 }, { "epoch": 0.84, "learning_rate": 1.4359174535331998e-07, "logits/chosen": -2.173557758331299, "logits/rejected": -1.700290322303772, "logps/chosen": -570.8174438476562, "logps/rejected": -709.6393432617188, "loss": 0.6807, "rewards/accuracies": 0.625, "rewards/chosen": -0.39314985275268555, "rewards/margins": 0.1699405014514923, "rewards/rejected": -0.5630903244018555, "step": 11450 }, { "epoch": 0.85, "learning_rate": 1.4226518500701766e-07, "logits/chosen": -2.200305461883545, "logits/rejected": -1.5395944118499756, "logps/chosen": -611.2542724609375, "logps/rejected": -777.2630004882812, "loss": 0.6807, "rewards/accuracies": 0.75, "rewards/chosen": -0.4176185727119446, "rewards/margins": 0.21436114609241486, "rewards/rejected": -0.6319797039031982, "step": 11460 }, { "epoch": 0.85, "learning_rate": 1.4094431137867525e-07, "logits/chosen": -2.404508352279663, "logits/rejected": -1.7176029682159424, "logps/chosen": -566.2863159179688, "logps/rejected": -724.6280517578125, "loss": 0.6746, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37993091344833374, "rewards/margins": 0.23466253280639648, "rewards/rejected": -0.614593505859375, "step": 11470 }, { "epoch": 0.85, "learning_rate": 1.3962913322558668e-07, "logits/chosen": -2.223142623901367, "logits/rejected": -1.7430245876312256, "logps/chosen": -540.4989013671875, "logps/rejected": -705.7747802734375, "loss": 0.6751, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39641761779785156, "rewards/margins": 0.18849137425422668, "rewards/rejected": -0.5849089026451111, "step": 11480 }, { "epoch": 0.85, "learning_rate": 1.383196592672856e-07, "logits/chosen": -2.158254623413086, "logits/rejected": -1.7039811611175537, "logps/chosen": -560.0143432617188, "logps/rejected": -682.818115234375, "loss": 0.6802, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4005807936191559, "rewards/margins": 0.15347105264663696, "rewards/rejected": -0.5540518164634705, "step": 11490 }, { "epoch": 0.85, "learning_rate": 1.370158981854864e-07, "logits/chosen": -2.16107439994812, "logits/rejected": -1.4237970113754272, "logps/chosen": -545.4278564453125, "logps/rejected": -763.4801025390625, "loss": 0.6702, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3577817380428314, "rewards/margins": 0.29427027702331543, "rewards/rejected": -0.6520520448684692, "step": 11500 }, { "epoch": 0.85, "learning_rate": 1.3571785862402795e-07, "logits/chosen": -2.0002291202545166, "logits/rejected": -1.539971113204956, "logps/chosen": -590.5300903320312, "logps/rejected": -714.4227294921875, "loss": 0.6759, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41214194893836975, "rewards/margins": 0.1729172170162201, "rewards/rejected": -0.5850591659545898, "step": 11510 }, { "epoch": 0.85, "learning_rate": 1.3442554918881632e-07, "logits/chosen": -1.9651178121566772, "logits/rejected": -1.7888374328613281, "logps/chosen": -619.6047973632812, "logps/rejected": -747.0380249023438, "loss": 0.6823, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4825130105018616, "rewards/margins": 0.12256376445293427, "rewards/rejected": -0.6050766706466675, "step": 11520 }, { "epoch": 0.85, "learning_rate": 1.3313897844776656e-07, "logits/chosen": -2.1935908794403076, "logits/rejected": -1.7824379205703735, "logps/chosen": -494.60870361328125, "logps/rejected": -698.7366943359375, "loss": 0.6795, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3306487202644348, "rewards/margins": 0.2177126705646515, "rewards/rejected": -0.5483614206314087, "step": 11530 }, { "epoch": 0.85, "learning_rate": 1.3185815493074658e-07, "logits/chosen": -2.1345067024230957, "logits/rejected": -1.726317048072815, "logps/chosen": -564.0242309570312, "logps/rejected": -678.8775634765625, "loss": 0.6785, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.33631232380867004, "rewards/margins": 0.1727355420589447, "rewards/rejected": -0.5090478658676147, "step": 11540 }, { "epoch": 0.85, "learning_rate": 1.305830871295216e-07, "logits/chosen": -2.1457180976867676, "logits/rejected": -1.633244514465332, "logps/chosen": -522.7899169921875, "logps/rejected": -653.7332153320312, "loss": 0.6784, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36698076128959656, "rewards/margins": 0.18732739984989166, "rewards/rejected": -0.5543082356452942, "step": 11550 }, { "epoch": 0.85, "learning_rate": 1.2931378349769562e-07, "logits/chosen": -1.9951725006103516, "logits/rejected": -1.5670063495635986, "logps/chosen": -539.683837890625, "logps/rejected": -689.224853515625, "loss": 0.6813, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3701626658439636, "rewards/margins": 0.20513005554676056, "rewards/rejected": -0.575292706489563, "step": 11560 }, { "epoch": 0.85, "learning_rate": 1.2805025245065758e-07, "logits/chosen": -2.213223695755005, "logits/rejected": -1.5460580587387085, "logps/chosen": -548.5403442382812, "logps/rejected": -710.1760864257812, "loss": 0.6801, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35964879393577576, "rewards/margins": 0.21489587426185608, "rewards/rejected": -0.5745446085929871, "step": 11570 }, { "epoch": 0.85, "learning_rate": 1.2679250236552398e-07, "logits/chosen": -2.046143054962158, "logits/rejected": -1.603053331375122, "logps/chosen": -549.704345703125, "logps/rejected": -700.8527221679688, "loss": 0.6777, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3875100314617157, "rewards/margins": 0.18478037416934967, "rewards/rejected": -0.5722903609275818, "step": 11580 }, { "epoch": 0.85, "learning_rate": 1.2554054158108461e-07, "logits/chosen": -2.1103625297546387, "logits/rejected": -1.5126107931137085, "logps/chosen": -575.8826293945312, "logps/rejected": -741.3408203125, "loss": 0.6774, "rewards/accuracies": 0.75, "rewards/chosen": -0.3928926885128021, "rewards/margins": 0.2224433869123459, "rewards/rejected": -0.6153361201286316, "step": 11590 }, { "epoch": 0.86, "learning_rate": 1.2429437839774593e-07, "logits/chosen": -1.8790794610977173, "logits/rejected": -1.3745124340057373, "logps/chosen": -528.7866821289062, "logps/rejected": -729.0175170898438, "loss": 0.6753, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3903048634529114, "rewards/margins": 0.2423095703125, "rewards/rejected": -0.6326144337654114, "step": 11600 }, { "epoch": 0.86, "learning_rate": 1.2305402107747697e-07, "logits/chosen": -2.1610960960388184, "logits/rejected": -1.7504879236221313, "logps/chosen": -503.0387268066406, "logps/rejected": -712.3675537109375, "loss": 0.6783, "rewards/accuracies": 0.75, "rewards/chosen": -0.32698822021484375, "rewards/margins": 0.20815011858940125, "rewards/rejected": -0.5351383090019226, "step": 11610 }, { "epoch": 0.86, "learning_rate": 1.218194778437549e-07, "logits/chosen": -2.0860178470611572, "logits/rejected": -1.3299556970596313, "logps/chosen": -489.7447814941406, "logps/rejected": -729.017578125, "loss": 0.6698, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3158016800880432, "rewards/margins": 0.2987143099308014, "rewards/rejected": -0.6145161390304565, "step": 11620 }, { "epoch": 0.86, "learning_rate": 1.205907568815091e-07, "logits/chosen": -2.197669744491577, "logits/rejected": -1.7979087829589844, "logps/chosen": -520.904052734375, "logps/rejected": -668.6552734375, "loss": 0.6774, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3608534336090088, "rewards/margins": 0.18765898048877716, "rewards/rejected": -0.5485123991966248, "step": 11630 }, { "epoch": 0.86, "learning_rate": 1.1936786633706785e-07, "logits/chosen": -2.2559616565704346, "logits/rejected": -1.5276210308074951, "logps/chosen": -553.7825317382812, "logps/rejected": -692.2486572265625, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.3489120602607727, "rewards/margins": 0.19527992606163025, "rewards/rejected": -0.5441919565200806, "step": 11640 }, { "epoch": 0.86, "learning_rate": 1.181508143181047e-07, "logits/chosen": -2.178379774093628, "logits/rejected": -1.3969662189483643, "logps/chosen": -533.5789794921875, "logps/rejected": -753.7601928710938, "loss": 0.668, "rewards/accuracies": 0.875, "rewards/chosen": -0.37372249364852905, "rewards/margins": 0.2766973376274109, "rewards/rejected": -0.6504198908805847, "step": 11650 }, { "epoch": 0.86, "learning_rate": 1.16939608893584e-07, "logits/chosen": -2.3711740970611572, "logits/rejected": -1.6361500024795532, "logps/chosen": -539.8046264648438, "logps/rejected": -665.2981567382812, "loss": 0.6752, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3721042275428772, "rewards/margins": 0.18820464611053467, "rewards/rejected": -0.5603088140487671, "step": 11660 }, { "epoch": 0.86, "learning_rate": 1.1573425809370751e-07, "logits/chosen": -2.1173765659332275, "logits/rejected": -1.9480403661727905, "logps/chosen": -571.1784057617188, "logps/rejected": -711.0091552734375, "loss": 0.6847, "rewards/accuracies": 0.625, "rewards/chosen": -0.38174983859062195, "rewards/margins": 0.12545272707939148, "rewards/rejected": -0.5072025060653687, "step": 11670 }, { "epoch": 0.86, "learning_rate": 1.1453476990986088e-07, "logits/chosen": -2.003103017807007, "logits/rejected": -1.5127356052398682, "logps/chosen": -522.6393432617188, "logps/rejected": -670.935791015625, "loss": 0.6759, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3658371567726135, "rewards/margins": 0.21326470375061035, "rewards/rejected": -0.5791018605232239, "step": 11680 }, { "epoch": 0.86, "learning_rate": 1.1334115229456209e-07, "logits/chosen": -2.1393322944641113, "logits/rejected": -1.549593448638916, "logps/chosen": -553.6405029296875, "logps/rejected": -715.8551025390625, "loss": 0.6741, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.37700438499450684, "rewards/margins": 0.20481088757514954, "rewards/rejected": -0.5818153023719788, "step": 11690 }, { "epoch": 0.86, "learning_rate": 1.1215341316140659e-07, "logits/chosen": -2.0922000408172607, "logits/rejected": -1.7184057235717773, "logps/chosen": -476.92138671875, "logps/rejected": -672.3213500976562, "loss": 0.6756, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3462360203266144, "rewards/margins": 0.18364040553569794, "rewards/rejected": -0.5298764109611511, "step": 11700 }, { "epoch": 0.86, "learning_rate": 1.1097156038501643e-07, "logits/chosen": -2.075392484664917, "logits/rejected": -1.555350661277771, "logps/chosen": -536.9241943359375, "logps/rejected": -693.0538330078125, "loss": 0.6763, "rewards/accuracies": 0.75, "rewards/chosen": -0.3407003581523895, "rewards/margins": 0.21984586119651794, "rewards/rejected": -0.5605462789535522, "step": 11710 }, { "epoch": 0.86, "learning_rate": 1.097956018009879e-07, "logits/chosen": -2.058293581008911, "logits/rejected": -1.302160620689392, "logps/chosen": -474.60009765625, "logps/rejected": -669.0299072265625, "loss": 0.6728, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2996251583099365, "rewards/margins": 0.25531119108200073, "rewards/rejected": -0.5549362897872925, "step": 11720 }, { "epoch": 0.87, "learning_rate": 1.0862554520583855e-07, "logits/chosen": -1.924964189529419, "logits/rejected": -1.6679813861846924, "logps/chosen": -477.4837951660156, "logps/rejected": -610.4304809570312, "loss": 0.6836, "rewards/accuracies": 0.625, "rewards/chosen": -0.33257392048835754, "rewards/margins": 0.15096861124038696, "rewards/rejected": -0.4835425019264221, "step": 11730 }, { "epoch": 0.87, "learning_rate": 1.0746139835695623e-07, "logits/chosen": -2.0558691024780273, "logits/rejected": -1.456861138343811, "logps/chosen": -517.1407470703125, "logps/rejected": -689.3067016601562, "loss": 0.6721, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3459244668483734, "rewards/margins": 0.23744741082191467, "rewards/rejected": -0.5833719372749329, "step": 11740 }, { "epoch": 0.87, "learning_rate": 1.0630316897254832e-07, "logits/chosen": -2.0924949645996094, "logits/rejected": -1.5260127782821655, "logps/chosen": -577.3016967773438, "logps/rejected": -723.8834228515625, "loss": 0.6763, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3888899087905884, "rewards/margins": 0.18879278004169464, "rewards/rejected": -0.5776826739311218, "step": 11750 }, { "epoch": 0.87, "learning_rate": 1.0515086473158941e-07, "logits/chosen": -2.075624704360962, "logits/rejected": -1.486901044845581, "logps/chosen": -610.8809204101562, "logps/rejected": -795.6976318359375, "loss": 0.6741, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4129890501499176, "rewards/margins": 0.2546321749687195, "rewards/rejected": -0.6676211953163147, "step": 11760 }, { "epoch": 0.87, "learning_rate": 1.0400449327377059e-07, "logits/chosen": -2.184328317642212, "logits/rejected": -1.544185757637024, "logps/chosen": -600.6399536132812, "logps/rejected": -831.1331176757812, "loss": 0.6729, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3807184100151062, "rewards/margins": 0.27937257289886475, "rewards/rejected": -0.6600910425186157, "step": 11770 }, { "epoch": 0.87, "learning_rate": 1.0286406219944921e-07, "logits/chosen": -2.1113171577453613, "logits/rejected": -1.5434300899505615, "logps/chosen": -565.5036010742188, "logps/rejected": -774.560302734375, "loss": 0.6734, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41980499029159546, "rewards/margins": 0.28181618452072144, "rewards/rejected": -0.7016211748123169, "step": 11780 }, { "epoch": 0.87, "learning_rate": 1.0172957906959867e-07, "logits/chosen": -2.0298049449920654, "logits/rejected": -1.3633978366851807, "logps/chosen": -499.2825622558594, "logps/rejected": -638.1953125, "loss": 0.6754, "rewards/accuracies": 0.75, "rewards/chosen": -0.3398219645023346, "rewards/margins": 0.18804070353507996, "rewards/rejected": -0.5278626680374146, "step": 11790 }, { "epoch": 0.87, "learning_rate": 1.0060105140575758e-07, "logits/chosen": -2.375609874725342, "logits/rejected": -1.7659037113189697, "logps/chosen": -562.5169677734375, "logps/rejected": -685.890869140625, "loss": 0.6804, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3594036102294922, "rewards/margins": 0.20662932097911835, "rewards/rejected": -0.5660330057144165, "step": 11800 }, { "epoch": 0.87, "learning_rate": 9.947848668998004e-08, "logits/chosen": -2.0854921340942383, "logits/rejected": -1.4648220539093018, "logps/chosen": -419.31915283203125, "logps/rejected": -611.6030883789062, "loss": 0.6738, "rewards/accuracies": 0.875, "rewards/chosen": -0.2836928069591522, "rewards/margins": 0.2291683852672577, "rewards/rejected": -0.5128611326217651, "step": 11810 }, { "epoch": 0.87, "learning_rate": 9.836189236478687e-08, "logits/chosen": -2.140289783477783, "logits/rejected": -1.612786889076233, "logps/chosen": -492.24603271484375, "logps/rejected": -681.7683715820312, "loss": 0.6719, "rewards/accuracies": 0.75, "rewards/chosen": -0.3055552542209625, "rewards/margins": 0.2460629642009735, "rewards/rejected": -0.551618218421936, "step": 11820 }, { "epoch": 0.87, "learning_rate": 9.725127583311499e-08, "logits/chosen": -1.960222601890564, "logits/rejected": -1.6410388946533203, "logps/chosen": -418.762939453125, "logps/rejected": -596.2178955078125, "loss": 0.681, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.286318838596344, "rewards/margins": 0.19508405029773712, "rewards/rejected": -0.4814028739929199, "step": 11830 }, { "epoch": 0.87, "learning_rate": 9.614664445826969e-08, "logits/chosen": -1.9251677989959717, "logits/rejected": -1.3752527236938477, "logps/chosen": -476.6480407714844, "logps/rejected": -750.4679565429688, "loss": 0.6747, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.33865123987197876, "rewards/margins": 0.3065612316131592, "rewards/rejected": -0.6452124714851379, "step": 11840 }, { "epoch": 0.87, "learning_rate": 9.504800556387449e-08, "logits/chosen": -2.051159381866455, "logits/rejected": -1.4519617557525635, "logps/chosen": -636.652587890625, "logps/rejected": -795.81640625, "loss": 0.6734, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.488227516412735, "rewards/margins": 0.2063998281955719, "rewards/rejected": -0.6946273446083069, "step": 11850 }, { "epoch": 0.87, "learning_rate": 9.395536643382352e-08, "logits/chosen": -2.049304485321045, "logits/rejected": -1.8154830932617188, "logps/chosen": -476.916748046875, "logps/rejected": -598.0867919921875, "loss": 0.6803, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.30885186791419983, "rewards/margins": 0.1536674201488495, "rewards/rejected": -0.46251925826072693, "step": 11860 }, { "epoch": 0.88, "learning_rate": 9.286873431223296e-08, "logits/chosen": -2.1505484580993652, "logits/rejected": -1.6356751918792725, "logps/chosen": -517.5658569335938, "logps/rejected": -651.8446044921875, "loss": 0.6825, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34943175315856934, "rewards/margins": 0.16707541048526764, "rewards/rejected": -0.5165071487426758, "step": 11870 }, { "epoch": 0.88, "learning_rate": 9.178811640339257e-08, "logits/chosen": -2.029130220413208, "logits/rejected": -1.694906234741211, "logps/chosen": -467.0113220214844, "logps/rejected": -616.1665649414062, "loss": 0.6798, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3241115212440491, "rewards/margins": 0.16358722746372223, "rewards/rejected": -0.4876987338066101, "step": 11880 }, { "epoch": 0.88, "learning_rate": 9.071351987171905e-08, "logits/chosen": -2.068429708480835, "logits/rejected": -1.5930020809173584, "logps/chosen": -540.6192626953125, "logps/rejected": -760.9334716796875, "loss": 0.6762, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3492319583892822, "rewards/margins": 0.2422039806842804, "rewards/rejected": -0.5914359092712402, "step": 11890 }, { "epoch": 0.88, "learning_rate": 8.964495184170718e-08, "logits/chosen": -2.2944469451904297, "logits/rejected": -1.634636640548706, "logps/chosen": -457.00811767578125, "logps/rejected": -671.1212158203125, "loss": 0.675, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.27005189657211304, "rewards/margins": 0.26423192024230957, "rewards/rejected": -0.5342838168144226, "step": 11900 }, { "epoch": 0.88, "learning_rate": 8.858241939788302e-08, "logits/chosen": -2.2302041053771973, "logits/rejected": -1.9964078664779663, "logps/chosen": -524.549072265625, "logps/rejected": -688.2406005859375, "loss": 0.6842, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.3906378149986267, "rewards/margins": 0.1524837613105774, "rewards/rejected": -0.5431216359138489, "step": 11910 }, { "epoch": 0.88, "learning_rate": 8.752592958475835e-08, "logits/chosen": -2.058880090713501, "logits/rejected": -1.630812406539917, "logps/chosen": -560.9879760742188, "logps/rejected": -694.6641845703125, "loss": 0.6789, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3899931311607361, "rewards/margins": 0.1890266239643097, "rewards/rejected": -0.5790198445320129, "step": 11920 }, { "epoch": 0.88, "learning_rate": 8.647548940678151e-08, "logits/chosen": -2.0391430854797363, "logits/rejected": -1.4186969995498657, "logps/chosen": -572.2463989257812, "logps/rejected": -679.0311279296875, "loss": 0.6798, "rewards/accuracies": 0.625, "rewards/chosen": -0.41643890738487244, "rewards/margins": 0.17069916427135468, "rewards/rejected": -0.5871380567550659, "step": 11930 }, { "epoch": 0.88, "learning_rate": 8.543110582829272e-08, "logits/chosen": -1.9597278833389282, "logits/rejected": -1.5545175075531006, "logps/chosen": -512.7542724609375, "logps/rejected": -701.0950927734375, "loss": 0.6768, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3761431574821472, "rewards/margins": 0.21214339137077332, "rewards/rejected": -0.5882865786552429, "step": 11940 }, { "epoch": 0.88, "learning_rate": 8.439278577347742e-08, "logits/chosen": -2.026366710662842, "logits/rejected": -1.512511968612671, "logps/chosen": -556.6964721679688, "logps/rejected": -686.3632202148438, "loss": 0.6756, "rewards/accuracies": 0.75, "rewards/chosen": -0.37322869896888733, "rewards/margins": 0.18188980221748352, "rewards/rejected": -0.5551184415817261, "step": 11950 }, { "epoch": 0.88, "learning_rate": 8.336053612632033e-08, "logits/chosen": -1.8984979391098022, "logits/rejected": -1.3281292915344238, "logps/chosen": -485.2894592285156, "logps/rejected": -677.2818603515625, "loss": 0.6778, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.35314667224884033, "rewards/margins": 0.20715463161468506, "rewards/rejected": -0.5603011846542358, "step": 11960 }, { "epoch": 0.88, "learning_rate": 8.233436373055957e-08, "logits/chosen": -2.0889196395874023, "logits/rejected": -1.3610897064208984, "logps/chosen": -599.91455078125, "logps/rejected": -794.409423828125, "loss": 0.6769, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.42317208647727966, "rewards/margins": 0.25722023844718933, "rewards/rejected": -0.6803923845291138, "step": 11970 }, { "epoch": 0.88, "learning_rate": 8.131427538964164e-08, "logits/chosen": -2.1638360023498535, "logits/rejected": -1.6086984872817993, "logps/chosen": -560.1827392578125, "logps/rejected": -705.5655517578125, "loss": 0.68, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42679134011268616, "rewards/margins": 0.1811455488204956, "rewards/rejected": -0.6079368591308594, "step": 11980 }, { "epoch": 0.88, "learning_rate": 8.030027786667659e-08, "logits/chosen": -2.0417678356170654, "logits/rejected": -1.7381083965301514, "logps/chosen": -527.0389404296875, "logps/rejected": -643.7883911132812, "loss": 0.6823, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3757875859737396, "rewards/margins": 0.1576153039932251, "rewards/rejected": -0.5334028601646423, "step": 11990 }, { "epoch": 0.89, "learning_rate": 7.929237788439214e-08, "logits/chosen": -2.112691640853882, "logits/rejected": -1.702228307723999, "logps/chosen": -610.6134033203125, "logps/rejected": -697.50390625, "loss": 0.6829, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4542052149772644, "rewards/margins": 0.13019748032093048, "rewards/rejected": -0.5844027400016785, "step": 12000 }, { "epoch": 0.89, "learning_rate": 7.829058212509044e-08, "logits/chosen": -2.119997978210449, "logits/rejected": -1.7328296899795532, "logps/chosen": -492.2579650878906, "logps/rejected": -643.808837890625, "loss": 0.6806, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.34606820344924927, "rewards/margins": 0.18539109826087952, "rewards/rejected": -0.5314592123031616, "step": 12010 }, { "epoch": 0.89, "learning_rate": 7.729489723060279e-08, "logits/chosen": -1.9665237665176392, "logits/rejected": -1.3517693281173706, "logps/chosen": -506.0587463378906, "logps/rejected": -634.2609252929688, "loss": 0.678, "rewards/accuracies": 0.75, "rewards/chosen": -0.35632461309432983, "rewards/margins": 0.19445648789405823, "rewards/rejected": -0.5507810711860657, "step": 12020 }, { "epoch": 0.89, "learning_rate": 7.63053298022458e-08, "logits/chosen": -1.9922393560409546, "logits/rejected": -1.6335655450820923, "logps/chosen": -563.5203857421875, "logps/rejected": -676.1923828125, "loss": 0.6793, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.41114670038223267, "rewards/margins": 0.15853631496429443, "rewards/rejected": -0.5696830153465271, "step": 12030 }, { "epoch": 0.89, "learning_rate": 7.532188640077774e-08, "logits/chosen": -2.168828248977661, "logits/rejected": -1.5506516695022583, "logps/chosen": -525.9278564453125, "logps/rejected": -679.5648803710938, "loss": 0.6795, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.36929211020469666, "rewards/margins": 0.18959006667137146, "rewards/rejected": -0.5588821768760681, "step": 12040 }, { "epoch": 0.89, "learning_rate": 7.434457354635537e-08, "logits/chosen": -1.933289885520935, "logits/rejected": -1.4429380893707275, "logps/chosen": -551.5314331054688, "logps/rejected": -733.5830688476562, "loss": 0.6793, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43892568349838257, "rewards/margins": 0.19921594858169556, "rewards/rejected": -0.6381416320800781, "step": 12050 }, { "epoch": 0.89, "learning_rate": 7.337339771849005e-08, "logits/chosen": -2.051596164703369, "logits/rejected": -1.4689710140228271, "logps/chosen": -517.6698608398438, "logps/rejected": -709.239990234375, "loss": 0.6712, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.34601426124572754, "rewards/margins": 0.2351907193660736, "rewards/rejected": -0.5812050104141235, "step": 12060 }, { "epoch": 0.89, "learning_rate": 7.240836535600491e-08, "logits/chosen": -2.2160327434539795, "logits/rejected": -1.5829148292541504, "logps/chosen": -489.1114196777344, "logps/rejected": -717.3267822265625, "loss": 0.6686, "rewards/accuracies": 0.875, "rewards/chosen": -0.3404497504234314, "rewards/margins": 0.26928848028182983, "rewards/rejected": -0.6097382307052612, "step": 12070 }, { "epoch": 0.89, "learning_rate": 7.144948285699315e-08, "logits/chosen": -2.0869555473327637, "logits/rejected": -1.6278873682022095, "logps/chosen": -518.9573974609375, "logps/rejected": -669.9026489257812, "loss": 0.678, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3847216069698334, "rewards/margins": 0.16836339235305786, "rewards/rejected": -0.5530849695205688, "step": 12080 }, { "epoch": 0.89, "learning_rate": 7.049675657877396e-08, "logits/chosen": -2.180208683013916, "logits/rejected": -1.6160075664520264, "logps/chosen": -551.6221923828125, "logps/rejected": -704.8599853515625, "loss": 0.6788, "rewards/accuracies": 0.75, "rewards/chosen": -0.3785368800163269, "rewards/margins": 0.18843142688274384, "rewards/rejected": -0.5669683218002319, "step": 12090 }, { "epoch": 0.89, "learning_rate": 6.955019283785202e-08, "logits/chosen": -2.042459726333618, "logits/rejected": -1.615616798400879, "logps/chosen": -464.7916564941406, "logps/rejected": -626.5486450195312, "loss": 0.6802, "rewards/accuracies": 0.75, "rewards/chosen": -0.32067328691482544, "rewards/margins": 0.1979360431432724, "rewards/rejected": -0.518609344959259, "step": 12100 }, { "epoch": 0.89, "learning_rate": 6.860979790987397e-08, "logits/chosen": -2.0341639518737793, "logits/rejected": -1.4358879327774048, "logps/chosen": -525.8634643554688, "logps/rejected": -661.380859375, "loss": 0.6738, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.35057705640792847, "rewards/margins": 0.2015516310930252, "rewards/rejected": -0.5521286725997925, "step": 12110 }, { "epoch": 0.89, "learning_rate": 6.767557802958856e-08, "logits/chosen": -2.12530779838562, "logits/rejected": -1.5785202980041504, "logps/chosen": -514.0413818359375, "logps/rejected": -716.4776611328125, "loss": 0.6787, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.37576180696487427, "rewards/margins": 0.2191653698682785, "rewards/rejected": -0.5949271321296692, "step": 12120 }, { "epoch": 0.89, "learning_rate": 6.674753939080401e-08, "logits/chosen": -2.018036365509033, "logits/rejected": -1.5523185729980469, "logps/chosen": -540.2135009765625, "logps/rejected": -716.6060180664062, "loss": 0.676, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40253162384033203, "rewards/margins": 0.19373667240142822, "rewards/rejected": -0.5962682962417603, "step": 12130 }, { "epoch": 0.9, "learning_rate": 6.582568814634703e-08, "logits/chosen": -2.011209726333618, "logits/rejected": -1.7657463550567627, "logps/chosen": -542.4263916015625, "logps/rejected": -717.190673828125, "loss": 0.6798, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3893393576145172, "rewards/margins": 0.19020923972129822, "rewards/rejected": -0.5795485377311707, "step": 12140 }, { "epoch": 0.9, "learning_rate": 6.491003040802323e-08, "logits/chosen": -2.1444852352142334, "logits/rejected": -1.56595778465271, "logps/chosen": -499.14697265625, "logps/rejected": -678.7808837890625, "loss": 0.6776, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3452990651130676, "rewards/margins": 0.21645450592041016, "rewards/rejected": -0.561753511428833, "step": 12150 }, { "epoch": 0.9, "learning_rate": 6.400057224657496e-08, "logits/chosen": -2.180280923843384, "logits/rejected": -1.6241642236709595, "logps/chosen": -451.76568603515625, "logps/rejected": -695.0481567382812, "loss": 0.6725, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30192214250564575, "rewards/margins": 0.2575666606426239, "rewards/rejected": -0.5594887733459473, "step": 12160 }, { "epoch": 0.9, "learning_rate": 6.30973196916419e-08, "logits/chosen": -1.731420874595642, "logits/rejected": -1.3264827728271484, "logps/chosen": -606.3764038085938, "logps/rejected": -765.3652954101562, "loss": 0.6812, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.4529503285884857, "rewards/margins": 0.1897788941860199, "rewards/rejected": -0.6427292227745056, "step": 12170 }, { "epoch": 0.9, "learning_rate": 6.220027873172162e-08, "logits/chosen": -2.253455638885498, "logits/rejected": -1.644662857055664, "logps/chosen": -521.5140380859375, "logps/rejected": -716.2017822265625, "loss": 0.6769, "rewards/accuracies": 0.875, "rewards/chosen": -0.350619375705719, "rewards/margins": 0.23096589744091034, "rewards/rejected": -0.5815852284431458, "step": 12180 }, { "epoch": 0.9, "learning_rate": 6.13094553141289e-08, "logits/chosen": -2.3285715579986572, "logits/rejected": -1.9376949071884155, "logps/chosen": -515.0303955078125, "logps/rejected": -641.3269653320312, "loss": 0.68, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36369284987449646, "rewards/margins": 0.14453105628490448, "rewards/rejected": -0.5082239508628845, "step": 12190 }, { "epoch": 0.9, "learning_rate": 6.042485534495655e-08, "logits/chosen": -1.9890801906585693, "logits/rejected": -1.5963408946990967, "logps/chosen": -513.9190063476562, "logps/rejected": -659.6663208007812, "loss": 0.681, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3424949049949646, "rewards/margins": 0.17473813891410828, "rewards/rejected": -0.5172330737113953, "step": 12200 }, { "epoch": 0.9, "learning_rate": 5.954648468903623e-08, "logits/chosen": -2.026334524154663, "logits/rejected": -1.620408296585083, "logps/chosen": -446.39300537109375, "logps/rejected": -620.4864501953125, "loss": 0.6799, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.30145910382270813, "rewards/margins": 0.20131120085716248, "rewards/rejected": -0.5027703046798706, "step": 12210 }, { "epoch": 0.9, "learning_rate": 5.867434916990055e-08, "logits/chosen": -2.3257107734680176, "logits/rejected": -1.784974455833435, "logps/chosen": -453.8641662597656, "logps/rejected": -632.9586791992188, "loss": 0.6782, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.29515084624290466, "rewards/margins": 0.22382840514183044, "rewards/rejected": -0.5189792513847351, "step": 12220 }, { "epoch": 0.9, "learning_rate": 5.7808454569742706e-08, "logits/chosen": -1.872157335281372, "logits/rejected": -1.401256799697876, "logps/chosen": -425.2027282714844, "logps/rejected": -636.7537841796875, "loss": 0.6731, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3082929253578186, "rewards/margins": 0.2235678732395172, "rewards/rejected": -0.5318607687950134, "step": 12230 }, { "epoch": 0.9, "learning_rate": 5.694880662937929e-08, "logits/chosen": -2.1014838218688965, "logits/rejected": -1.7553619146347046, "logps/chosen": -494.86492919921875, "logps/rejected": -631.1832275390625, "loss": 0.6812, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3671993315219879, "rewards/margins": 0.14324142038822174, "rewards/rejected": -0.5104407668113708, "step": 12240 }, { "epoch": 0.9, "learning_rate": 5.609541104821247e-08, "logits/chosen": -2.075838565826416, "logits/rejected": -1.46909761428833, "logps/chosen": -578.9197998046875, "logps/rejected": -763.7278442382812, "loss": 0.6763, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4337473511695862, "rewards/margins": 0.19216279685497284, "rewards/rejected": -0.6259101629257202, "step": 12250 }, { "epoch": 0.9, "learning_rate": 5.5248273484191366e-08, "logits/chosen": -1.8354591131210327, "logits/rejected": -1.4614601135253906, "logps/chosen": -509.98553466796875, "logps/rejected": -638.80859375, "loss": 0.6885, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3746192157268524, "rewards/margins": 0.14292457699775696, "rewards/rejected": -0.5175438523292542, "step": 12260 }, { "epoch": 0.91, "learning_rate": 5.440739955377471e-08, "logits/chosen": -2.1477248668670654, "logits/rejected": -1.5527656078338623, "logps/chosen": -572.0133056640625, "logps/rejected": -763.5013427734375, "loss": 0.6778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.40032339096069336, "rewards/margins": 0.21219122409820557, "rewards/rejected": -0.6125146150588989, "step": 12270 }, { "epoch": 0.91, "learning_rate": 5.357279483189414e-08, "logits/chosen": -2.0602614879608154, "logits/rejected": -1.725098967552185, "logps/chosen": -513.7144775390625, "logps/rejected": -641.1572875976562, "loss": 0.6836, "rewards/accuracies": 0.75, "rewards/chosen": -0.35652679204940796, "rewards/margins": 0.1798141896724701, "rewards/rejected": -0.5363409519195557, "step": 12280 }, { "epoch": 0.91, "learning_rate": 5.274446485191708e-08, "logits/chosen": -2.1383869647979736, "logits/rejected": -1.4689433574676514, "logps/chosen": -535.1082763671875, "logps/rejected": -721.9680786132812, "loss": 0.6726, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.35319286584854126, "rewards/margins": 0.23558051884174347, "rewards/rejected": -0.5887733697891235, "step": 12290 }, { "epoch": 0.91, "learning_rate": 5.1922415105609355e-08, "logits/chosen": -2.0963099002838135, "logits/rejected": -1.4602117538452148, "logps/chosen": -476.92559814453125, "logps/rejected": -710.030517578125, "loss": 0.6705, "rewards/accuracies": 0.875, "rewards/chosen": -0.3160824775695801, "rewards/margins": 0.27854666113853455, "rewards/rejected": -0.594629168510437, "step": 12300 }, { "epoch": 0.91, "learning_rate": 5.110665104309964e-08, "logits/chosen": -2.1493380069732666, "logits/rejected": -1.8708021640777588, "logps/chosen": -547.4935913085938, "logps/rejected": -695.1535034179688, "loss": 0.6794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3854970335960388, "rewards/margins": 0.16502565145492554, "rewards/rejected": -0.5505226850509644, "step": 12310 }, { "epoch": 0.91, "learning_rate": 5.029717807284295e-08, "logits/chosen": -2.1561474800109863, "logits/rejected": -1.8051540851593018, "logps/chosen": -503.0321350097656, "logps/rejected": -631.3673706054688, "loss": 0.6785, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3309992849826813, "rewards/margins": 0.15238787233829498, "rewards/rejected": -0.48338717222213745, "step": 12320 }, { "epoch": 0.91, "learning_rate": 4.94940015615849e-08, "logits/chosen": -1.9560346603393555, "logits/rejected": -1.3214776515960693, "logps/chosen": -511.72857666015625, "logps/rejected": -690.3716430664062, "loss": 0.6729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3127884864807129, "rewards/margins": 0.24567165970802307, "rewards/rejected": -0.5584601163864136, "step": 12330 }, { "epoch": 0.91, "learning_rate": 4.8697126834325696e-08, "logits/chosen": -1.8640058040618896, "logits/rejected": -1.4543426036834717, "logps/chosen": -599.7916259765625, "logps/rejected": -752.7365112304688, "loss": 0.6805, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4223972260951996, "rewards/margins": 0.20344483852386475, "rewards/rejected": -0.6258420944213867, "step": 12340 }, { "epoch": 0.91, "learning_rate": 4.790655917428565e-08, "logits/chosen": -2.1006739139556885, "logits/rejected": -1.5606977939605713, "logps/chosen": -493.7810974121094, "logps/rejected": -634.1510009765625, "loss": 0.6786, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3173620104789734, "rewards/margins": 0.17867223918437958, "rewards/rejected": -0.4960342347621918, "step": 12350 }, { "epoch": 0.91, "learning_rate": 4.712230382286941e-08, "logits/chosen": -2.2758584022521973, "logits/rejected": -1.847110390663147, "logps/chosen": -552.2989501953125, "logps/rejected": -655.7720947265625, "loss": 0.6829, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3498491048812866, "rewards/margins": 0.13185501098632812, "rewards/rejected": -0.4817041754722595, "step": 12360 }, { "epoch": 0.91, "learning_rate": 4.634436597963176e-08, "logits/chosen": -2.06144380569458, "logits/rejected": -1.7734973430633545, "logps/chosen": -532.6207885742188, "logps/rejected": -674.7789306640625, "loss": 0.679, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3540617823600769, "rewards/margins": 0.16910326480865479, "rewards/rejected": -0.5231650471687317, "step": 12370 }, { "epoch": 0.91, "learning_rate": 4.557275080224232e-08, "logits/chosen": -1.8725783824920654, "logits/rejected": -1.4912055730819702, "logps/chosen": -554.1568603515625, "logps/rejected": -716.954833984375, "loss": 0.6774, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.37021172046661377, "rewards/margins": 0.19711796939373016, "rewards/rejected": -0.5673295855522156, "step": 12380 }, { "epoch": 0.91, "learning_rate": 4.480746340645258e-08, "logits/chosen": -2.1215012073516846, "logits/rejected": -1.6849712133407593, "logps/chosen": -617.5433349609375, "logps/rejected": -768.8670654296875, "loss": 0.6808, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4310119152069092, "rewards/margins": 0.19263164699077606, "rewards/rejected": -0.6236435174942017, "step": 12390 }, { "epoch": 0.91, "learning_rate": 4.404850886606093e-08, "logits/chosen": -2.16251802444458, "logits/rejected": -1.588786244392395, "logps/chosen": -493.6200256347656, "logps/rejected": -682.8429565429688, "loss": 0.6796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3486195206642151, "rewards/margins": 0.21529097855091095, "rewards/rejected": -0.5639104247093201, "step": 12400 }, { "epoch": 0.92, "learning_rate": 4.329589221287932e-08, "logits/chosen": -2.0078303813934326, "logits/rejected": -1.5909669399261475, "logps/chosen": -554.4102172851562, "logps/rejected": -691.4354248046875, "loss": 0.6799, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.39812731742858887, "rewards/margins": 0.16997286677360535, "rewards/rejected": -0.5681001543998718, "step": 12410 }, { "epoch": 0.92, "learning_rate": 4.254961843670024e-08, "logits/chosen": -2.0677542686462402, "logits/rejected": -1.4875930547714233, "logps/chosen": -492.14227294921875, "logps/rejected": -622.9188842773438, "loss": 0.6788, "rewards/accuracies": 0.75, "rewards/chosen": -0.37297675013542175, "rewards/margins": 0.17687606811523438, "rewards/rejected": -0.5498528480529785, "step": 12420 }, { "epoch": 0.92, "learning_rate": 4.180969248526334e-08, "logits/chosen": -2.0559403896331787, "logits/rejected": -1.647637963294983, "logps/chosen": -563.179931640625, "logps/rejected": -738.1256103515625, "loss": 0.6763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.4344251751899719, "rewards/margins": 0.19483225047588348, "rewards/rejected": -0.6292574405670166, "step": 12430 }, { "epoch": 0.92, "learning_rate": 4.107611926422228e-08, "logits/chosen": -2.0655243396759033, "logits/rejected": -1.6638034582138062, "logps/chosen": -483.3219299316406, "logps/rejected": -698.3675537109375, "loss": 0.6778, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3707185983657837, "rewards/margins": 0.2194487750530243, "rewards/rejected": -0.5901674032211304, "step": 12440 }, { "epoch": 0.92, "learning_rate": 4.0348903637113075e-08, "logits/chosen": -2.2623915672302246, "logits/rejected": -1.668949842453003, "logps/chosen": -539.6397705078125, "logps/rejected": -782.89306640625, "loss": 0.6798, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3115335702896118, "rewards/margins": 0.276858389377594, "rewards/rejected": -0.5883919596672058, "step": 12450 }, { "epoch": 0.92, "learning_rate": 3.9628050425321466e-08, "logits/chosen": -2.241410493850708, "logits/rejected": -1.6298377513885498, "logps/chosen": -435.259521484375, "logps/rejected": -653.3792724609375, "loss": 0.6773, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2522663474082947, "rewards/margins": 0.25332993268966675, "rewards/rejected": -0.5055962800979614, "step": 12460 }, { "epoch": 0.92, "learning_rate": 3.891356440805016e-08, "logits/chosen": -2.1635406017303467, "logits/rejected": -1.5163626670837402, "logps/chosen": -568.1689453125, "logps/rejected": -723.1339111328125, "loss": 0.6771, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.39934223890304565, "rewards/margins": 0.2077818214893341, "rewards/rejected": -0.6071240305900574, "step": 12470 }, { "epoch": 0.92, "learning_rate": 3.820545032228839e-08, "logits/chosen": -2.2672877311706543, "logits/rejected": -1.8073017597198486, "logps/chosen": -521.3099975585938, "logps/rejected": -659.9002685546875, "loss": 0.6825, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3549034297466278, "rewards/margins": 0.16230276226997375, "rewards/rejected": -0.5172062516212463, "step": 12480 }, { "epoch": 0.92, "learning_rate": 3.750371286277998e-08, "logits/chosen": -2.1741955280303955, "logits/rejected": -1.6443090438842773, "logps/chosen": -559.6614990234375, "logps/rejected": -797.0971069335938, "loss": 0.6791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4382163882255554, "rewards/margins": 0.2586824893951416, "rewards/rejected": -0.6968988180160522, "step": 12490 }, { "epoch": 0.92, "learning_rate": 3.6808356681991895e-08, "logits/chosen": -2.071382761001587, "logits/rejected": -1.7726924419403076, "logps/chosen": -479.6988830566406, "logps/rejected": -689.5442504882812, "loss": 0.6739, "rewards/accuracies": 0.75, "rewards/chosen": -0.3383418321609497, "rewards/margins": 0.20899935066699982, "rewards/rejected": -0.5473411083221436, "step": 12500 }, { "epoch": 0.92, "learning_rate": 3.611938639008372e-08, "logits/chosen": -1.979278326034546, "logits/rejected": -1.6839935779571533, "logps/chosen": -522.655029296875, "logps/rejected": -702.401611328125, "loss": 0.6799, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3733184337615967, "rewards/margins": 0.20117759704589844, "rewards/rejected": -0.5744960904121399, "step": 12510 }, { "epoch": 0.92, "learning_rate": 3.5436806554877466e-08, "logits/chosen": -2.159604549407959, "logits/rejected": -1.496917724609375, "logps/chosen": -516.35302734375, "logps/rejected": -771.8867797851562, "loss": 0.6809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.32883694767951965, "rewards/margins": 0.2792428731918335, "rewards/rejected": -0.6080797910690308, "step": 12520 }, { "epoch": 0.92, "learning_rate": 3.476062170182648e-08, "logits/chosen": -1.9358537197113037, "logits/rejected": -1.6329742670059204, "logps/chosen": -530.8876953125, "logps/rejected": -652.2896118164062, "loss": 0.6785, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3825833201408386, "rewards/margins": 0.16630962491035461, "rewards/rejected": -0.5488929152488708, "step": 12530 }, { "epoch": 0.92, "learning_rate": 3.4090836313985794e-08, "logits/chosen": -1.895594835281372, "logits/rejected": -1.4084235429763794, "logps/chosen": -552.0344848632812, "logps/rejected": -753.4379272460938, "loss": 0.6815, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4298694133758545, "rewards/margins": 0.19455775618553162, "rewards/rejected": -0.6244271993637085, "step": 12540 }, { "epoch": 0.93, "learning_rate": 3.342745483198295e-08, "logits/chosen": -1.9583213329315186, "logits/rejected": -1.2728601694107056, "logps/chosen": -592.2600708007812, "logps/rejected": -740.7923583984375, "loss": 0.6784, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.42338961362838745, "rewards/margins": 0.20841404795646667, "rewards/rejected": -0.6318036317825317, "step": 12550 }, { "epoch": 0.93, "learning_rate": 3.277048165398777e-08, "logits/chosen": -1.9481843709945679, "logits/rejected": -1.4089152812957764, "logps/chosen": -552.4390258789062, "logps/rejected": -720.6529541015625, "loss": 0.681, "rewards/accuracies": 0.75, "rewards/chosen": -0.3690037429332733, "rewards/margins": 0.20403249561786652, "rewards/rejected": -0.5730363130569458, "step": 12560 }, { "epoch": 0.93, "learning_rate": 3.2119921135683406e-08, "logits/chosen": -2.1274781227111816, "logits/rejected": -1.6278045177459717, "logps/chosen": -460.80828857421875, "logps/rejected": -628.8486938476562, "loss": 0.6753, "rewards/accuracies": 0.75, "rewards/chosen": -0.32486993074417114, "rewards/margins": 0.19036462903022766, "rewards/rejected": -0.5152345895767212, "step": 12570 }, { "epoch": 0.93, "learning_rate": 3.147577759023756e-08, "logits/chosen": -1.9793980121612549, "logits/rejected": -1.6847902536392212, "logps/chosen": -429.50469970703125, "logps/rejected": -524.2744750976562, "loss": 0.6831, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.2785450518131256, "rewards/margins": 0.12720152735710144, "rewards/rejected": -0.40574654936790466, "step": 12580 }, { "epoch": 0.93, "learning_rate": 3.083805528827421e-08, "logits/chosen": -2.0889806747436523, "logits/rejected": -1.5941998958587646, "logps/chosen": -576.1181640625, "logps/rejected": -743.1595458984375, "loss": 0.6734, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4244110584259033, "rewards/margins": 0.1927516907453537, "rewards/rejected": -0.6171627044677734, "step": 12590 }, { "epoch": 0.93, "learning_rate": 3.02067584578446e-08, "logits/chosen": -2.1686670780181885, "logits/rejected": -1.6382805109024048, "logps/chosen": -572.9634399414062, "logps/rejected": -738.1161499023438, "loss": 0.683, "rewards/accuracies": 0.75, "rewards/chosen": -0.3839063048362732, "rewards/margins": 0.20713265240192413, "rewards/rejected": -0.5910389423370361, "step": 12600 }, { "epoch": 0.93, "learning_rate": 2.9581891284399585e-08, "logits/chosen": -1.927865743637085, "logits/rejected": -1.573277235031128, "logps/chosen": -447.3641052246094, "logps/rejected": -626.9553833007812, "loss": 0.6772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31555140018463135, "rewards/margins": 0.2104417085647583, "rewards/rejected": -0.5259930491447449, "step": 12610 }, { "epoch": 0.93, "learning_rate": 2.896345791076238e-08, "logits/chosen": -2.187253952026367, "logits/rejected": -1.837829828262329, "logps/chosen": -487.6934509277344, "logps/rejected": -662.4732666015625, "loss": 0.6764, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.34798118472099304, "rewards/margins": 0.19764915108680725, "rewards/rejected": -0.5456303358078003, "step": 12620 }, { "epoch": 0.93, "learning_rate": 2.8351462437099737e-08, "logits/chosen": -1.83749520778656, "logits/rejected": -1.4956713914871216, "logps/chosen": -572.5322265625, "logps/rejected": -725.2957763671875, "loss": 0.6808, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.36789634823799133, "rewards/margins": 0.21386978030204773, "rewards/rejected": -0.5817661285400391, "step": 12630 }, { "epoch": 0.93, "learning_rate": 2.7745908920896344e-08, "logits/chosen": -2.258476734161377, "logits/rejected": -1.7427030801773071, "logps/chosen": -528.8125610351562, "logps/rejected": -626.0385131835938, "loss": 0.6838, "rewards/accuracies": 0.625, "rewards/chosen": -0.3671940267086029, "rewards/margins": 0.13916368782520294, "rewards/rejected": -0.506357729434967, "step": 12640 }, { "epoch": 0.93, "learning_rate": 2.7146801376927042e-08, "logits/chosen": -2.0235025882720947, "logits/rejected": -1.687343955039978, "logps/chosen": -497.86920166015625, "logps/rejected": -705.7901000976562, "loss": 0.6787, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3240594267845154, "rewards/margins": 0.18785949051380157, "rewards/rejected": -0.5119189023971558, "step": 12650 }, { "epoch": 0.93, "learning_rate": 2.6554143777230087e-08, "logits/chosen": -2.2414331436157227, "logits/rejected": -1.699150800704956, "logps/chosen": -575.0064086914062, "logps/rejected": -695.8781127929688, "loss": 0.6816, "rewards/accuracies": 0.625, "rewards/chosen": -0.4134212136268616, "rewards/margins": 0.17178848385810852, "rewards/rejected": -0.5852096676826477, "step": 12660 }, { "epoch": 0.93, "learning_rate": 2.596794005108127e-08, "logits/chosen": -2.2548136711120605, "logits/rejected": -1.5168583393096924, "logps/chosen": -531.1048583984375, "logps/rejected": -714.004150390625, "loss": 0.6714, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3167818486690521, "rewards/margins": 0.25903576612472534, "rewards/rejected": -0.5758176445960999, "step": 12670 }, { "epoch": 0.94, "learning_rate": 2.5388194084967507e-08, "logits/chosen": -2.135685443878174, "logits/rejected": -1.5943740606307983, "logps/chosen": -538.8492431640625, "logps/rejected": -742.7847900390625, "loss": 0.6664, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3877369165420532, "rewards/margins": 0.23145660758018494, "rewards/rejected": -0.6191935539245605, "step": 12680 }, { "epoch": 0.94, "learning_rate": 2.4814909722561507e-08, "logits/chosen": -2.0374417304992676, "logits/rejected": -1.6286847591400146, "logps/chosen": -502.6419372558594, "logps/rejected": -633.6027221679688, "loss": 0.6812, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3407842814922333, "rewards/margins": 0.16493551433086395, "rewards/rejected": -0.505719780921936, "step": 12690 }, { "epoch": 0.94, "learning_rate": 2.4248090764696026e-08, "logits/chosen": -2.157000780105591, "logits/rejected": -1.466365098953247, "logps/chosen": -517.0516967773438, "logps/rejected": -703.5821533203125, "loss": 0.6817, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3473134934902191, "rewards/margins": 0.2371932715177536, "rewards/rejected": -0.5845068097114563, "step": 12700 }, { "epoch": 0.94, "learning_rate": 2.3687740969338344e-08, "logits/chosen": -2.070208787918091, "logits/rejected": -1.601485013961792, "logps/chosen": -513.3284912109375, "logps/rejected": -676.622314453125, "loss": 0.676, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.38996586203575134, "rewards/margins": 0.18909959495067596, "rewards/rejected": -0.5790654420852661, "step": 12710 }, { "epoch": 0.94, "learning_rate": 2.313386405156592e-08, "logits/chosen": -2.195650100708008, "logits/rejected": -1.4931819438934326, "logps/chosen": -524.8739013671875, "logps/rejected": -695.8502197265625, "loss": 0.674, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3272765278816223, "rewards/margins": 0.23153066635131836, "rewards/rejected": -0.5588072538375854, "step": 12720 }, { "epoch": 0.94, "learning_rate": 2.258646368354167e-08, "logits/chosen": -2.4498372077941895, "logits/rejected": -1.9745458364486694, "logps/chosen": -521.4008178710938, "logps/rejected": -753.57861328125, "loss": 0.6724, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3431427478790283, "rewards/margins": 0.24590440094470978, "rewards/rejected": -0.5890471339225769, "step": 12730 }, { "epoch": 0.94, "learning_rate": 2.2045543494488948e-08, "logits/chosen": -2.2771286964416504, "logits/rejected": -1.9875965118408203, "logps/chosen": -530.1380004882812, "logps/rejected": -633.6187133789062, "loss": 0.6826, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3473342955112457, "rewards/margins": 0.15040257573127747, "rewards/rejected": -0.4977368712425232, "step": 12740 }, { "epoch": 0.94, "learning_rate": 2.151110707066872e-08, "logits/chosen": -2.2616169452667236, "logits/rejected": -1.5658981800079346, "logps/chosen": -470.3023376464844, "logps/rejected": -686.09375, "loss": 0.672, "rewards/accuracies": 0.875, "rewards/chosen": -0.3087359368801117, "rewards/margins": 0.24562890827655792, "rewards/rejected": -0.5543648600578308, "step": 12750 }, { "epoch": 0.94, "learning_rate": 2.0983157955354436e-08, "logits/chosen": -2.1398589611053467, "logits/rejected": -1.7079366445541382, "logps/chosen": -482.7460021972656, "logps/rejected": -622.2957153320312, "loss": 0.6849, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3170104920864105, "rewards/margins": 0.1619282364845276, "rewards/rejected": -0.4789387583732605, "step": 12760 }, { "epoch": 0.94, "learning_rate": 2.0461699648809395e-08, "logits/chosen": -2.054098606109619, "logits/rejected": -1.3973968029022217, "logps/chosen": -557.1055908203125, "logps/rejected": -714.4132080078125, "loss": 0.6787, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.36537691950798035, "rewards/margins": 0.20077764987945557, "rewards/rejected": -0.5661545395851135, "step": 12770 }, { "epoch": 0.94, "learning_rate": 1.9946735608263543e-08, "logits/chosen": -2.2132568359375, "logits/rejected": -1.5564374923706055, "logps/chosen": -612.535888671875, "logps/rejected": -743.5823974609375, "loss": 0.6781, "rewards/accuracies": 0.875, "rewards/chosen": -0.35540542006492615, "rewards/margins": 0.22272129356861115, "rewards/rejected": -0.5781267881393433, "step": 12780 }, { "epoch": 0.94, "learning_rate": 1.943826924789005e-08, "logits/chosen": -1.9563963413238525, "logits/rejected": -1.4060355424880981, "logps/chosen": -582.0482177734375, "logps/rejected": -719.01123046875, "loss": 0.6753, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4459984302520752, "rewards/margins": 0.1605527251958847, "rewards/rejected": -0.6065511107444763, "step": 12790 }, { "epoch": 0.94, "learning_rate": 1.893630393878287e-08, "logits/chosen": -2.168118715286255, "logits/rejected": -1.805616021156311, "logps/chosen": -463.6773986816406, "logps/rejected": -598.6619873046875, "loss": 0.681, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3253606855869293, "rewards/margins": 0.1589312106370926, "rewards/rejected": -0.48429185152053833, "step": 12800 }, { "epoch": 0.94, "learning_rate": 1.844084300893456e-08, "logits/chosen": -2.198554515838623, "logits/rejected": -1.547837257385254, "logps/chosen": -514.910400390625, "logps/rejected": -751.1119384765625, "loss": 0.671, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3264197111129761, "rewards/margins": 0.24178962409496307, "rewards/rejected": -0.5682093501091003, "step": 12810 }, { "epoch": 0.95, "learning_rate": 1.79518897432146e-08, "logits/chosen": -1.8074119091033936, "logits/rejected": -1.3799957036972046, "logps/chosen": -438.73699951171875, "logps/rejected": -635.436767578125, "loss": 0.6753, "rewards/accuracies": 0.75, "rewards/chosen": -0.2924322187900543, "rewards/margins": 0.22457607090473175, "rewards/rejected": -0.5170083045959473, "step": 12820 }, { "epoch": 0.95, "learning_rate": 1.7469447383346548e-08, "logits/chosen": -2.0501086711883545, "logits/rejected": -1.7211850881576538, "logps/chosen": -525.2173461914062, "logps/rejected": -678.3775634765625, "loss": 0.6767, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3501201868057251, "rewards/margins": 0.18896783888339996, "rewards/rejected": -0.5390880107879639, "step": 12830 }, { "epoch": 0.95, "learning_rate": 1.6993519127887157e-08, "logits/chosen": -2.0252304077148438, "logits/rejected": -1.523231029510498, "logps/chosen": -574.5531616210938, "logps/rejected": -738.9405517578125, "loss": 0.6828, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3965558409690857, "rewards/margins": 0.1902516782283783, "rewards/rejected": -0.5868075489997864, "step": 12840 }, { "epoch": 0.95, "learning_rate": 1.6524108132205618e-08, "logits/chosen": -2.278812885284424, "logits/rejected": -1.6600151062011719, "logps/chosen": -497.78314208984375, "logps/rejected": -605.1187744140625, "loss": 0.6858, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3145557940006256, "rewards/margins": 0.17666950821876526, "rewards/rejected": -0.49122530221939087, "step": 12850 }, { "epoch": 0.95, "learning_rate": 1.6061217508461676e-08, "logits/chosen": -2.21195387840271, "logits/rejected": -1.6285545825958252, "logps/chosen": -528.1466064453125, "logps/rejected": -685.7601928710938, "loss": 0.6817, "rewards/accuracies": 0.75, "rewards/chosen": -0.36646515130996704, "rewards/margins": 0.17061969637870789, "rewards/rejected": -0.5370848774909973, "step": 12860 }, { "epoch": 0.95, "learning_rate": 1.5604850325585672e-08, "logits/chosen": -2.0487258434295654, "logits/rejected": -1.4973214864730835, "logps/chosen": -565.08544921875, "logps/rejected": -717.5864868164062, "loss": 0.6796, "rewards/accuracies": 0.75, "rewards/chosen": -0.42035239934921265, "rewards/margins": 0.20494237542152405, "rewards/rejected": -0.6252948045730591, "step": 12870 }, { "epoch": 0.95, "learning_rate": 1.5155009609258195e-08, "logits/chosen": -2.013899564743042, "logits/rejected": -1.6389163732528687, "logps/chosen": -545.1976928710938, "logps/rejected": -696.5335693359375, "loss": 0.6766, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3495752811431885, "rewards/margins": 0.20340006053447723, "rewards/rejected": -0.5529753565788269, "step": 12880 }, { "epoch": 0.95, "learning_rate": 1.4711698341889567e-08, "logits/chosen": -2.174809217453003, "logits/rejected": -1.387448787689209, "logps/chosen": -580.315673828125, "logps/rejected": -702.5577392578125, "loss": 0.6743, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40892356634140015, "rewards/margins": 0.17046651244163513, "rewards/rejected": -0.5793901085853577, "step": 12890 }, { "epoch": 0.95, "learning_rate": 1.4274919462600621e-08, "logits/chosen": -2.0471010208129883, "logits/rejected": -1.5250636339187622, "logps/chosen": -570.1788940429688, "logps/rejected": -748.491943359375, "loss": 0.6774, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3776026666164398, "rewards/margins": 0.24097545444965363, "rewards/rejected": -0.6185781359672546, "step": 12900 }, { "epoch": 0.95, "learning_rate": 1.3844675867202726e-08, "logits/chosen": -2.105715036392212, "logits/rejected": -1.6746994256973267, "logps/chosen": -588.1861572265625, "logps/rejected": -692.4296875, "loss": 0.6804, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3868919312953949, "rewards/margins": 0.14658674597740173, "rewards/rejected": -0.5334786176681519, "step": 12910 }, { "epoch": 0.95, "learning_rate": 1.342097040817891e-08, "logits/chosen": -2.0214037895202637, "logits/rejected": -1.80129873752594, "logps/chosen": -630.8421630859375, "logps/rejected": -766.2052001953125, "loss": 0.6852, "rewards/accuracies": 0.625, "rewards/chosen": -0.4612027108669281, "rewards/margins": 0.126323863863945, "rewards/rejected": -0.5875265002250671, "step": 12920 }, { "epoch": 0.95, "learning_rate": 1.3003805894664766e-08, "logits/chosen": -2.28585147857666, "logits/rejected": -1.9301602840423584, "logps/chosen": -459.36981201171875, "logps/rejected": -623.2440185546875, "loss": 0.6793, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.333972305059433, "rewards/margins": 0.16807682812213898, "rewards/rejected": -0.5020490884780884, "step": 12930 }, { "epoch": 0.95, "learning_rate": 1.2593185092430014e-08, "logits/chosen": -2.067272663116455, "logits/rejected": -1.4308422803878784, "logps/chosen": -463.6817321777344, "logps/rejected": -685.2967529296875, "loss": 0.6702, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.31624871492385864, "rewards/margins": 0.24173979461193085, "rewards/rejected": -0.5579885244369507, "step": 12940 }, { "epoch": 0.96, "learning_rate": 1.2189110723859975e-08, "logits/chosen": -2.0791008472442627, "logits/rejected": -1.6438226699829102, "logps/chosen": -546.7664794921875, "logps/rejected": -670.3087768554688, "loss": 0.6816, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3831826448440552, "rewards/margins": 0.15285071730613708, "rewards/rejected": -0.5360333919525146, "step": 12950 }, { "epoch": 0.96, "learning_rate": 1.1791585467937681e-08, "logits/chosen": -2.1984708309173584, "logits/rejected": -1.7759685516357422, "logps/chosen": -545.2265014648438, "logps/rejected": -618.04150390625, "loss": 0.6856, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.3760373592376709, "rewards/margins": 0.11941524595022202, "rewards/rejected": -0.4954525828361511, "step": 12960 }, { "epoch": 0.96, "learning_rate": 1.1400611960225903e-08, "logits/chosen": -2.0263333320617676, "logits/rejected": -1.7157186269760132, "logps/chosen": -465.2334899902344, "logps/rejected": -633.6257934570312, "loss": 0.6757, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.34207168221473694, "rewards/margins": 0.18688231706619263, "rewards/rejected": -0.5289539694786072, "step": 12970 }, { "epoch": 0.96, "learning_rate": 1.101619279284982e-08, "logits/chosen": -2.0944366455078125, "logits/rejected": -1.6510741710662842, "logps/chosen": -520.9305419921875, "logps/rejected": -655.0965576171875, "loss": 0.6781, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3474411964416504, "rewards/margins": 0.17597565054893494, "rewards/rejected": -0.5234168767929077, "step": 12980 }, { "epoch": 0.96, "learning_rate": 1.0638330514480154e-08, "logits/chosen": -2.06375789642334, "logits/rejected": -1.603838562965393, "logps/chosen": -522.8187255859375, "logps/rejected": -705.0101318359375, "loss": 0.6776, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.39177557826042175, "rewards/margins": 0.18743351101875305, "rewards/rejected": -0.5792091488838196, "step": 12990 }, { "epoch": 0.96, "learning_rate": 1.0267027630315394e-08, "logits/chosen": -2.1107468605041504, "logits/rejected": -1.8384116888046265, "logps/chosen": -572.8711547851562, "logps/rejected": -644.3701782226562, "loss": 0.6826, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3919777274131775, "rewards/margins": 0.12720008194446564, "rewards/rejected": -0.5191778540611267, "step": 13000 }, { "epoch": 0.96, "learning_rate": 9.902286602066157e-09, "logits/chosen": -2.206927537918091, "logits/rejected": -1.3890177011489868, "logps/chosen": -589.2716064453125, "logps/rejected": -767.7894897460938, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40268659591674805, "rewards/margins": 0.251761257648468, "rewards/rejected": -0.6544478535652161, "step": 13010 }, { "epoch": 0.96, "learning_rate": 9.544109847938297e-09, "logits/chosen": -2.139653205871582, "logits/rejected": -1.6275535821914673, "logps/chosen": -538.1956787109375, "logps/rejected": -707.6997680664062, "loss": 0.6781, "rewards/accuracies": 0.75, "rewards/chosen": -0.37297746539115906, "rewards/margins": 0.20395520329475403, "rewards/rejected": -0.5769327282905579, "step": 13020 }, { "epoch": 0.96, "learning_rate": 9.192499742617044e-09, "logits/chosen": -2.323024034500122, "logits/rejected": -1.690679907798767, "logps/chosen": -456.1119689941406, "logps/rejected": -651.5538330078125, "loss": 0.6742, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.2678610384464264, "rewards/margins": 0.23654837906360626, "rewards/rejected": -0.5044094324111938, "step": 13030 }, { "epoch": 0.96, "learning_rate": 8.847458617251113e-09, "logits/chosen": -2.241178035736084, "logits/rejected": -1.7196604013442993, "logps/chosen": -550.4908447265625, "logps/rejected": -692.8678588867188, "loss": 0.6747, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3878917694091797, "rewards/margins": 0.18152369558811188, "rewards/rejected": -0.5694154500961304, "step": 13040 }, { "epoch": 0.96, "learning_rate": 8.508988759437619e-09, "logits/chosen": -2.16300630569458, "logits/rejected": -1.5461496114730835, "logps/chosen": -514.677490234375, "logps/rejected": -672.8048706054688, "loss": 0.6814, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3211614489555359, "rewards/margins": 0.207925483584404, "rewards/rejected": -0.5290868878364563, "step": 13050 }, { "epoch": 0.96, "learning_rate": 8.17709241320641e-09, "logits/chosen": -2.1075925827026367, "logits/rejected": -1.5794203281402588, "logps/chosen": -471.3956604003906, "logps/rejected": -664.6893310546875, "loss": 0.6755, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.3111240267753601, "rewards/margins": 0.22349794209003448, "rewards/rejected": -0.5346218943595886, "step": 13060 }, { "epoch": 0.96, "learning_rate": 7.851771779005534e-09, "logits/chosen": -1.936983346939087, "logits/rejected": -1.6087884902954102, "logps/chosen": -507.13763427734375, "logps/rejected": -719.4133911132812, "loss": 0.6776, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.35914814472198486, "rewards/margins": 0.2318972647190094, "rewards/rejected": -0.5910454392433167, "step": 13070 }, { "epoch": 0.96, "learning_rate": 7.533029013686687e-09, "logits/chosen": -2.024181365966797, "logits/rejected": -1.8280832767486572, "logps/chosen": -558.2418823242188, "logps/rejected": -710.1409912109375, "loss": 0.6809, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4209212362766266, "rewards/margins": 0.1445935070514679, "rewards/rejected": -0.5655147433280945, "step": 13080 }, { "epoch": 0.97, "learning_rate": 7.220866230490563e-09, "logits/chosen": -2.16593599319458, "logits/rejected": -1.3607076406478882, "logps/chosen": -571.2879638671875, "logps/rejected": -770.9668579101562, "loss": 0.6744, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.37986326217651367, "rewards/margins": 0.2637863755226135, "rewards/rejected": -0.643649697303772, "step": 13090 }, { "epoch": 0.97, "learning_rate": 6.915285499033307e-09, "logits/chosen": -2.0683255195617676, "logits/rejected": -1.381437063217163, "logps/chosen": -578.5155029296875, "logps/rejected": -755.6702880859375, "loss": 0.6715, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.44301700592041016, "rewards/margins": 0.23578095436096191, "rewards/rejected": -0.6787979006767273, "step": 13100 }, { "epoch": 0.97, "learning_rate": 6.616288845292195e-09, "logits/chosen": -1.9640045166015625, "logits/rejected": -1.5896854400634766, "logps/chosen": -517.5831298828125, "logps/rejected": -716.9608154296875, "loss": 0.6712, "rewards/accuracies": 0.75, "rewards/chosen": -0.37045198678970337, "rewards/margins": 0.2118285447359085, "rewards/rejected": -0.5822805166244507, "step": 13110 }, { "epoch": 0.97, "learning_rate": 6.323878251592973e-09, "logits/chosen": -2.003796100616455, "logits/rejected": -1.6037700176239014, "logps/chosen": -439.34417724609375, "logps/rejected": -608.4082641601562, "loss": 0.6755, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30554118752479553, "rewards/margins": 0.18911051750183105, "rewards/rejected": -0.494651734828949, "step": 13120 }, { "epoch": 0.97, "learning_rate": 6.038055656595986e-09, "logits/chosen": -2.1676759719848633, "logits/rejected": -1.4091155529022217, "logps/chosen": -583.8790283203125, "logps/rejected": -685.0205078125, "loss": 0.6814, "rewards/accuracies": 0.75, "rewards/chosen": -0.40804988145828247, "rewards/margins": 0.1627126932144165, "rewards/rejected": -0.570762574672699, "step": 13130 }, { "epoch": 0.97, "learning_rate": 5.758822955283849e-09, "logits/chosen": -2.145406723022461, "logits/rejected": -1.40517258644104, "logps/chosen": -502.626220703125, "logps/rejected": -661.7965698242188, "loss": 0.6811, "rewards/accuracies": 0.75, "rewards/chosen": -0.37643760442733765, "rewards/margins": 0.20693211257457733, "rewards/rejected": -0.5833696722984314, "step": 13140 }, { "epoch": 0.97, "learning_rate": 5.4861819989485695e-09, "logits/chosen": -2.1601741313934326, "logits/rejected": -1.7256304025650024, "logps/chosen": -523.087158203125, "logps/rejected": -649.1605224609375, "loss": 0.6797, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3833433985710144, "rewards/margins": 0.1486581563949585, "rewards/rejected": -0.5320014953613281, "step": 13150 }, { "epoch": 0.97, "learning_rate": 5.2201345951793375e-09, "logits/chosen": -2.4166359901428223, "logits/rejected": -1.7424606084823608, "logps/chosen": -497.49017333984375, "logps/rejected": -649.4292602539062, "loss": 0.6745, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.28180932998657227, "rewards/margins": 0.2306872308254242, "rewards/rejected": -0.5124965906143188, "step": 13160 }, { "epoch": 0.97, "learning_rate": 4.960682507850644e-09, "logits/chosen": -1.972560167312622, "logits/rejected": -1.4791983366012573, "logps/chosen": -634.6946411132812, "logps/rejected": -814.3884887695312, "loss": 0.6817, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4519330859184265, "rewards/margins": 0.23733659088611603, "rewards/rejected": -0.6892696619033813, "step": 13170 }, { "epoch": 0.97, "learning_rate": 4.707827457110624e-09, "logits/chosen": -2.1587576866149902, "logits/rejected": -1.7504053115844727, "logps/chosen": -470.0223693847656, "logps/rejected": -628.1024169921875, "loss": 0.6791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.30867791175842285, "rewards/margins": 0.19097523391246796, "rewards/rejected": -0.499653160572052, "step": 13180 }, { "epoch": 0.97, "learning_rate": 4.461571119369179e-09, "logits/chosen": -2.2616684436798096, "logits/rejected": -1.623813271522522, "logps/chosen": -457.1973571777344, "logps/rejected": -608.0726318359375, "loss": 0.6767, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2864590585231781, "rewards/margins": 0.20148620009422302, "rewards/rejected": -0.48794522881507874, "step": 13190 }, { "epoch": 0.97, "learning_rate": 4.221915127287756e-09, "logits/chosen": -1.9374793767929077, "logits/rejected": -1.4849828481674194, "logps/chosen": -597.2499389648438, "logps/rejected": -809.9119873046875, "loss": 0.6769, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4175606667995453, "rewards/margins": 0.23499807715415955, "rewards/rejected": -0.6525587439537048, "step": 13200 }, { "epoch": 0.97, "learning_rate": 3.988861069767591e-09, "logits/chosen": -2.2414326667785645, "logits/rejected": -1.7484687566757202, "logps/chosen": -441.80255126953125, "logps/rejected": -582.4473876953125, "loss": 0.681, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.26358121633529663, "rewards/margins": 0.18809624016284943, "rewards/rejected": -0.45167747139930725, "step": 13210 }, { "epoch": 0.98, "learning_rate": 3.762410491939816e-09, "logits/chosen": -1.9667119979858398, "logits/rejected": -1.689192771911621, "logps/chosen": -540.9521484375, "logps/rejected": -665.221923828125, "loss": 0.683, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.42336344718933105, "rewards/margins": 0.1426069736480713, "rewards/rejected": -0.5659704208374023, "step": 13220 }, { "epoch": 0.98, "learning_rate": 3.5425648951549204e-09, "logits/chosen": -2.0041444301605225, "logits/rejected": -1.2987147569656372, "logps/chosen": -547.6778564453125, "logps/rejected": -748.8751220703125, "loss": 0.6719, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.3950062692165375, "rewards/margins": 0.26244789361953735, "rewards/rejected": -0.6574541330337524, "step": 13230 }, { "epoch": 0.98, "learning_rate": 3.3293257369726436e-09, "logits/chosen": -1.8233144283294678, "logits/rejected": -1.5602797269821167, "logps/chosen": -510.71759033203125, "logps/rejected": -690.426025390625, "loss": 0.6797, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36618655920028687, "rewards/margins": 0.18110911548137665, "rewards/rejected": -0.5472956895828247, "step": 13240 }, { "epoch": 0.98, "learning_rate": 3.1226944311528724e-09, "logits/chosen": -1.8793814182281494, "logits/rejected": -1.4941141605377197, "logps/chosen": -575.6840209960938, "logps/rejected": -758.1213989257812, "loss": 0.6806, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.4564196467399597, "rewards/margins": 0.19512155652046204, "rewards/rejected": -0.6515413522720337, "step": 13250 }, { "epoch": 0.98, "learning_rate": 2.9226723476458714e-09, "logits/chosen": -2.047565460205078, "logits/rejected": -1.5422642230987549, "logps/chosen": -609.6829223632812, "logps/rejected": -737.3355712890625, "loss": 0.6789, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4010292589664459, "rewards/margins": 0.17936114966869354, "rewards/rejected": -0.580390453338623, "step": 13260 }, { "epoch": 0.98, "learning_rate": 2.7292608125830673e-09, "logits/chosen": -1.9535789489746094, "logits/rejected": -1.6642125844955444, "logps/chosen": -595.6658935546875, "logps/rejected": -761.8668212890625, "loss": 0.6744, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42665472626686096, "rewards/margins": 0.18317101895809174, "rewards/rejected": -0.6098257303237915, "step": 13270 }, { "epoch": 0.98, "learning_rate": 2.542461108268501e-09, "logits/chosen": -1.931683897972107, "logits/rejected": -1.3814071416854858, "logps/chosen": -555.2284545898438, "logps/rejected": -702.8944091796875, "loss": 0.6783, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.40477991104125977, "rewards/margins": 0.1771407127380371, "rewards/rejected": -0.5819206237792969, "step": 13280 }, { "epoch": 0.98, "learning_rate": 2.3622744731702783e-09, "logits/chosen": -1.7865186929702759, "logits/rejected": -1.2639232873916626, "logps/chosen": -625.0459594726562, "logps/rejected": -802.7432861328125, "loss": 0.6744, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.49223440885543823, "rewards/margins": 0.19496117532253265, "rewards/rejected": -0.6871955990791321, "step": 13290 }, { "epoch": 0.98, "learning_rate": 2.188702101912465e-09, "logits/chosen": -1.8762435913085938, "logits/rejected": -1.5491892099380493, "logps/chosen": -615.9432373046875, "logps/rejected": -756.2877197265625, "loss": 0.6806, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4746759533882141, "rewards/margins": 0.14187446236610413, "rewards/rejected": -0.6165503859519958, "step": 13300 }, { "epoch": 0.98, "learning_rate": 2.0217451452669843e-09, "logits/chosen": -2.022794008255005, "logits/rejected": -1.38359797000885, "logps/chosen": -541.3167724609375, "logps/rejected": -664.4022827148438, "loss": 0.6765, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.34366899728775024, "rewards/margins": 0.17721763253211975, "rewards/rejected": -0.5208866596221924, "step": 13310 }, { "epoch": 0.98, "learning_rate": 1.8614047101459528e-09, "logits/chosen": -2.162581205368042, "logits/rejected": -1.8075227737426758, "logps/chosen": -511.04229736328125, "logps/rejected": -686.2706909179688, "loss": 0.6805, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32198280096054077, "rewards/margins": 0.21019263565540314, "rewards/rejected": -0.5321754217147827, "step": 13320 }, { "epoch": 0.98, "learning_rate": 1.7076818595944675e-09, "logits/chosen": -2.095569610595703, "logits/rejected": -1.5508904457092285, "logps/chosen": -527.4337768554688, "logps/rejected": -690.9654541015625, "loss": 0.6761, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3786025047302246, "rewards/margins": 0.1883591264486313, "rewards/rejected": -0.5669616460800171, "step": 13330 }, { "epoch": 0.98, "learning_rate": 1.5605776127837201e-09, "logits/chosen": -1.916803002357483, "logits/rejected": -1.3846256732940674, "logps/chosen": -496.59759521484375, "logps/rejected": -664.5631713867188, "loss": 0.6758, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3698965013027191, "rewards/margins": 0.19248731434345245, "rewards/rejected": -0.562383770942688, "step": 13340 }, { "epoch": 0.98, "learning_rate": 1.4200929450037812e-09, "logits/chosen": -2.2147014141082764, "logits/rejected": -1.690588355064392, "logps/chosen": -511.4840393066406, "logps/rejected": -653.356689453125, "loss": 0.6799, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3197299838066101, "rewards/margins": 0.18700823187828064, "rewards/rejected": -0.5067383050918579, "step": 13350 }, { "epoch": 0.99, "learning_rate": 1.286228787657606e-09, "logits/chosen": -2.1606364250183105, "logits/rejected": -1.508656620979309, "logps/chosen": -543.90966796875, "logps/rejected": -703.4929809570312, "loss": 0.6783, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.35477083921432495, "rewards/margins": 0.2113669365644455, "rewards/rejected": -0.5661377906799316, "step": 13360 }, { "epoch": 0.99, "learning_rate": 1.1589860282545937e-09, "logits/chosen": -2.0532193183898926, "logits/rejected": -1.387063980102539, "logps/chosen": -488.90460205078125, "logps/rejected": -766.166015625, "loss": 0.6751, "rewards/accuracies": 0.75, "rewards/chosen": -0.2874642014503479, "rewards/margins": 0.3175165057182312, "rewards/rejected": -0.6049806475639343, "step": 13370 }, { "epoch": 0.99, "learning_rate": 1.038365510404815e-09, "logits/chosen": -2.1224310398101807, "logits/rejected": -1.9362611770629883, "logps/chosen": -548.8182983398438, "logps/rejected": -726.0052490234375, "loss": 0.6821, "rewards/accuracies": 0.625, "rewards/chosen": -0.35985809564590454, "rewards/margins": 0.13877247273921967, "rewards/rejected": -0.4986305832862854, "step": 13380 }, { "epoch": 0.99, "learning_rate": 9.243680338132387e-10, "logits/chosen": -2.0732409954071045, "logits/rejected": -1.223917007446289, "logps/chosen": -526.8265380859375, "logps/rejected": -730.7329711914062, "loss": 0.6724, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.3784313201904297, "rewards/margins": 0.2703152298927307, "rewards/rejected": -0.6487466096878052, "step": 13390 }, { "epoch": 0.99, "learning_rate": 8.169943542746249e-10, "logits/chosen": -1.8368756771087646, "logits/rejected": -1.5230515003204346, "logps/chosen": -562.7351684570312, "logps/rejected": -699.3673095703125, "loss": 0.6784, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.393062561750412, "rewards/margins": 0.16774524748325348, "rewards/rejected": -0.5608077645301819, "step": 13400 }, { "epoch": 0.99, "learning_rate": 7.162451836685291e-10, "logits/chosen": -1.8111841678619385, "logits/rejected": -1.4022225141525269, "logps/chosen": -611.3531494140625, "logps/rejected": -734.1405029296875, "loss": 0.6793, "rewards/accuracies": 0.75, "rewards/chosen": -0.4026584029197693, "rewards/margins": 0.20177336037158966, "rewards/rejected": -0.6044317483901978, "step": 13410 }, { "epoch": 0.99, "learning_rate": 6.221211899545276e-10, "logits/chosen": -2.2148821353912354, "logits/rejected": -1.8691234588623047, "logps/chosen": -509.396728515625, "logps/rejected": -607.8479614257812, "loss": 0.6821, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.32009974122047424, "rewards/margins": 0.17420199513435364, "rewards/rejected": -0.4943017065525055, "step": 13420 }, { "epoch": 0.99, "learning_rate": 5.346229971675553e-10, "logits/chosen": -2.0127973556518555, "logits/rejected": -1.5307101011276245, "logps/chosen": -584.527587890625, "logps/rejected": -741.5817260742188, "loss": 0.675, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3921803832054138, "rewards/margins": 0.2079574316740036, "rewards/rejected": -0.6001377701759338, "step": 13430 }, { "epoch": 0.99, "learning_rate": 4.5375118541413073e-10, "logits/chosen": -1.9897279739379883, "logits/rejected": -1.5283173322677612, "logps/chosen": -508.90057373046875, "logps/rejected": -701.4889526367188, "loss": 0.6741, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.35640522837638855, "rewards/margins": 0.21547165513038635, "rewards/rejected": -0.5718768835067749, "step": 13440 }, { "epoch": 0.99, "learning_rate": 3.7950629086835883e-10, "logits/chosen": -2.139770984649658, "logits/rejected": -1.6776905059814453, "logps/chosen": -541.8732299804688, "logps/rejected": -714.386962890625, "loss": 0.6714, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4230429232120514, "rewards/margins": 0.18228723108768463, "rewards/rejected": -0.6053301095962524, "step": 13450 }, { "epoch": 0.99, "learning_rate": 3.1188880576848987e-10, "logits/chosen": -1.9445394277572632, "logits/rejected": -1.4908473491668701, "logps/chosen": -562.7593383789062, "logps/rejected": -773.4583129882812, "loss": 0.668, "rewards/accuracies": 0.875, "rewards/chosen": -0.3930773138999939, "rewards/margins": 0.2565082013607025, "rewards/rejected": -0.649585485458374, "step": 13460 }, { "epoch": 0.99, "learning_rate": 2.5089917841325526e-10, "logits/chosen": -2.05952787399292, "logits/rejected": -1.6279888153076172, "logps/chosen": -564.8178100585938, "logps/rejected": -665.5748291015625, "loss": 0.6815, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40282002091407776, "rewards/margins": 0.16588379442691803, "rewards/rejected": -0.5687037706375122, "step": 13470 }, { "epoch": 0.99, "learning_rate": 1.9653781315931429e-10, "logits/chosen": -1.9622747898101807, "logits/rejected": -1.6166906356811523, "logps/chosen": -573.0601806640625, "logps/rejected": -684.2724609375, "loss": 0.6781, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4461306631565094, "rewards/margins": 0.13631372153759003, "rewards/rejected": -0.5824443101882935, "step": 13480 }, { "epoch": 1.0, "learning_rate": 1.4880507041870048e-10, "logits/chosen": -1.897928237915039, "logits/rejected": -1.4916741847991943, "logps/chosen": -508.88885498046875, "logps/rejected": -600.0769653320312, "loss": 0.6755, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3285246193408966, "rewards/margins": 0.16100214421749115, "rewards/rejected": -0.4895266890525818, "step": 13490 }, { "epoch": 1.0, "learning_rate": 1.0770126665571311e-10, "logits/chosen": -2.2393720149993896, "logits/rejected": -1.9091227054595947, "logps/chosen": -530.4959716796875, "logps/rejected": -627.6652221679688, "loss": 0.6802, "rewards/accuracies": 0.625, "rewards/chosen": -0.36690476536750793, "rewards/margins": 0.12346380949020386, "rewards/rejected": -0.4903685450553894, "step": 13500 }, { "epoch": 1.0, "learning_rate": 7.322667438558472e-11, "logits/chosen": -2.044738531112671, "logits/rejected": -1.4705584049224854, "logps/chosen": -541.358642578125, "logps/rejected": -724.5108642578125, "loss": 0.6819, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.40186768770217896, "rewards/margins": 0.2067098319530487, "rewards/rejected": -0.6085774898529053, "step": 13510 }, { "epoch": 1.0, "learning_rate": 4.53815221723719e-11, "logits/chosen": -2.3141417503356934, "logits/rejected": -1.8397624492645264, "logps/chosen": -561.0306396484375, "logps/rejected": -695.5577392578125, "loss": 0.68, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.38748764991760254, "rewards/margins": 0.1546572744846344, "rewards/rejected": -0.5421448349952698, "step": 13520 }, { "epoch": 1.0, "learning_rate": 2.4165994627178832e-11, "logits/chosen": -1.9670490026474, "logits/rejected": -1.5303655862808228, "logps/chosen": -520.1076049804688, "logps/rejected": -723.4019775390625, "loss": 0.6779, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3921434283256531, "rewards/margins": 0.20627228915691376, "rewards/rejected": -0.598415732383728, "step": 13530 }, { "epoch": 1.0, "learning_rate": 9.580232407602196e-12, "logits/chosen": -2.0110790729522705, "logits/rejected": -1.4291017055511475, "logps/chosen": -623.3369750976562, "logps/rejected": -783.488525390625, "loss": 0.6775, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.41276684403419495, "rewards/margins": 0.2016405612230301, "rewards/rejected": -0.6144074201583862, "step": 13540 }, { "epoch": 1.0, "learning_rate": 1.624332216065838e-12, "logits/chosen": -1.966925024986267, "logits/rejected": -1.2332022190093994, "logps/chosen": -574.753173828125, "logps/rejected": -794.7755126953125, "loss": 0.6727, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4285539984703064, "rewards/margins": 0.27313631772994995, "rewards/rejected": -0.7016903162002563, "step": 13550 }, { "epoch": 1.0, "step": 13557, "total_flos": 0.0, "train_loss": 0.6793609698138653, "train_runtime": 56023.2927, "train_samples_per_second": 0.968, "train_steps_per_second": 0.242 } ], "logging_steps": 10, "max_steps": 13557, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }