{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6, "eval_steps": 500, "global_step": 2868, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "logits/chosen": -2.2282397747039795, "logits/rejected": -1.7104178667068481, "logps/chosen": -277.4985046386719, "logps/rejected": -283.1687927246094, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.9831498861312866, "logits/rejected": -1.8470757007598877, "logps/chosen": -266.929931640625, "logps/rejected": -278.9443359375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.0, "learning_rate": 3e-06, "logits/chosen": -1.998997449874878, "logits/rejected": -1.5633281469345093, "logps/chosen": -398.8482971191406, "logps/rejected": -361.72137451171875, "loss": 0.6779, "rewards/accuracies": 0.5625, "rewards/chosen": -0.008343720808625221, "rewards/margins": 0.033162664622068405, "rewards/rejected": -0.041506387293338776, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "logits/chosen": -2.0003268718719482, "logits/rejected": -1.7799348831176758, "logps/chosen": -282.08514404296875, "logps/rejected": -252.58938598632812, "loss": 0.6878, "rewards/accuracies": 0.5625, "rewards/chosen": -0.008050250820815563, "rewards/margins": 0.012078572064638138, "rewards/rejected": -0.020128823816776276, "step": 4 }, { "epoch": 0.0, "learning_rate": 5e-06, "logits/chosen": -2.224046230316162, "logits/rejected": -2.0547187328338623, "logps/chosen": -202.93685913085938, "logps/rejected": -208.28402709960938, "loss": 0.7109, "rewards/accuracies": 0.4375, "rewards/chosen": 0.011011920869350433, "rewards/margins": -0.033290065824985504, "rewards/rejected": 0.04430198669433594, "step": 5 }, { "epoch": 0.0, "learning_rate": 6e-06, "logits/chosen": -2.2208118438720703, "logits/rejected": -1.7688552141189575, "logps/chosen": -426.36590576171875, "logps/rejected": -263.5623779296875, "loss": 0.6819, "rewards/accuracies": 0.5625, "rewards/chosen": 0.14544084668159485, "rewards/margins": 0.02388599142432213, "rewards/rejected": 0.12155484408140182, "step": 6 }, { "epoch": 0.0, "learning_rate": 7e-06, "logits/chosen": -2.1621174812316895, "logits/rejected": -1.9987232685089111, "logps/chosen": -269.5228271484375, "logps/rejected": -226.28701782226562, "loss": 0.687, "rewards/accuracies": 0.5625, "rewards/chosen": 0.039055682718753815, "rewards/margins": 0.013098955154418945, "rewards/rejected": 0.02595672756433487, "step": 7 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "logits/chosen": -2.036137580871582, "logits/rejected": -1.858740210533142, "logps/chosen": -322.3067321777344, "logps/rejected": -247.76370239257812, "loss": 0.7017, "rewards/accuracies": 0.375, "rewards/chosen": -0.03971423953771591, "rewards/margins": -0.015833258628845215, "rewards/rejected": -0.023880982771515846, "step": 8 }, { "epoch": 0.0, "learning_rate": 9e-06, "logits/chosen": -2.3385281562805176, "logits/rejected": -2.152259349822998, "logps/chosen": -342.80950927734375, "logps/rejected": -332.3177490234375, "loss": 0.6956, "rewards/accuracies": 0.5625, "rewards/chosen": 0.07614301145076752, "rewards/margins": -0.0035643568262457848, "rewards/rejected": 0.07970735430717468, "step": 9 }, { "epoch": 0.0, "learning_rate": 1e-05, "logits/chosen": -2.1200649738311768, "logits/rejected": -1.795499563217163, "logps/chosen": -450.69927978515625, "logps/rejected": -349.3813781738281, "loss": 0.7038, "rewards/accuracies": 0.5625, "rewards/chosen": 0.026192951947450638, "rewards/margins": -0.017020108178257942, "rewards/rejected": 0.04321306198835373, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.1000000000000001e-05, "logits/chosen": -2.361680507659912, "logits/rejected": -1.5736029148101807, "logps/chosen": -385.343505859375, "logps/rejected": -252.7088165283203, "loss": 0.6631, "rewards/accuracies": 0.8125, "rewards/chosen": -0.018131183460354805, "rewards/margins": 0.06300930678844452, "rewards/rejected": -0.08114049583673477, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.2e-05, "logits/chosen": -2.1654326915740967, "logits/rejected": -1.7892608642578125, "logps/chosen": -267.4467468261719, "logps/rejected": -194.1240692138672, "loss": 0.6679, "rewards/accuracies": 0.6875, "rewards/chosen": 0.046659138053655624, "rewards/margins": 0.054810453206300735, "rewards/rejected": -0.00815131701529026, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.3000000000000001e-05, "logits/chosen": -2.1767282485961914, "logits/rejected": -1.8208816051483154, "logps/chosen": -352.0940246582031, "logps/rejected": -267.9727783203125, "loss": 0.6301, "rewards/accuracies": 0.875, "rewards/chosen": 0.21059754490852356, "rewards/margins": 0.13529108464717865, "rewards/rejected": 0.0753064677119255, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.4e-05, "logits/chosen": -2.2854440212249756, "logits/rejected": -2.080085515975952, "logps/chosen": -376.6247253417969, "logps/rejected": -278.5927734375, "loss": 0.6278, "rewards/accuracies": 0.625, "rewards/chosen": 0.15183602273464203, "rewards/margins": 0.1464289128780365, "rewards/rejected": 0.005407098680734634, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.5000000000000002e-05, "logits/chosen": -2.4406135082244873, "logits/rejected": -2.322054147720337, "logps/chosen": -298.485595703125, "logps/rejected": -260.11932373046875, "loss": 0.6534, "rewards/accuracies": 0.75, "rewards/chosen": 0.004409570246934891, "rewards/margins": 0.10971016436815262, "rewards/rejected": -0.10530060529708862, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.6000000000000003e-05, "logits/chosen": -2.136800527572632, "logits/rejected": -2.085320472717285, "logps/chosen": -313.06280517578125, "logps/rejected": -308.3344421386719, "loss": 0.6359, "rewards/accuracies": 0.5625, "rewards/chosen": 0.07214680314064026, "rewards/margins": 0.1383473426103592, "rewards/rejected": -0.06620054692029953, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.7e-05, "logits/chosen": -2.343367099761963, "logits/rejected": -1.9561631679534912, "logps/chosen": -357.57171630859375, "logps/rejected": -278.8397521972656, "loss": 0.5886, "rewards/accuracies": 0.625, "rewards/chosen": 0.09857945144176483, "rewards/margins": 0.24340946972370148, "rewards/rejected": -0.14483001828193665, "step": 17 }, { "epoch": 0.0, "learning_rate": 1.8e-05, "logits/chosen": -2.080395221710205, "logits/rejected": -1.8958829641342163, "logps/chosen": -295.2388000488281, "logps/rejected": -324.56915283203125, "loss": 0.5596, "rewards/accuracies": 0.8125, "rewards/chosen": 0.20325438678264618, "rewards/margins": 0.32939791679382324, "rewards/rejected": -0.12614354491233826, "step": 18 }, { "epoch": 0.0, "learning_rate": 1.9e-05, "logits/chosen": -2.3108816146850586, "logits/rejected": -2.1787753105163574, "logps/chosen": -396.53424072265625, "logps/rejected": -331.013916015625, "loss": 0.5661, "rewards/accuracies": 0.8125, "rewards/chosen": 0.08369699120521545, "rewards/margins": 0.3793570399284363, "rewards/rejected": -0.29566001892089844, "step": 19 }, { "epoch": 0.0, "learning_rate": 2e-05, "logits/chosen": -2.091219902038574, "logits/rejected": -2.02215313911438, "logps/chosen": -257.3657531738281, "logps/rejected": -274.3866882324219, "loss": 0.6875, "rewards/accuracies": 0.625, "rewards/chosen": 0.07300397753715515, "rewards/margins": 0.10496324300765991, "rewards/rejected": -0.03195926174521446, "step": 20 }, { "epoch": 0.0, "learning_rate": 1.9995798319327732e-05, "logits/chosen": -2.039426326751709, "logits/rejected": -1.903738260269165, "logps/chosen": -240.83859252929688, "logps/rejected": -356.79248046875, "loss": 0.8172, "rewards/accuracies": 0.5625, "rewards/chosen": -0.16449320316314697, "rewards/margins": -0.060936544090509415, "rewards/rejected": -0.10355665534734726, "step": 21 }, { "epoch": 0.0, "learning_rate": 1.9991596638655465e-05, "logits/chosen": -2.1439273357391357, "logits/rejected": -1.8001049757003784, "logps/chosen": -311.072265625, "logps/rejected": -224.07933044433594, "loss": 0.6006, "rewards/accuracies": 0.6875, "rewards/chosen": -0.14014743268489838, "rewards/margins": 0.37872254848480225, "rewards/rejected": -0.518869936466217, "step": 22 }, { "epoch": 0.0, "learning_rate": 1.9987394957983196e-05, "logits/chosen": -2.2514448165893555, "logits/rejected": -1.93721604347229, "logps/chosen": -260.8023376464844, "logps/rejected": -245.62612915039062, "loss": 0.6351, "rewards/accuracies": 0.8125, "rewards/chosen": -0.11218864470720291, "rewards/margins": 0.15137380361557007, "rewards/rejected": -0.2635624408721924, "step": 23 }, { "epoch": 0.01, "learning_rate": 1.9983193277310926e-05, "logits/chosen": -2.155559539794922, "logits/rejected": -2.109809637069702, "logps/chosen": -171.87376403808594, "logps/rejected": -253.1971893310547, "loss": 0.5501, "rewards/accuracies": 0.875, "rewards/chosen": 0.003980059176683426, "rewards/margins": 0.3934435248374939, "rewards/rejected": -0.3894634544849396, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.9978991596638656e-05, "logits/chosen": -2.0442819595336914, "logits/rejected": -1.5270578861236572, "logps/chosen": -331.35784912109375, "logps/rejected": -254.6148681640625, "loss": 0.4698, "rewards/accuracies": 0.75, "rewards/chosen": 0.26868611574172974, "rewards/margins": 0.6276190876960754, "rewards/rejected": -0.3589329421520233, "step": 25 }, { "epoch": 0.01, "learning_rate": 1.997478991596639e-05, "logits/chosen": -2.1941871643066406, "logits/rejected": -1.9360427856445312, "logps/chosen": -286.4983215332031, "logps/rejected": -254.10491943359375, "loss": 0.4371, "rewards/accuracies": 0.875, "rewards/chosen": 0.09487167000770569, "rewards/margins": 0.8353039026260376, "rewards/rejected": -0.7404322624206543, "step": 26 }, { "epoch": 0.01, "learning_rate": 1.997058823529412e-05, "logits/chosen": -2.3349223136901855, "logits/rejected": -2.3109145164489746, "logps/chosen": -306.22509765625, "logps/rejected": -261.090576171875, "loss": 0.3504, "rewards/accuracies": 0.9375, "rewards/chosen": 0.43325841426849365, "rewards/margins": 1.2106494903564453, "rewards/rejected": -0.7773910760879517, "step": 27 }, { "epoch": 0.01, "learning_rate": 1.996638655462185e-05, "logits/chosen": -2.251776933670044, "logits/rejected": -2.2390496730804443, "logps/chosen": -351.43438720703125, "logps/rejected": -364.4002685546875, "loss": 0.384, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4874539375305176, "rewards/margins": 0.96163010597229, "rewards/rejected": -0.4741760790348053, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.996218487394958e-05, "logits/chosen": -2.179680109024048, "logits/rejected": -1.943859577178955, "logps/chosen": -353.5611572265625, "logps/rejected": -363.4343566894531, "loss": 0.554, "rewards/accuracies": 0.625, "rewards/chosen": 0.01377946138381958, "rewards/margins": 0.7094612717628479, "rewards/rejected": -0.6956818103790283, "step": 29 }, { "epoch": 0.01, "learning_rate": 1.9957983193277314e-05, "logits/chosen": -2.1971545219421387, "logits/rejected": -1.6987268924713135, "logps/chosen": -452.05914306640625, "logps/rejected": -332.03753662109375, "loss": 0.7594, "rewards/accuracies": 0.5625, "rewards/chosen": -0.534920334815979, "rewards/margins": 0.350356787443161, "rewards/rejected": -0.8852770924568176, "step": 30 }, { "epoch": 0.01, "learning_rate": 1.9953781512605044e-05, "logits/chosen": -2.1026246547698975, "logits/rejected": -1.983717679977417, "logps/chosen": -306.4521484375, "logps/rejected": -327.5496520996094, "loss": 0.6098, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03515242040157318, "rewards/margins": 0.6194229125976562, "rewards/rejected": -0.6545753479003906, "step": 31 }, { "epoch": 0.01, "learning_rate": 1.9949579831932774e-05, "logits/chosen": -1.9110617637634277, "logits/rejected": -1.9333263635635376, "logps/chosen": -259.166259765625, "logps/rejected": -358.37451171875, "loss": 0.3246, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07927685976028442, "rewards/margins": 1.2982083559036255, "rewards/rejected": -1.3774852752685547, "step": 32 }, { "epoch": 0.01, "learning_rate": 1.9945378151260505e-05, "logits/chosen": -1.9312708377838135, "logits/rejected": -1.825966715812683, "logps/chosen": -383.91357421875, "logps/rejected": -316.36798095703125, "loss": 0.5947, "rewards/accuracies": 0.5625, "rewards/chosen": -0.2903977930545807, "rewards/margins": 0.9727724194526672, "rewards/rejected": -1.2631702423095703, "step": 33 }, { "epoch": 0.01, "learning_rate": 1.9941176470588238e-05, "logits/chosen": -2.1588261127471924, "logits/rejected": -1.9789544343948364, "logps/chosen": -382.4302978515625, "logps/rejected": -377.56048583984375, "loss": 0.4118, "rewards/accuracies": 0.9375, "rewards/chosen": 0.20616267621517181, "rewards/margins": 0.9832318425178528, "rewards/rejected": -0.7770692110061646, "step": 34 }, { "epoch": 0.01, "learning_rate": 1.993697478991597e-05, "logits/chosen": -2.2012393474578857, "logits/rejected": -2.1398000717163086, "logps/chosen": -253.782470703125, "logps/rejected": -289.1595764160156, "loss": 0.6182, "rewards/accuracies": 0.625, "rewards/chosen": -0.3398907780647278, "rewards/margins": 0.6650518178939819, "rewards/rejected": -1.0049426555633545, "step": 35 }, { "epoch": 0.01, "learning_rate": 1.99327731092437e-05, "logits/chosen": -2.526705265045166, "logits/rejected": -2.1168999671936035, "logps/chosen": -298.3293762207031, "logps/rejected": -284.14892578125, "loss": 0.5028, "rewards/accuracies": 0.75, "rewards/chosen": 0.3161202669143677, "rewards/margins": 1.0427557229995728, "rewards/rejected": -0.7266354560852051, "step": 36 }, { "epoch": 0.01, "learning_rate": 1.992857142857143e-05, "logits/chosen": -2.27311372756958, "logits/rejected": -1.748600959777832, "logps/chosen": -293.44232177734375, "logps/rejected": -297.465087890625, "loss": 0.4748, "rewards/accuracies": 0.75, "rewards/chosen": 0.1936989277601242, "rewards/margins": 1.0498151779174805, "rewards/rejected": -0.8561161756515503, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.9924369747899163e-05, "logits/chosen": -2.2410919666290283, "logits/rejected": -1.9170420169830322, "logps/chosen": -335.0601806640625, "logps/rejected": -250.7333221435547, "loss": 0.6916, "rewards/accuracies": 0.625, "rewards/chosen": 0.02380967140197754, "rewards/margins": 0.4384457468986511, "rewards/rejected": -0.4146360754966736, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.9920168067226893e-05, "logits/chosen": -2.2150847911834717, "logits/rejected": -2.0835230350494385, "logps/chosen": -239.5399627685547, "logps/rejected": -255.68206787109375, "loss": 0.4157, "rewards/accuracies": 0.8125, "rewards/chosen": 0.26076507568359375, "rewards/margins": 0.8942439556121826, "rewards/rejected": -0.6334788203239441, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.9915966386554623e-05, "logits/chosen": -2.5332202911376953, "logits/rejected": -2.0763649940490723, "logps/chosen": -311.97802734375, "logps/rejected": -266.4057922363281, "loss": 0.4129, "rewards/accuracies": 0.8125, "rewards/chosen": 0.17071732878684998, "rewards/margins": 1.2794352769851685, "rewards/rejected": -1.108717918395996, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.9911764705882353e-05, "logits/chosen": -2.471341609954834, "logits/rejected": -1.6814744472503662, "logps/chosen": -355.7557067871094, "logps/rejected": -301.5888671875, "loss": 0.4109, "rewards/accuracies": 0.8125, "rewards/chosen": 0.49135756492614746, "rewards/margins": 1.131295919418335, "rewards/rejected": -0.6399383544921875, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.9907563025210087e-05, "logits/chosen": -2.2631304264068604, "logits/rejected": -2.234004259109497, "logps/chosen": -327.2241516113281, "logps/rejected": -300.0874938964844, "loss": 0.4831, "rewards/accuracies": 0.75, "rewards/chosen": -0.06589288264513016, "rewards/margins": 0.9846144914627075, "rewards/rejected": -1.0505073070526123, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.9903361344537817e-05, "logits/chosen": -2.5259833335876465, "logits/rejected": -2.2826600074768066, "logps/chosen": -267.2774658203125, "logps/rejected": -274.1180725097656, "loss": 0.4359, "rewards/accuracies": 0.75, "rewards/chosen": -0.12646692991256714, "rewards/margins": 0.9644603133201599, "rewards/rejected": -1.0909273624420166, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.9899159663865547e-05, "logits/chosen": -2.385162353515625, "logits/rejected": -1.7944622039794922, "logps/chosen": -453.2611083984375, "logps/rejected": -320.607421875, "loss": 0.2998, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5784784555435181, "rewards/margins": 1.655571699142456, "rewards/rejected": -1.0770931243896484, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.989495798319328e-05, "logits/chosen": -1.7950670719146729, "logits/rejected": -2.163729667663574, "logps/chosen": -198.92381286621094, "logps/rejected": -242.31094360351562, "loss": 0.391, "rewards/accuracies": 0.875, "rewards/chosen": 0.20681682229042053, "rewards/margins": 1.026400089263916, "rewards/rejected": -0.8195833563804626, "step": 45 }, { "epoch": 0.01, "learning_rate": 1.989075630252101e-05, "logits/chosen": -2.1599843502044678, "logits/rejected": -2.060497283935547, "logps/chosen": -295.61444091796875, "logps/rejected": -259.965576171875, "loss": 0.5432, "rewards/accuracies": 0.6875, "rewards/chosen": 0.3103800415992737, "rewards/margins": 0.9843414425849915, "rewards/rejected": -0.6739614009857178, "step": 46 }, { "epoch": 0.01, "learning_rate": 1.988655462184874e-05, "logits/chosen": -2.0689070224761963, "logits/rejected": -2.012744426727295, "logps/chosen": -232.267822265625, "logps/rejected": -297.8428649902344, "loss": 0.4166, "rewards/accuracies": 0.75, "rewards/chosen": -0.39347603917121887, "rewards/margins": 1.4002763032913208, "rewards/rejected": -1.7937523126602173, "step": 47 }, { "epoch": 0.01, "learning_rate": 1.988235294117647e-05, "logits/chosen": -2.195972442626953, "logits/rejected": -1.976593255996704, "logps/chosen": -293.2135314941406, "logps/rejected": -389.1631164550781, "loss": 0.4143, "rewards/accuracies": 0.75, "rewards/chosen": 0.4289471209049225, "rewards/margins": 1.2965888977050781, "rewards/rejected": -0.8676416873931885, "step": 48 }, { "epoch": 0.01, "learning_rate": 1.9878151260504205e-05, "logits/chosen": -2.09908127784729, "logits/rejected": -1.894362211227417, "logps/chosen": -316.28729248046875, "logps/rejected": -289.5418395996094, "loss": 0.4047, "rewards/accuracies": 0.75, "rewards/chosen": 0.4415051341056824, "rewards/margins": 1.1790716648101807, "rewards/rejected": -0.7375665903091431, "step": 49 }, { "epoch": 0.01, "learning_rate": 1.9873949579831935e-05, "logits/chosen": -2.077441930770874, "logits/rejected": -2.47188138961792, "logps/chosen": -264.8441162109375, "logps/rejected": -433.2682800292969, "loss": 0.4666, "rewards/accuracies": 0.9375, "rewards/chosen": 0.13278540968894958, "rewards/margins": 1.287182331085205, "rewards/rejected": -1.154396891593933, "step": 50 }, { "epoch": 0.01, "learning_rate": 1.9869747899159666e-05, "logits/chosen": -2.270512819290161, "logits/rejected": -2.3522863388061523, "logps/chosen": -333.0877685546875, "logps/rejected": -311.7333679199219, "loss": 0.2762, "rewards/accuracies": 0.875, "rewards/chosen": 0.30799394845962524, "rewards/margins": 1.776007056236267, "rewards/rejected": -1.4680131673812866, "step": 51 }, { "epoch": 0.01, "learning_rate": 1.9865546218487396e-05, "logits/chosen": -1.980552077293396, "logits/rejected": -1.7865663766860962, "logps/chosen": -296.15966796875, "logps/rejected": -359.48095703125, "loss": 0.3227, "rewards/accuracies": 0.75, "rewards/chosen": 0.21640604734420776, "rewards/margins": 1.4678544998168945, "rewards/rejected": -1.2514485120773315, "step": 52 }, { "epoch": 0.01, "learning_rate": 1.986134453781513e-05, "logits/chosen": -2.0458903312683105, "logits/rejected": -2.013563871383667, "logps/chosen": -206.70895385742188, "logps/rejected": -264.04412841796875, "loss": 0.3541, "rewards/accuracies": 0.875, "rewards/chosen": 0.39075812697410583, "rewards/margins": 1.5346307754516602, "rewards/rejected": -1.1438727378845215, "step": 53 }, { "epoch": 0.01, "learning_rate": 1.985714285714286e-05, "logits/chosen": -2.0617918968200684, "logits/rejected": -2.1093711853027344, "logps/chosen": -276.7305908203125, "logps/rejected": -346.8241882324219, "loss": 0.4043, "rewards/accuracies": 0.8125, "rewards/chosen": 0.25451546907424927, "rewards/margins": 1.3389711380004883, "rewards/rejected": -1.0844557285308838, "step": 54 }, { "epoch": 0.01, "learning_rate": 1.985294117647059e-05, "logits/chosen": -1.857073187828064, "logits/rejected": -1.600212574005127, "logps/chosen": -333.90447998046875, "logps/rejected": -300.74462890625, "loss": 0.5176, "rewards/accuracies": 0.6875, "rewards/chosen": 0.08902762830257416, "rewards/margins": 1.1346933841705322, "rewards/rejected": -1.0456656217575073, "step": 55 }, { "epoch": 0.01, "learning_rate": 1.984873949579832e-05, "logits/chosen": -2.073125123977661, "logits/rejected": -1.9293947219848633, "logps/chosen": -361.3773193359375, "logps/rejected": -243.97341918945312, "loss": 0.1839, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6487305760383606, "rewards/margins": 2.422443389892578, "rewards/rejected": -1.7737128734588623, "step": 56 }, { "epoch": 0.01, "learning_rate": 1.9844537815126054e-05, "logits/chosen": -2.073249340057373, "logits/rejected": -1.8651671409606934, "logps/chosen": -283.1850891113281, "logps/rejected": -260.81707763671875, "loss": 0.263, "rewards/accuracies": 0.8125, "rewards/chosen": 0.5375748872756958, "rewards/margins": 2.4909462928771973, "rewards/rejected": -1.953371286392212, "step": 57 }, { "epoch": 0.01, "learning_rate": 1.9840336134453784e-05, "logits/chosen": -2.2339138984680176, "logits/rejected": -2.1560802459716797, "logps/chosen": -360.5123596191406, "logps/rejected": -459.8302917480469, "loss": 0.2335, "rewards/accuracies": 0.9375, "rewards/chosen": 0.18645963072776794, "rewards/margins": 2.2421610355377197, "rewards/rejected": -2.05570125579834, "step": 58 }, { "epoch": 0.01, "learning_rate": 1.9836134453781514e-05, "logits/chosen": -2.163757801055908, "logits/rejected": -2.1205854415893555, "logps/chosen": -259.7275390625, "logps/rejected": -281.04296875, "loss": 0.6058, "rewards/accuracies": 0.625, "rewards/chosen": 0.37449726462364197, "rewards/margins": 0.622344434261322, "rewards/rejected": -0.24784713983535767, "step": 59 }, { "epoch": 0.01, "learning_rate": 1.9831932773109244e-05, "logits/chosen": -2.19521427154541, "logits/rejected": -2.30275297164917, "logps/chosen": -314.6921081542969, "logps/rejected": -378.01129150390625, "loss": 0.3264, "rewards/accuracies": 0.8125, "rewards/chosen": 0.23214736580848694, "rewards/margins": 1.752747654914856, "rewards/rejected": -1.5206003189086914, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.9827731092436978e-05, "logits/chosen": -2.1164236068725586, "logits/rejected": -1.7807343006134033, "logps/chosen": -293.411865234375, "logps/rejected": -250.91403198242188, "loss": 0.4, "rewards/accuracies": 0.8125, "rewards/chosen": -0.20865458250045776, "rewards/margins": 1.714974284172058, "rewards/rejected": -1.923628807067871, "step": 61 }, { "epoch": 0.01, "learning_rate": 1.9823529411764708e-05, "logits/chosen": -2.1823620796203613, "logits/rejected": -2.0653858184814453, "logps/chosen": -156.7562255859375, "logps/rejected": -190.81983947753906, "loss": 0.3667, "rewards/accuracies": 0.8125, "rewards/chosen": 0.2006514072418213, "rewards/margins": 1.9494022130966187, "rewards/rejected": -1.7487506866455078, "step": 62 }, { "epoch": 0.01, "learning_rate": 1.981932773109244e-05, "logits/chosen": -2.387006998062134, "logits/rejected": -1.9845913648605347, "logps/chosen": -400.44439697265625, "logps/rejected": -324.6969909667969, "loss": 0.4412, "rewards/accuracies": 0.75, "rewards/chosen": 0.4312658905982971, "rewards/margins": 1.395607352256775, "rewards/rejected": -0.9643415212631226, "step": 63 }, { "epoch": 0.01, "learning_rate": 1.981512605042017e-05, "logits/chosen": -2.2400944232940674, "logits/rejected": -2.0497281551361084, "logps/chosen": -303.5828552246094, "logps/rejected": -257.9069519042969, "loss": 0.763, "rewards/accuracies": 0.5625, "rewards/chosen": -0.03154442459344864, "rewards/margins": 0.9810715913772583, "rewards/rejected": -1.0126160383224487, "step": 64 }, { "epoch": 0.01, "learning_rate": 1.9810924369747902e-05, "logits/chosen": -2.16957950592041, "logits/rejected": -1.5903396606445312, "logps/chosen": -362.58184814453125, "logps/rejected": -244.8644561767578, "loss": 0.3795, "rewards/accuracies": 0.8125, "rewards/chosen": 0.4543456435203552, "rewards/margins": 1.6872265338897705, "rewards/rejected": -1.23288094997406, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.9806722689075632e-05, "logits/chosen": -1.9502336978912354, "logits/rejected": -2.0793938636779785, "logps/chosen": -299.5069580078125, "logps/rejected": -331.5346984863281, "loss": 0.4049, "rewards/accuracies": 0.6875, "rewards/chosen": 0.17613552510738373, "rewards/margins": 1.658630609512329, "rewards/rejected": -1.4824949502944946, "step": 66 }, { "epoch": 0.01, "learning_rate": 1.9802521008403363e-05, "logits/chosen": -2.1173956394195557, "logits/rejected": -2.0675406455993652, "logps/chosen": -302.7386779785156, "logps/rejected": -370.3883361816406, "loss": 0.2002, "rewards/accuracies": 0.9375, "rewards/chosen": 0.41592830419540405, "rewards/margins": 2.346912145614624, "rewards/rejected": -1.9309837818145752, "step": 67 }, { "epoch": 0.01, "learning_rate": 1.9798319327731096e-05, "logits/chosen": -2.1113007068634033, "logits/rejected": -1.8868319988250732, "logps/chosen": -338.0023193359375, "logps/rejected": -306.9248046875, "loss": 0.3672, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3922884166240692, "rewards/margins": 1.854377031326294, "rewards/rejected": -1.4620883464813232, "step": 68 }, { "epoch": 0.01, "learning_rate": 1.9794117647058827e-05, "logits/chosen": -1.895932674407959, "logits/rejected": -1.6613662242889404, "logps/chosen": -363.0187072753906, "logps/rejected": -502.70294189453125, "loss": 0.3624, "rewards/accuracies": 0.8125, "rewards/chosen": 0.6353020668029785, "rewards/margins": 2.4968245029449463, "rewards/rejected": -1.8615224361419678, "step": 69 }, { "epoch": 0.01, "learning_rate": 1.9789915966386557e-05, "logits/chosen": -1.977718710899353, "logits/rejected": -1.753633975982666, "logps/chosen": -276.27935791015625, "logps/rejected": -266.5805969238281, "loss": 0.5753, "rewards/accuracies": 0.6875, "rewards/chosen": 0.015602737665176392, "rewards/margins": 1.6934807300567627, "rewards/rejected": -1.6778781414031982, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.9785714285714287e-05, "logits/chosen": -2.136392831802368, "logits/rejected": -2.1143226623535156, "logps/chosen": -266.6164855957031, "logps/rejected": -341.439453125, "loss": 0.4063, "rewards/accuracies": 0.75, "rewards/chosen": 0.6387972831726074, "rewards/margins": 1.8382182121276855, "rewards/rejected": -1.1994209289550781, "step": 71 }, { "epoch": 0.02, "learning_rate": 1.978151260504202e-05, "logits/chosen": -2.1034064292907715, "logits/rejected": -1.6448578834533691, "logps/chosen": -320.40631103515625, "logps/rejected": -245.26319885253906, "loss": 0.3839, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16450253129005432, "rewards/margins": 2.016575336456299, "rewards/rejected": -1.8520729541778564, "step": 72 }, { "epoch": 0.02, "learning_rate": 1.977731092436975e-05, "logits/chosen": -1.767681360244751, "logits/rejected": -1.9492697715759277, "logps/chosen": -329.1862487792969, "logps/rejected": -435.3006896972656, "loss": 0.3475, "rewards/accuracies": 0.75, "rewards/chosen": 0.07233725488185883, "rewards/margins": 2.7824957370758057, "rewards/rejected": -2.710158586502075, "step": 73 }, { "epoch": 0.02, "learning_rate": 1.977310924369748e-05, "logits/chosen": -1.9439988136291504, "logits/rejected": -1.6310694217681885, "logps/chosen": -319.0556640625, "logps/rejected": -282.9508361816406, "loss": 0.3585, "rewards/accuracies": 0.8125, "rewards/chosen": 0.03838210180401802, "rewards/margins": 2.573625087738037, "rewards/rejected": -2.535243034362793, "step": 74 }, { "epoch": 0.02, "learning_rate": 1.976890756302521e-05, "logits/chosen": -1.7561790943145752, "logits/rejected": -1.949948787689209, "logps/chosen": -258.57366943359375, "logps/rejected": -538.1862182617188, "loss": 0.3153, "rewards/accuracies": 0.8125, "rewards/chosen": 0.23872801661491394, "rewards/margins": 2.344884157180786, "rewards/rejected": -2.10615611076355, "step": 75 }, { "epoch": 0.02, "learning_rate": 1.9764705882352945e-05, "logits/chosen": -2.3019189834594727, "logits/rejected": -2.0765202045440674, "logps/chosen": -270.93817138671875, "logps/rejected": -274.4999694824219, "loss": 0.581, "rewards/accuracies": 0.75, "rewards/chosen": 0.2101031243801117, "rewards/margins": 1.4574387073516846, "rewards/rejected": -1.2473355531692505, "step": 76 }, { "epoch": 0.02, "learning_rate": 1.9760504201680675e-05, "logits/chosen": -2.0135693550109863, "logits/rejected": -2.2255821228027344, "logps/chosen": -273.78082275390625, "logps/rejected": -289.54486083984375, "loss": 0.694, "rewards/accuracies": 0.5625, "rewards/chosen": -0.009193005971610546, "rewards/margins": 0.7785494923591614, "rewards/rejected": -0.7877424955368042, "step": 77 }, { "epoch": 0.02, "learning_rate": 1.9756302521008405e-05, "logits/chosen": -2.212216377258301, "logits/rejected": -2.2388317584991455, "logps/chosen": -263.4593505859375, "logps/rejected": -271.59466552734375, "loss": 0.3124, "rewards/accuracies": 0.875, "rewards/chosen": 0.40438124537467957, "rewards/margins": 1.8790018558502197, "rewards/rejected": -1.4746206998825073, "step": 78 }, { "epoch": 0.02, "learning_rate": 1.9752100840336136e-05, "logits/chosen": -1.9206621646881104, "logits/rejected": -1.7983925342559814, "logps/chosen": -379.12115478515625, "logps/rejected": -394.8392028808594, "loss": 0.5027, "rewards/accuracies": 0.75, "rewards/chosen": 0.5758500099182129, "rewards/margins": 1.5463589429855347, "rewards/rejected": -0.9705088138580322, "step": 79 }, { "epoch": 0.02, "learning_rate": 1.974789915966387e-05, "logits/chosen": -2.0521106719970703, "logits/rejected": -1.7378277778625488, "logps/chosen": -256.2105712890625, "logps/rejected": -302.44635009765625, "loss": 0.4625, "rewards/accuracies": 0.6875, "rewards/chosen": 0.2178654670715332, "rewards/margins": 1.67887544631958, "rewards/rejected": -1.4610100984573364, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.97436974789916e-05, "logits/chosen": -2.124636650085449, "logits/rejected": -2.006908655166626, "logps/chosen": -178.78956604003906, "logps/rejected": -279.22357177734375, "loss": 0.4233, "rewards/accuracies": 0.875, "rewards/chosen": 0.6088790893554688, "rewards/margins": 1.2809927463531494, "rewards/rejected": -0.6721135377883911, "step": 81 }, { "epoch": 0.02, "learning_rate": 1.973949579831933e-05, "logits/chosen": -2.103243827819824, "logits/rejected": -1.9861217737197876, "logps/chosen": -389.2289123535156, "logps/rejected": -385.99365234375, "loss": 0.4647, "rewards/accuracies": 0.875, "rewards/chosen": 0.7544265985488892, "rewards/margins": 1.7135108709335327, "rewards/rejected": -0.9590842723846436, "step": 82 }, { "epoch": 0.02, "learning_rate": 1.973529411764706e-05, "logits/chosen": -1.8935327529907227, "logits/rejected": -1.8215477466583252, "logps/chosen": -210.7542266845703, "logps/rejected": -241.78558349609375, "loss": 0.3477, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3735242187976837, "rewards/margins": 1.599092960357666, "rewards/rejected": -1.2255687713623047, "step": 83 }, { "epoch": 0.02, "learning_rate": 1.9731092436974793e-05, "logits/chosen": -2.29447603225708, "logits/rejected": -2.161421298980713, "logps/chosen": -275.94500732421875, "logps/rejected": -238.6147918701172, "loss": 0.2717, "rewards/accuracies": 1.0, "rewards/chosen": 0.5089815855026245, "rewards/margins": 1.4455078840255737, "rewards/rejected": -0.9365262985229492, "step": 84 }, { "epoch": 0.02, "learning_rate": 1.9726890756302524e-05, "logits/chosen": -1.8601469993591309, "logits/rejected": -1.8802802562713623, "logps/chosen": -235.6843719482422, "logps/rejected": -287.5454406738281, "loss": 0.4983, "rewards/accuracies": 0.8125, "rewards/chosen": 0.013763606548309326, "rewards/margins": 1.4783430099487305, "rewards/rejected": -1.464579463005066, "step": 85 }, { "epoch": 0.02, "learning_rate": 1.9722689075630254e-05, "logits/chosen": -2.183777332305908, "logits/rejected": -1.9824378490447998, "logps/chosen": -268.580078125, "logps/rejected": -227.19166564941406, "loss": 0.3715, "rewards/accuracies": 0.8125, "rewards/chosen": 0.7063171863555908, "rewards/margins": 2.124077320098877, "rewards/rejected": -1.4177602529525757, "step": 86 }, { "epoch": 0.02, "learning_rate": 1.9718487394957987e-05, "logits/chosen": -1.9882242679595947, "logits/rejected": -2.1597676277160645, "logps/chosen": -240.87130737304688, "logps/rejected": -204.73440551757812, "loss": 0.3994, "rewards/accuracies": 0.6875, "rewards/chosen": 0.6949409246444702, "rewards/margins": 1.5625944137573242, "rewards/rejected": -0.8676536083221436, "step": 87 }, { "epoch": 0.02, "learning_rate": 1.9714285714285718e-05, "logits/chosen": -1.8498257398605347, "logits/rejected": -1.7139078378677368, "logps/chosen": -291.52764892578125, "logps/rejected": -339.3037414550781, "loss": 0.4453, "rewards/accuracies": 0.8125, "rewards/chosen": 0.44802042841911316, "rewards/margins": 1.2824641466140747, "rewards/rejected": -0.8344437479972839, "step": 88 }, { "epoch": 0.02, "learning_rate": 1.9710084033613448e-05, "logits/chosen": -2.162508726119995, "logits/rejected": -1.9323761463165283, "logps/chosen": -344.0239562988281, "logps/rejected": -314.748046875, "loss": 0.3641, "rewards/accuracies": 0.875, "rewards/chosen": 0.7742578983306885, "rewards/margins": 1.3817992210388184, "rewards/rejected": -0.6075414419174194, "step": 89 }, { "epoch": 0.02, "learning_rate": 1.9705882352941178e-05, "logits/chosen": -1.9877519607543945, "logits/rejected": -1.9254560470581055, "logps/chosen": -287.8323974609375, "logps/rejected": -344.7055358886719, "loss": 0.4725, "rewards/accuracies": 0.875, "rewards/chosen": 0.8281481862068176, "rewards/margins": 1.6265738010406494, "rewards/rejected": -0.798425555229187, "step": 90 }, { "epoch": 0.02, "learning_rate": 1.9701680672268912e-05, "logits/chosen": -2.1865835189819336, "logits/rejected": -1.8122529983520508, "logps/chosen": -297.27642822265625, "logps/rejected": -297.00433349609375, "loss": 0.6156, "rewards/accuracies": 0.75, "rewards/chosen": 0.2508367896080017, "rewards/margins": 1.7117033004760742, "rewards/rejected": -1.4608666896820068, "step": 91 }, { "epoch": 0.02, "learning_rate": 1.9697478991596642e-05, "logits/chosen": -2.2020838260650635, "logits/rejected": -1.9021496772766113, "logps/chosen": -326.5546875, "logps/rejected": -308.9887390136719, "loss": 0.3583, "rewards/accuracies": 0.6875, "rewards/chosen": 1.3239891529083252, "rewards/margins": 2.97361421585083, "rewards/rejected": -1.6496249437332153, "step": 92 }, { "epoch": 0.02, "learning_rate": 1.9693277310924372e-05, "logits/chosen": -2.2372398376464844, "logits/rejected": -2.2184348106384277, "logps/chosen": -292.1517028808594, "logps/rejected": -269.4548034667969, "loss": 0.3216, "rewards/accuracies": 0.8125, "rewards/chosen": 0.6820988655090332, "rewards/margins": 1.7071502208709717, "rewards/rejected": -1.025051236152649, "step": 93 }, { "epoch": 0.02, "learning_rate": 1.9689075630252102e-05, "logits/chosen": -2.0083584785461426, "logits/rejected": -2.0635998249053955, "logps/chosen": -276.5091552734375, "logps/rejected": -315.46673583984375, "loss": 0.5145, "rewards/accuracies": 0.75, "rewards/chosen": 0.39702826738357544, "rewards/margins": 1.4720776081085205, "rewards/rejected": -1.0750494003295898, "step": 94 }, { "epoch": 0.02, "learning_rate": 1.9684873949579833e-05, "logits/chosen": -2.2963638305664062, "logits/rejected": -1.4817904233932495, "logps/chosen": -336.0533447265625, "logps/rejected": -276.6947326660156, "loss": 0.7605, "rewards/accuracies": 0.6875, "rewards/chosen": 0.5585237741470337, "rewards/margins": 1.2757525444030762, "rewards/rejected": -0.7172287702560425, "step": 95 }, { "epoch": 0.02, "learning_rate": 1.9680672268907563e-05, "logits/chosen": -2.1417922973632812, "logits/rejected": -1.952133059501648, "logps/chosen": -302.29949951171875, "logps/rejected": -260.100341796875, "loss": 0.3972, "rewards/accuracies": 0.875, "rewards/chosen": -0.18869030475616455, "rewards/margins": 1.1416288614273071, "rewards/rejected": -1.3303192853927612, "step": 96 }, { "epoch": 0.02, "learning_rate": 1.9676470588235293e-05, "logits/chosen": -2.2551465034484863, "logits/rejected": -2.1994338035583496, "logps/chosen": -259.6070556640625, "logps/rejected": -292.28729248046875, "loss": 0.4951, "rewards/accuracies": 0.75, "rewards/chosen": 0.12263701111078262, "rewards/margins": 2.051563024520874, "rewards/rejected": -1.9289261102676392, "step": 97 }, { "epoch": 0.02, "learning_rate": 1.9672268907563027e-05, "logits/chosen": -2.120534658432007, "logits/rejected": -1.8351314067840576, "logps/chosen": -325.5702819824219, "logps/rejected": -244.7603759765625, "loss": 0.3843, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8541961312294006, "rewards/margins": 2.112898826599121, "rewards/rejected": -1.2587027549743652, "step": 98 }, { "epoch": 0.02, "learning_rate": 1.9668067226890757e-05, "logits/chosen": -2.172074317932129, "logits/rejected": -2.0238358974456787, "logps/chosen": -468.5791320800781, "logps/rejected": -352.8669128417969, "loss": 0.274, "rewards/accuracies": 0.875, "rewards/chosen": 1.4931235313415527, "rewards/margins": 2.0761213302612305, "rewards/rejected": -0.5829975605010986, "step": 99 }, { "epoch": 0.02, "learning_rate": 1.9663865546218487e-05, "logits/chosen": -2.0707366466522217, "logits/rejected": -2.0279312133789062, "logps/chosen": -283.825439453125, "logps/rejected": -292.2297668457031, "loss": 0.5522, "rewards/accuracies": 0.6875, "rewards/chosen": 0.49507033824920654, "rewards/margins": 1.2633992433547974, "rewards/rejected": -0.7683289051055908, "step": 100 }, { "epoch": 0.02, "learning_rate": 1.965966386554622e-05, "logits/chosen": -2.2249021530151367, "logits/rejected": -2.0144784450531006, "logps/chosen": -388.1253967285156, "logps/rejected": -267.12945556640625, "loss": 0.4395, "rewards/accuracies": 0.8125, "rewards/chosen": 1.0910539627075195, "rewards/margins": 1.1310594081878662, "rewards/rejected": -0.04000537097454071, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.965546218487395e-05, "logits/chosen": -2.3198399543762207, "logits/rejected": -2.315199136734009, "logps/chosen": -255.4583282470703, "logps/rejected": -218.7842559814453, "loss": 0.5806, "rewards/accuracies": 0.6875, "rewards/chosen": 0.6165440082550049, "rewards/margins": 1.0216189622879028, "rewards/rejected": -0.40507495403289795, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.965126050420168e-05, "logits/chosen": -1.944331169128418, "logits/rejected": -1.6027865409851074, "logps/chosen": -299.79693603515625, "logps/rejected": -264.605712890625, "loss": 0.2472, "rewards/accuracies": 0.875, "rewards/chosen": 1.1237390041351318, "rewards/margins": 2.4650118350982666, "rewards/rejected": -1.3412729501724243, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.964705882352941e-05, "logits/chosen": -2.237401008605957, "logits/rejected": -1.9374208450317383, "logps/chosen": -291.79156494140625, "logps/rejected": -227.7943115234375, "loss": 0.2397, "rewards/accuracies": 0.875, "rewards/chosen": 0.8778971433639526, "rewards/margins": 2.1618452072143555, "rewards/rejected": -1.2839481830596924, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.9642857142857145e-05, "logits/chosen": -1.9214826822280884, "logits/rejected": -1.8263112306594849, "logps/chosen": -237.01693725585938, "logps/rejected": -332.3307800292969, "loss": 0.2999, "rewards/accuracies": 0.75, "rewards/chosen": 0.6954737901687622, "rewards/margins": 2.454423427581787, "rewards/rejected": -1.7589497566223145, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.9638655462184875e-05, "logits/chosen": -2.1410441398620605, "logits/rejected": -1.6226427555084229, "logps/chosen": -322.0953369140625, "logps/rejected": -296.8033447265625, "loss": 0.7646, "rewards/accuracies": 0.625, "rewards/chosen": 0.3530750274658203, "rewards/margins": 0.81498122215271, "rewards/rejected": -0.46190622448921204, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.9634453781512605e-05, "logits/chosen": -2.15517258644104, "logits/rejected": -1.472048282623291, "logps/chosen": -366.901611328125, "logps/rejected": -310.87249755859375, "loss": 0.3827, "rewards/accuracies": 0.75, "rewards/chosen": 0.5737791061401367, "rewards/margins": 1.995422601699829, "rewards/rejected": -1.4216433763504028, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.9630252100840336e-05, "logits/chosen": -2.0106077194213867, "logits/rejected": -2.048567056655884, "logps/chosen": -219.6585235595703, "logps/rejected": -252.0364990234375, "loss": 0.3465, "rewards/accuracies": 0.875, "rewards/chosen": 0.3845852017402649, "rewards/margins": 1.800605058670044, "rewards/rejected": -1.4160197973251343, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.962605042016807e-05, "logits/chosen": -2.107731819152832, "logits/rejected": -1.3294410705566406, "logps/chosen": -358.4281005859375, "logps/rejected": -257.8708190917969, "loss": 0.3206, "rewards/accuracies": 0.8125, "rewards/chosen": 1.3075557947158813, "rewards/margins": 1.9053153991699219, "rewards/rejected": -0.5977598428726196, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.96218487394958e-05, "logits/chosen": -2.026577949523926, "logits/rejected": -2.2460622787475586, "logps/chosen": -220.59320068359375, "logps/rejected": -325.16802978515625, "loss": 0.2967, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3612469732761383, "rewards/margins": 2.7077808380126953, "rewards/rejected": -2.346534013748169, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.961764705882353e-05, "logits/chosen": -1.7622581720352173, "logits/rejected": -1.7473357915878296, "logps/chosen": -223.30801391601562, "logps/rejected": -242.68637084960938, "loss": 0.7774, "rewards/accuracies": 0.625, "rewards/chosen": 0.3355538249015808, "rewards/margins": 1.0961945056915283, "rewards/rejected": -0.7606406807899475, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.961344537815126e-05, "logits/chosen": -2.2168211936950684, "logits/rejected": -1.904067039489746, "logps/chosen": -288.23004150390625, "logps/rejected": -282.98504638671875, "loss": 0.4975, "rewards/accuracies": 0.75, "rewards/chosen": 0.9544592499732971, "rewards/margins": 1.863938570022583, "rewards/rejected": -0.9094793796539307, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.9609243697478994e-05, "logits/chosen": -2.2095961570739746, "logits/rejected": -2.1803181171417236, "logps/chosen": -246.35244750976562, "logps/rejected": -244.9532928466797, "loss": 0.3461, "rewards/accuracies": 0.8125, "rewards/chosen": 0.6990171670913696, "rewards/margins": 1.7915334701538086, "rewards/rejected": -1.0925161838531494, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.9605042016806724e-05, "logits/chosen": -2.1334476470947266, "logits/rejected": -2.1948623657226562, "logps/chosen": -326.85205078125, "logps/rejected": -364.0404968261719, "loss": 0.261, "rewards/accuracies": 0.9375, "rewards/chosen": 1.2773776054382324, "rewards/margins": 1.8966569900512695, "rewards/rejected": -0.6192792654037476, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.9600840336134454e-05, "logits/chosen": -1.9861092567443848, "logits/rejected": -2.0006840229034424, "logps/chosen": -285.4367370605469, "logps/rejected": -315.72027587890625, "loss": 0.2797, "rewards/accuracies": 0.875, "rewards/chosen": 0.927230179309845, "rewards/margins": 2.433245897293091, "rewards/rejected": -1.5060157775878906, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.9596638655462184e-05, "logits/chosen": -1.9135236740112305, "logits/rejected": -1.761857032775879, "logps/chosen": -329.7629699707031, "logps/rejected": -281.57208251953125, "loss": 0.6673, "rewards/accuracies": 0.625, "rewards/chosen": 0.19162462651729584, "rewards/margins": 1.2261087894439697, "rewards/rejected": -1.0344840288162231, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.9592436974789918e-05, "logits/chosen": -2.148016929626465, "logits/rejected": -2.074423313140869, "logps/chosen": -225.41738891601562, "logps/rejected": -238.00477600097656, "loss": 0.2159, "rewards/accuracies": 1.0, "rewards/chosen": 0.8234500885009766, "rewards/margins": 2.332329750061035, "rewards/rejected": -1.5088801383972168, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.9588235294117648e-05, "logits/chosen": -2.361898899078369, "logits/rejected": -2.055607318878174, "logps/chosen": -289.41802978515625, "logps/rejected": -242.832275390625, "loss": 0.2781, "rewards/accuracies": 0.8125, "rewards/chosen": 0.8082376718521118, "rewards/margins": 2.2316887378692627, "rewards/rejected": -1.4234510660171509, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.9584033613445378e-05, "logits/chosen": -2.2593865394592285, "logits/rejected": -1.8688687086105347, "logps/chosen": -333.76416015625, "logps/rejected": -245.79701232910156, "loss": 0.2647, "rewards/accuracies": 0.875, "rewards/chosen": 1.189161777496338, "rewards/margins": 2.233233690261841, "rewards/rejected": -1.044072151184082, "step": 119 }, { "epoch": 0.03, "learning_rate": 1.957983193277311e-05, "logits/chosen": -2.0981814861297607, "logits/rejected": -1.8817328214645386, "logps/chosen": -328.9977722167969, "logps/rejected": -331.95281982421875, "loss": 0.4821, "rewards/accuracies": 0.75, "rewards/chosen": 0.7882914543151855, "rewards/margins": 1.6770789623260498, "rewards/rejected": -0.8887875080108643, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.9575630252100842e-05, "logits/chosen": -2.294989585876465, "logits/rejected": -2.0717599391937256, "logps/chosen": -441.1888427734375, "logps/rejected": -380.23779296875, "loss": 0.3929, "rewards/accuracies": 0.875, "rewards/chosen": 1.201527714729309, "rewards/margins": 1.7749658823013306, "rewards/rejected": -0.5734382271766663, "step": 121 }, { "epoch": 0.03, "learning_rate": 1.9571428571428572e-05, "logits/chosen": -2.1778323650360107, "logits/rejected": -2.0903797149658203, "logps/chosen": -267.32635498046875, "logps/rejected": -308.8648681640625, "loss": 0.2719, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7462610602378845, "rewards/margins": 2.151460647583008, "rewards/rejected": -1.4051997661590576, "step": 122 }, { "epoch": 0.03, "learning_rate": 1.9567226890756303e-05, "logits/chosen": -1.9987823963165283, "logits/rejected": -1.9070273637771606, "logps/chosen": -291.7791442871094, "logps/rejected": -235.40542602539062, "loss": 0.7292, "rewards/accuracies": 0.625, "rewards/chosen": 0.4117959141731262, "rewards/margins": 0.7061718702316284, "rewards/rejected": -0.2943759262561798, "step": 123 }, { "epoch": 0.03, "learning_rate": 1.9563025210084036e-05, "logits/chosen": -2.3301608562469482, "logits/rejected": -2.1610896587371826, "logps/chosen": -312.8262023925781, "logps/rejected": -219.18545532226562, "loss": 0.7426, "rewards/accuracies": 0.625, "rewards/chosen": 0.22050249576568604, "rewards/margins": 1.0287131071090698, "rewards/rejected": -0.8082106113433838, "step": 124 }, { "epoch": 0.03, "learning_rate": 1.9558823529411766e-05, "logits/chosen": -1.8436377048492432, "logits/rejected": -2.0876355171203613, "logps/chosen": -262.98211669921875, "logps/rejected": -287.9124450683594, "loss": 0.4098, "rewards/accuracies": 0.6875, "rewards/chosen": 0.7412158846855164, "rewards/margins": 2.2547714710235596, "rewards/rejected": -1.513555645942688, "step": 125 }, { "epoch": 0.03, "learning_rate": 1.9554621848739497e-05, "logits/chosen": -1.9680488109588623, "logits/rejected": -1.5334417819976807, "logps/chosen": -293.45220947265625, "logps/rejected": -337.33306884765625, "loss": 0.4283, "rewards/accuracies": 0.8125, "rewards/chosen": 0.4352495074272156, "rewards/margins": 1.2213841676712036, "rewards/rejected": -0.786134660243988, "step": 126 }, { "epoch": 0.03, "learning_rate": 1.9550420168067227e-05, "logits/chosen": -2.027254581451416, "logits/rejected": -1.6008858680725098, "logps/chosen": -292.0217590332031, "logps/rejected": -231.9427490234375, "loss": 0.3687, "rewards/accuracies": 1.0, "rewards/chosen": 0.3913624882698059, "rewards/margins": 1.060205340385437, "rewards/rejected": -0.6688427925109863, "step": 127 }, { "epoch": 0.03, "learning_rate": 1.954621848739496e-05, "logits/chosen": -2.1365368366241455, "logits/rejected": -2.2989044189453125, "logps/chosen": -280.9864196777344, "logps/rejected": -294.4695129394531, "loss": 0.6608, "rewards/accuracies": 0.75, "rewards/chosen": 0.39271318912506104, "rewards/margins": 1.1895842552185059, "rewards/rejected": -0.7968710064888, "step": 128 }, { "epoch": 0.03, "learning_rate": 1.954201680672269e-05, "logits/chosen": -2.260463237762451, "logits/rejected": -1.9508836269378662, "logps/chosen": -325.3363952636719, "logps/rejected": -304.3674621582031, "loss": 0.3539, "rewards/accuracies": 0.875, "rewards/chosen": 0.7612236738204956, "rewards/margins": 2.525355339050293, "rewards/rejected": -1.764131784439087, "step": 129 }, { "epoch": 0.03, "learning_rate": 1.953781512605042e-05, "logits/chosen": -2.052269697189331, "logits/rejected": -1.9552733898162842, "logps/chosen": -293.24993896484375, "logps/rejected": -256.47119140625, "loss": 0.5942, "rewards/accuracies": 0.75, "rewards/chosen": 0.9430196285247803, "rewards/margins": 1.513566255569458, "rewards/rejected": -0.5705466866493225, "step": 130 }, { "epoch": 0.03, "learning_rate": 1.953361344537815e-05, "logits/chosen": -2.144200325012207, "logits/rejected": -2.2826175689697266, "logps/chosen": -380.23797607421875, "logps/rejected": -341.62493896484375, "loss": 0.4899, "rewards/accuracies": 0.8125, "rewards/chosen": 0.559058427810669, "rewards/margins": 2.098733901977539, "rewards/rejected": -1.539675235748291, "step": 131 }, { "epoch": 0.03, "learning_rate": 1.9529411764705885e-05, "logits/chosen": -2.0861480236053467, "logits/rejected": -2.084181308746338, "logps/chosen": -374.23980712890625, "logps/rejected": -336.87451171875, "loss": 0.2313, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7874554395675659, "rewards/margins": 2.2659218311309814, "rewards/rejected": -1.478466272354126, "step": 132 }, { "epoch": 0.03, "learning_rate": 1.9525210084033615e-05, "logits/chosen": -2.1682991981506348, "logits/rejected": -1.839908242225647, "logps/chosen": -343.1785888671875, "logps/rejected": -283.29254150390625, "loss": 0.1942, "rewards/accuracies": 0.875, "rewards/chosen": 0.4628217816352844, "rewards/margins": 3.1726136207580566, "rewards/rejected": -2.709791660308838, "step": 133 }, { "epoch": 0.03, "learning_rate": 1.9521008403361345e-05, "logits/chosen": -2.3428895473480225, "logits/rejected": -1.9651906490325928, "logps/chosen": -357.1006164550781, "logps/rejected": -305.46295166015625, "loss": 0.4695, "rewards/accuracies": 0.75, "rewards/chosen": 0.7344120740890503, "rewards/margins": 1.314975619316101, "rewards/rejected": -0.5805636048316956, "step": 134 }, { "epoch": 0.03, "learning_rate": 1.9516806722689075e-05, "logits/chosen": -1.938299536705017, "logits/rejected": -1.843817949295044, "logps/chosen": -298.67462158203125, "logps/rejected": -329.4986267089844, "loss": 0.5497, "rewards/accuracies": 0.5625, "rewards/chosen": 0.06151680648326874, "rewards/margins": 1.017744779586792, "rewards/rejected": -0.9562280178070068, "step": 135 }, { "epoch": 0.03, "learning_rate": 1.951260504201681e-05, "logits/chosen": -2.382833242416382, "logits/rejected": -1.7691450119018555, "logps/chosen": -395.1127014160156, "logps/rejected": -326.5736389160156, "loss": 0.2933, "rewards/accuracies": 0.875, "rewards/chosen": 0.9504792094230652, "rewards/margins": 2.197152614593506, "rewards/rejected": -1.246673345565796, "step": 136 }, { "epoch": 0.03, "learning_rate": 1.950840336134454e-05, "logits/chosen": -2.1813700199127197, "logits/rejected": -1.7723538875579834, "logps/chosen": -415.26708984375, "logps/rejected": -375.0914001464844, "loss": 0.4077, "rewards/accuracies": 0.875, "rewards/chosen": 0.8795511722564697, "rewards/margins": 2.122403621673584, "rewards/rejected": -1.2428523302078247, "step": 137 }, { "epoch": 0.03, "learning_rate": 1.950420168067227e-05, "logits/chosen": -2.014397621154785, "logits/rejected": -1.6715497970581055, "logps/chosen": -155.23806762695312, "logps/rejected": -206.61004638671875, "loss": 0.543, "rewards/accuracies": 0.8125, "rewards/chosen": 0.4505138397216797, "rewards/margins": 1.0151722431182861, "rewards/rejected": -0.5646584033966064, "step": 138 }, { "epoch": 0.03, "learning_rate": 1.95e-05, "logits/chosen": -1.9987766742706299, "logits/rejected": -1.5939929485321045, "logps/chosen": -346.7440490722656, "logps/rejected": -199.17239379882812, "loss": 0.5008, "rewards/accuracies": 0.625, "rewards/chosen": 0.6306272745132446, "rewards/margins": 1.3868716955184937, "rewards/rejected": -0.7562444806098938, "step": 139 }, { "epoch": 0.03, "learning_rate": 1.9495798319327733e-05, "logits/chosen": -2.088118314743042, "logits/rejected": -1.984297752380371, "logps/chosen": -283.76739501953125, "logps/rejected": -404.423095703125, "loss": 0.3662, "rewards/accuracies": 0.875, "rewards/chosen": 0.7562440037727356, "rewards/margins": 1.527753472328186, "rewards/rejected": -0.7715095281600952, "step": 140 }, { "epoch": 0.03, "learning_rate": 1.9491596638655463e-05, "logits/chosen": -1.9548678398132324, "logits/rejected": -1.7473572492599487, "logps/chosen": -307.74554443359375, "logps/rejected": -423.6452331542969, "loss": 0.2522, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5310712456703186, "rewards/margins": 2.706507682800293, "rewards/rejected": -2.175436496734619, "step": 141 }, { "epoch": 0.03, "learning_rate": 1.9487394957983194e-05, "logits/chosen": -1.915533423423767, "logits/rejected": -1.689677119255066, "logps/chosen": -243.7513427734375, "logps/rejected": -244.32139587402344, "loss": 0.4161, "rewards/accuracies": 0.6875, "rewards/chosen": 0.45054006576538086, "rewards/margins": 1.5065770149230957, "rewards/rejected": -1.0560369491577148, "step": 142 }, { "epoch": 0.03, "learning_rate": 1.9483193277310924e-05, "logits/chosen": -1.9550589323043823, "logits/rejected": -2.287087917327881, "logps/chosen": -168.13516235351562, "logps/rejected": -239.2342071533203, "loss": 0.5656, "rewards/accuracies": 0.6875, "rewards/chosen": 0.7052255868911743, "rewards/margins": 1.1860077381134033, "rewards/rejected": -0.4807822108268738, "step": 143 }, { "epoch": 0.03, "learning_rate": 1.9478991596638658e-05, "logits/chosen": -2.001689910888672, "logits/rejected": -1.7274882793426514, "logps/chosen": -241.14906311035156, "logps/rejected": -238.80674743652344, "loss": 0.4332, "rewards/accuracies": 0.75, "rewards/chosen": 0.2465830147266388, "rewards/margins": 1.6028279066085815, "rewards/rejected": -1.3562450408935547, "step": 144 }, { "epoch": 0.03, "learning_rate": 1.9474789915966388e-05, "logits/chosen": -2.160628318786621, "logits/rejected": -2.014204978942871, "logps/chosen": -388.7725830078125, "logps/rejected": -345.66925048828125, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": 1.258589267730713, "rewards/margins": 1.8814054727554321, "rewards/rejected": -0.6228160858154297, "step": 145 }, { "epoch": 0.03, "learning_rate": 1.9470588235294118e-05, "logits/chosen": -1.9197282791137695, "logits/rejected": -1.8468551635742188, "logps/chosen": -345.3627624511719, "logps/rejected": -291.37933349609375, "loss": 0.2447, "rewards/accuracies": 0.8125, "rewards/chosen": 1.1265082359313965, "rewards/margins": 2.682868003845215, "rewards/rejected": -1.5563597679138184, "step": 146 }, { "epoch": 0.03, "learning_rate": 1.946638655462185e-05, "logits/chosen": -2.224470376968384, "logits/rejected": -1.9717036485671997, "logps/chosen": -241.99221801757812, "logps/rejected": -235.1395263671875, "loss": 0.3608, "rewards/accuracies": 0.8125, "rewards/chosen": 0.8303925395011902, "rewards/margins": 1.9417951107025146, "rewards/rejected": -1.1114026308059692, "step": 147 }, { "epoch": 0.03, "learning_rate": 1.9462184873949582e-05, "logits/chosen": -2.1907153129577637, "logits/rejected": -1.4065743684768677, "logps/chosen": -333.9032287597656, "logps/rejected": -222.32427978515625, "loss": 0.1965, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7542409300804138, "rewards/margins": 2.7903554439544678, "rewards/rejected": -2.0361146926879883, "step": 148 }, { "epoch": 0.03, "learning_rate": 1.9457983193277312e-05, "logits/chosen": -2.287984609603882, "logits/rejected": -1.894585132598877, "logps/chosen": -337.87335205078125, "logps/rejected": -346.18927001953125, "loss": 0.1899, "rewards/accuracies": 0.9375, "rewards/chosen": 1.2012064456939697, "rewards/margins": 3.105619430541992, "rewards/rejected": -1.9044129848480225, "step": 149 }, { "epoch": 0.03, "learning_rate": 1.9453781512605042e-05, "logits/chosen": -1.955935001373291, "logits/rejected": -2.054630994796753, "logps/chosen": -302.446533203125, "logps/rejected": -342.5522155761719, "loss": 0.2561, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1727423667907715, "rewards/margins": 2.5674867630004883, "rewards/rejected": -1.3947443962097168, "step": 150 }, { "epoch": 0.03, "learning_rate": 1.9449579831932776e-05, "logits/chosen": -1.795854091644287, "logits/rejected": -1.9701902866363525, "logps/chosen": -198.8837432861328, "logps/rejected": -320.54412841796875, "loss": 0.1421, "rewards/accuracies": 0.9375, "rewards/chosen": 0.45592638850212097, "rewards/margins": 3.1315865516662598, "rewards/rejected": -2.6756601333618164, "step": 151 }, { "epoch": 0.03, "learning_rate": 1.9445378151260506e-05, "logits/chosen": -1.774823546409607, "logits/rejected": -1.9420603513717651, "logps/chosen": -321.3607177734375, "logps/rejected": -369.28729248046875, "loss": 0.211, "rewards/accuracies": 0.9375, "rewards/chosen": 0.32832881808280945, "rewards/margins": 2.673469066619873, "rewards/rejected": -2.345140218734741, "step": 152 }, { "epoch": 0.03, "learning_rate": 1.9441176470588236e-05, "logits/chosen": -1.9179635047912598, "logits/rejected": -1.5516554117202759, "logps/chosen": -260.065185546875, "logps/rejected": -296.73236083984375, "loss": 0.2608, "rewards/accuracies": 0.875, "rewards/chosen": 0.7521446347236633, "rewards/margins": 3.222820997238159, "rewards/rejected": -2.4706761837005615, "step": 153 }, { "epoch": 0.03, "learning_rate": 1.9436974789915967e-05, "logits/chosen": -1.9910225868225098, "logits/rejected": -2.0822975635528564, "logps/chosen": -267.6091003417969, "logps/rejected": -328.9667663574219, "loss": 0.2232, "rewards/accuracies": 0.875, "rewards/chosen": 0.23971346020698547, "rewards/margins": 3.2980337142944336, "rewards/rejected": -3.0583202838897705, "step": 154 }, { "epoch": 0.03, "learning_rate": 1.94327731092437e-05, "logits/chosen": -2.140634536743164, "logits/rejected": -1.8932344913482666, "logps/chosen": -377.14013671875, "logps/rejected": -335.8614501953125, "loss": 0.4596, "rewards/accuracies": 0.75, "rewards/chosen": -0.05733342468738556, "rewards/margins": 1.7537956237792969, "rewards/rejected": -1.811129093170166, "step": 155 }, { "epoch": 0.03, "learning_rate": 1.942857142857143e-05, "logits/chosen": -2.20592999458313, "logits/rejected": -1.8383092880249023, "logps/chosen": -354.6993103027344, "logps/rejected": -219.24322509765625, "loss": 0.5416, "rewards/accuracies": 0.75, "rewards/chosen": -0.15585672855377197, "rewards/margins": 1.7308855056762695, "rewards/rejected": -1.8867424726486206, "step": 156 }, { "epoch": 0.03, "learning_rate": 1.942436974789916e-05, "logits/chosen": -2.0593392848968506, "logits/rejected": -1.82268226146698, "logps/chosen": -459.42303466796875, "logps/rejected": -314.2120056152344, "loss": 0.1246, "rewards/accuracies": 1.0, "rewards/chosen": 1.0004568099975586, "rewards/margins": 2.7035768032073975, "rewards/rejected": -1.7031197547912598, "step": 157 }, { "epoch": 0.03, "learning_rate": 1.942016806722689e-05, "logits/chosen": -2.0842440128326416, "logits/rejected": -1.7106075286865234, "logps/chosen": -301.0412902832031, "logps/rejected": -318.5955810546875, "loss": 0.1775, "rewards/accuracies": 1.0, "rewards/chosen": 0.38655221462249756, "rewards/margins": 2.980933904647827, "rewards/rejected": -2.59438157081604, "step": 158 }, { "epoch": 0.03, "learning_rate": 1.9415966386554624e-05, "logits/chosen": -2.2098166942596436, "logits/rejected": -2.253511905670166, "logps/chosen": -233.94956970214844, "logps/rejected": -315.84912109375, "loss": 0.2124, "rewards/accuracies": 0.9375, "rewards/chosen": 0.35227689146995544, "rewards/margins": 3.8535265922546387, "rewards/rejected": -3.5012497901916504, "step": 159 }, { "epoch": 0.03, "learning_rate": 1.9411764705882355e-05, "logits/chosen": -2.0606324672698975, "logits/rejected": -2.2401208877563477, "logps/chosen": -283.51531982421875, "logps/rejected": -335.6744079589844, "loss": 0.3731, "rewards/accuracies": 0.8125, "rewards/chosen": 0.30113106966018677, "rewards/margins": 2.1955299377441406, "rewards/rejected": -1.8943989276885986, "step": 160 }, { "epoch": 0.03, "learning_rate": 1.9407563025210085e-05, "logits/chosen": -2.006823778152466, "logits/rejected": -2.03440523147583, "logps/chosen": -301.72845458984375, "logps/rejected": -300.8660888671875, "loss": 0.3647, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05930629372596741, "rewards/margins": 1.845715045928955, "rewards/rejected": -1.9050211906433105, "step": 161 }, { "epoch": 0.03, "learning_rate": 1.9403361344537815e-05, "logits/chosen": -2.2592272758483887, "logits/rejected": -1.4900158643722534, "logps/chosen": -349.850341796875, "logps/rejected": -237.9735870361328, "loss": 0.4265, "rewards/accuracies": 0.75, "rewards/chosen": -0.4505152106285095, "rewards/margins": 2.180917739868164, "rewards/rejected": -2.6314330101013184, "step": 162 }, { "epoch": 0.03, "learning_rate": 1.939915966386555e-05, "logits/chosen": -2.138091802597046, "logits/rejected": -2.0000557899475098, "logps/chosen": -337.5413818359375, "logps/rejected": -281.70013427734375, "loss": 0.2175, "rewards/accuracies": 0.875, "rewards/chosen": 0.1180100366473198, "rewards/margins": 2.639892578125, "rewards/rejected": -2.5218825340270996, "step": 163 }, { "epoch": 0.03, "learning_rate": 1.939495798319328e-05, "logits/chosen": -2.351864814758301, "logits/rejected": -1.7518774271011353, "logps/chosen": -305.2463073730469, "logps/rejected": -265.68487548828125, "loss": 0.5453, "rewards/accuracies": 0.6875, "rewards/chosen": 0.15149277448654175, "rewards/margins": 1.4267189502716064, "rewards/rejected": -1.27522611618042, "step": 164 }, { "epoch": 0.03, "learning_rate": 1.939075630252101e-05, "logits/chosen": -2.159843683242798, "logits/rejected": -1.8581840991973877, "logps/chosen": -313.42755126953125, "logps/rejected": -305.45281982421875, "loss": 0.275, "rewards/accuracies": 0.75, "rewards/chosen": 0.12428408861160278, "rewards/margins": 3.574671506881714, "rewards/rejected": -3.4503870010375977, "step": 165 }, { "epoch": 0.03, "learning_rate": 1.9386554621848743e-05, "logits/chosen": -2.1018590927124023, "logits/rejected": -1.6583375930786133, "logps/chosen": -251.76638793945312, "logps/rejected": -222.0404052734375, "loss": 0.2708, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7818334698677063, "rewards/margins": 3.106484889984131, "rewards/rejected": -2.3246514797210693, "step": 166 }, { "epoch": 0.03, "learning_rate": 1.9382352941176473e-05, "logits/chosen": -2.204774856567383, "logits/rejected": -2.085190773010254, "logps/chosen": -310.175537109375, "logps/rejected": -311.9328308105469, "loss": 0.5247, "rewards/accuracies": 0.75, "rewards/chosen": 0.6363217830657959, "rewards/margins": 2.2235963344573975, "rewards/rejected": -1.5872745513916016, "step": 167 }, { "epoch": 0.04, "learning_rate": 1.9378151260504203e-05, "logits/chosen": -2.0087029933929443, "logits/rejected": -1.9225444793701172, "logps/chosen": -357.4611511230469, "logps/rejected": -365.505615234375, "loss": 0.2371, "rewards/accuracies": 0.8125, "rewards/chosen": 0.7516674995422363, "rewards/margins": 2.9219133853912354, "rewards/rejected": -2.170245885848999, "step": 168 }, { "epoch": 0.04, "learning_rate": 1.9373949579831933e-05, "logits/chosen": -2.308570623397827, "logits/rejected": -1.8169952630996704, "logps/chosen": -402.0672912597656, "logps/rejected": -297.09906005859375, "loss": 0.3505, "rewards/accuracies": 0.8125, "rewards/chosen": 0.7763336896896362, "rewards/margins": 3.253223419189453, "rewards/rejected": -2.4768898487091064, "step": 169 }, { "epoch": 0.04, "learning_rate": 1.9369747899159667e-05, "logits/chosen": -1.8160821199417114, "logits/rejected": -2.0576789379119873, "logps/chosen": -231.24964904785156, "logps/rejected": -230.78431701660156, "loss": 0.4406, "rewards/accuracies": 0.8125, "rewards/chosen": 0.29919058084487915, "rewards/margins": 1.8738502264022827, "rewards/rejected": -1.5746595859527588, "step": 170 }, { "epoch": 0.04, "learning_rate": 1.9365546218487397e-05, "logits/chosen": -2.0465896129608154, "logits/rejected": -1.9130158424377441, "logps/chosen": -308.604736328125, "logps/rejected": -290.31341552734375, "loss": 0.4036, "rewards/accuracies": 0.875, "rewards/chosen": -0.19760461151599884, "rewards/margins": 2.215013265609741, "rewards/rejected": -2.4126176834106445, "step": 171 }, { "epoch": 0.04, "learning_rate": 1.9361344537815127e-05, "logits/chosen": -2.220503568649292, "logits/rejected": -1.9116710424423218, "logps/chosen": -374.86773681640625, "logps/rejected": -367.9362487792969, "loss": 0.7568, "rewards/accuracies": 0.625, "rewards/chosen": -0.501358151435852, "rewards/margins": 1.8529269695281982, "rewards/rejected": -2.3542847633361816, "step": 172 }, { "epoch": 0.04, "learning_rate": 1.9357142857142858e-05, "logits/chosen": -2.1519575119018555, "logits/rejected": -2.108215808868408, "logps/chosen": -273.619873046875, "logps/rejected": -299.34429931640625, "loss": 0.2548, "rewards/accuracies": 0.9375, "rewards/chosen": 0.659705400466919, "rewards/margins": 2.3766002655029297, "rewards/rejected": -1.7168949842453003, "step": 173 }, { "epoch": 0.04, "learning_rate": 1.935294117647059e-05, "logits/chosen": -2.099363327026367, "logits/rejected": -1.8638235330581665, "logps/chosen": -266.3851013183594, "logps/rejected": -314.7110595703125, "loss": 0.36, "rewards/accuracies": 0.8125, "rewards/chosen": -0.42388927936553955, "rewards/margins": 2.994227886199951, "rewards/rejected": -3.4181172847747803, "step": 174 }, { "epoch": 0.04, "learning_rate": 1.934873949579832e-05, "logits/chosen": -2.004194498062134, "logits/rejected": -2.1080429553985596, "logps/chosen": -271.04913330078125, "logps/rejected": -303.30810546875, "loss": 0.427, "rewards/accuracies": 0.75, "rewards/chosen": 0.09644299745559692, "rewards/margins": 1.8110917806625366, "rewards/rejected": -1.714648723602295, "step": 175 }, { "epoch": 0.04, "learning_rate": 1.9344537815126052e-05, "logits/chosen": -1.9271230697631836, "logits/rejected": -1.4533002376556396, "logps/chosen": -379.56158447265625, "logps/rejected": -251.24574279785156, "loss": 0.3061, "rewards/accuracies": 0.8125, "rewards/chosen": 0.7267810106277466, "rewards/margins": 1.7747788429260254, "rewards/rejected": -1.0479978322982788, "step": 176 }, { "epoch": 0.04, "learning_rate": 1.9340336134453782e-05, "logits/chosen": -1.7387003898620605, "logits/rejected": -1.6673742532730103, "logps/chosen": -319.44677734375, "logps/rejected": -328.6859130859375, "loss": 0.2548, "rewards/accuracies": 0.9375, "rewards/chosen": 0.13148075342178345, "rewards/margins": 2.836094856262207, "rewards/rejected": -2.7046141624450684, "step": 177 }, { "epoch": 0.04, "learning_rate": 1.9336134453781516e-05, "logits/chosen": -2.1485331058502197, "logits/rejected": -1.7516132593154907, "logps/chosen": -312.14935302734375, "logps/rejected": -263.0650634765625, "loss": 0.3042, "rewards/accuracies": 0.8125, "rewards/chosen": 0.6039855480194092, "rewards/margins": 2.0820467472076416, "rewards/rejected": -1.4780610799789429, "step": 178 }, { "epoch": 0.04, "learning_rate": 1.9331932773109246e-05, "logits/chosen": -2.356072425842285, "logits/rejected": -1.7140439748764038, "logps/chosen": -474.1767272949219, "logps/rejected": -322.3995056152344, "loss": 0.481, "rewards/accuracies": 0.75, "rewards/chosen": 1.0852775573730469, "rewards/margins": 2.305659770965576, "rewards/rejected": -1.2203824520111084, "step": 179 }, { "epoch": 0.04, "learning_rate": 1.9327731092436976e-05, "logits/chosen": -2.0827455520629883, "logits/rejected": -1.9543653726577759, "logps/chosen": -329.56024169921875, "logps/rejected": -366.47454833984375, "loss": 0.3702, "rewards/accuracies": 0.875, "rewards/chosen": 0.2157536894083023, "rewards/margins": 2.586594343185425, "rewards/rejected": -2.370840549468994, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.9323529411764706e-05, "logits/chosen": -2.1309521198272705, "logits/rejected": -2.09285044670105, "logps/chosen": -197.16836547851562, "logps/rejected": -216.26132202148438, "loss": 0.4309, "rewards/accuracies": 0.75, "rewards/chosen": 0.6182193160057068, "rewards/margins": 1.49525785446167, "rewards/rejected": -0.8770386576652527, "step": 181 }, { "epoch": 0.04, "learning_rate": 1.931932773109244e-05, "logits/chosen": -1.9410741329193115, "logits/rejected": -2.0862960815429688, "logps/chosen": -191.31874084472656, "logps/rejected": -267.4620056152344, "loss": 0.5695, "rewards/accuracies": 0.75, "rewards/chosen": -0.24719244241714478, "rewards/margins": 2.0659613609313965, "rewards/rejected": -2.3131542205810547, "step": 182 }, { "epoch": 0.04, "learning_rate": 1.931512605042017e-05, "logits/chosen": -1.8501081466674805, "logits/rejected": -1.988316535949707, "logps/chosen": -253.83676147460938, "logps/rejected": -322.62493896484375, "loss": 0.8646, "rewards/accuracies": 0.6875, "rewards/chosen": 0.08060411363840103, "rewards/margins": 0.7999259829521179, "rewards/rejected": -0.7193217873573303, "step": 183 }, { "epoch": 0.04, "learning_rate": 1.93109243697479e-05, "logits/chosen": -2.361492872238159, "logits/rejected": -1.6896589994430542, "logps/chosen": -373.93548583984375, "logps/rejected": -351.4352722167969, "loss": 0.2221, "rewards/accuracies": 0.9375, "rewards/chosen": 1.030282974243164, "rewards/margins": 3.338300943374634, "rewards/rejected": -2.3080177307128906, "step": 184 }, { "epoch": 0.04, "learning_rate": 1.930672268907563e-05, "logits/chosen": -1.9699373245239258, "logits/rejected": -1.8167184591293335, "logps/chosen": -260.5339050292969, "logps/rejected": -273.19976806640625, "loss": 0.4921, "rewards/accuracies": 0.75, "rewards/chosen": 0.6234461665153503, "rewards/margins": 1.8294168710708618, "rewards/rejected": -1.2059706449508667, "step": 185 }, { "epoch": 0.04, "learning_rate": 1.9302521008403364e-05, "logits/chosen": -2.2624993324279785, "logits/rejected": -2.042703628540039, "logps/chosen": -354.2139892578125, "logps/rejected": -319.7133483886719, "loss": 0.5529, "rewards/accuracies": 0.875, "rewards/chosen": 0.5018466711044312, "rewards/margins": 2.1002578735351562, "rewards/rejected": -1.598411202430725, "step": 186 }, { "epoch": 0.04, "learning_rate": 1.9298319327731094e-05, "logits/chosen": -1.943152904510498, "logits/rejected": -2.1798253059387207, "logps/chosen": -260.026611328125, "logps/rejected": -357.4902648925781, "loss": 0.4278, "rewards/accuracies": 0.8125, "rewards/chosen": 0.5993878245353699, "rewards/margins": 2.099064826965332, "rewards/rejected": -1.4996771812438965, "step": 187 }, { "epoch": 0.04, "learning_rate": 1.9294117647058825e-05, "logits/chosen": -2.321877956390381, "logits/rejected": -1.9957356452941895, "logps/chosen": -267.6776123046875, "logps/rejected": -274.17572021484375, "loss": 0.2828, "rewards/accuracies": 0.8125, "rewards/chosen": 0.015132449567317963, "rewards/margins": 2.1086621284484863, "rewards/rejected": -2.09352970123291, "step": 188 }, { "epoch": 0.04, "learning_rate": 1.9289915966386558e-05, "logits/chosen": -2.2177798748016357, "logits/rejected": -1.7982139587402344, "logps/chosen": -389.71221923828125, "logps/rejected": -341.09716796875, "loss": 0.3212, "rewards/accuracies": 0.8125, "rewards/chosen": 0.5387109518051147, "rewards/margins": 2.2304818630218506, "rewards/rejected": -1.6917710304260254, "step": 189 }, { "epoch": 0.04, "learning_rate": 1.928571428571429e-05, "logits/chosen": -2.0826494693756104, "logits/rejected": -1.9526170492172241, "logps/chosen": -244.26217651367188, "logps/rejected": -226.49497985839844, "loss": 0.686, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2411191165447235, "rewards/margins": 1.1531486511230469, "rewards/rejected": -1.3942676782608032, "step": 190 }, { "epoch": 0.04, "learning_rate": 1.928151260504202e-05, "logits/chosen": -2.1943392753601074, "logits/rejected": -2.0047335624694824, "logps/chosen": -322.46368408203125, "logps/rejected": -266.3966064453125, "loss": 0.4314, "rewards/accuracies": 0.75, "rewards/chosen": 0.7097187638282776, "rewards/margins": 1.5068656206130981, "rewards/rejected": -0.7971468567848206, "step": 191 }, { "epoch": 0.04, "learning_rate": 1.927731092436975e-05, "logits/chosen": -2.4108781814575195, "logits/rejected": -2.2714500427246094, "logps/chosen": -239.07113647460938, "logps/rejected": -287.70806884765625, "loss": 0.265, "rewards/accuracies": 0.9375, "rewards/chosen": 0.4333779811859131, "rewards/margins": 2.222304344177246, "rewards/rejected": -1.788926362991333, "step": 192 }, { "epoch": 0.04, "learning_rate": 1.9273109243697482e-05, "logits/chosen": -2.0882983207702637, "logits/rejected": -1.599724531173706, "logps/chosen": -306.90264892578125, "logps/rejected": -306.5920104980469, "loss": 0.4409, "rewards/accuracies": 0.75, "rewards/chosen": 0.35444700717926025, "rewards/margins": 3.6877753734588623, "rewards/rejected": -3.3333282470703125, "step": 193 }, { "epoch": 0.04, "learning_rate": 1.9268907563025213e-05, "logits/chosen": -1.5826417207717896, "logits/rejected": -1.6908752918243408, "logps/chosen": -213.50213623046875, "logps/rejected": -323.8135986328125, "loss": 0.2619, "rewards/accuracies": 0.875, "rewards/chosen": 0.4410989284515381, "rewards/margins": 3.3193366527557373, "rewards/rejected": -2.878237724304199, "step": 194 }, { "epoch": 0.04, "learning_rate": 1.9264705882352943e-05, "logits/chosen": -2.1554768085479736, "logits/rejected": -1.9104681015014648, "logps/chosen": -263.67022705078125, "logps/rejected": -263.89251708984375, "loss": 0.3077, "rewards/accuracies": 0.875, "rewards/chosen": 0.4525309205055237, "rewards/margins": 2.7871453762054443, "rewards/rejected": -2.3346142768859863, "step": 195 }, { "epoch": 0.04, "learning_rate": 1.9260504201680673e-05, "logits/chosen": -1.8429673910140991, "logits/rejected": -1.953737735748291, "logps/chosen": -217.81649780273438, "logps/rejected": -239.5341339111328, "loss": 0.0909, "rewards/accuracies": 1.0, "rewards/chosen": 0.3554931581020355, "rewards/margins": 3.4962148666381836, "rewards/rejected": -3.1407217979431152, "step": 196 }, { "epoch": 0.04, "learning_rate": 1.9256302521008407e-05, "logits/chosen": -2.090711832046509, "logits/rejected": -1.7367603778839111, "logps/chosen": -295.37701416015625, "logps/rejected": -233.35989379882812, "loss": 0.5899, "rewards/accuracies": 0.75, "rewards/chosen": 0.28794872760772705, "rewards/margins": 1.9939861297607422, "rewards/rejected": -1.7060374021530151, "step": 197 }, { "epoch": 0.04, "learning_rate": 1.9252100840336137e-05, "logits/chosen": -2.216714859008789, "logits/rejected": -1.3142722845077515, "logps/chosen": -379.0946350097656, "logps/rejected": -216.58425903320312, "loss": 0.5711, "rewards/accuracies": 0.75, "rewards/chosen": 0.676600456237793, "rewards/margins": 1.7255703210830688, "rewards/rejected": -1.0489698648452759, "step": 198 }, { "epoch": 0.04, "learning_rate": 1.9247899159663867e-05, "logits/chosen": -1.886040210723877, "logits/rejected": -2.2005977630615234, "logps/chosen": -220.26943969726562, "logps/rejected": -333.1991271972656, "loss": 0.4988, "rewards/accuracies": 0.8125, "rewards/chosen": 0.5622369050979614, "rewards/margins": 2.4243102073669434, "rewards/rejected": -1.862073302268982, "step": 199 }, { "epoch": 0.04, "learning_rate": 1.9243697478991597e-05, "logits/chosen": -2.152661085128784, "logits/rejected": -1.7738873958587646, "logps/chosen": -231.73829650878906, "logps/rejected": -238.894287109375, "loss": 0.6994, "rewards/accuracies": 0.8125, "rewards/chosen": 0.24995756149291992, "rewards/margins": 1.5514318943023682, "rewards/rejected": -1.3014743328094482, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.923949579831933e-05, "logits/chosen": -1.9182398319244385, "logits/rejected": -1.8288938999176025, "logps/chosen": -412.51019287109375, "logps/rejected": -310.742919921875, "loss": 0.5176, "rewards/accuracies": 0.75, "rewards/chosen": 0.2572101950645447, "rewards/margins": 2.264309883117676, "rewards/rejected": -2.0070996284484863, "step": 201 }, { "epoch": 0.04, "learning_rate": 1.923529411764706e-05, "logits/chosen": -2.0700290203094482, "logits/rejected": -1.8612308502197266, "logps/chosen": -325.8376159667969, "logps/rejected": -348.48760986328125, "loss": 0.2201, "rewards/accuracies": 0.875, "rewards/chosen": 0.3816840350627899, "rewards/margins": 2.883958578109741, "rewards/rejected": -2.502274513244629, "step": 202 }, { "epoch": 0.04, "learning_rate": 1.923109243697479e-05, "logits/chosen": -2.0668513774871826, "logits/rejected": -1.7440872192382812, "logps/chosen": -304.94110107421875, "logps/rejected": -294.171875, "loss": 0.6027, "rewards/accuracies": 0.75, "rewards/chosen": 0.6673010587692261, "rewards/margins": 2.2133984565734863, "rewards/rejected": -1.5460972785949707, "step": 203 }, { "epoch": 0.04, "learning_rate": 1.922689075630252e-05, "logits/chosen": -2.1958768367767334, "logits/rejected": -1.6824707984924316, "logps/chosen": -287.90673828125, "logps/rejected": -251.39566040039062, "loss": 0.5216, "rewards/accuracies": 0.75, "rewards/chosen": 1.2339067459106445, "rewards/margins": 1.9839694499969482, "rewards/rejected": -0.7500627040863037, "step": 204 }, { "epoch": 0.04, "learning_rate": 1.9222689075630255e-05, "logits/chosen": -1.7814674377441406, "logits/rejected": -1.9055691957473755, "logps/chosen": -212.4736328125, "logps/rejected": -259.2861022949219, "loss": 0.5105, "rewards/accuracies": 0.6875, "rewards/chosen": 0.6588839888572693, "rewards/margins": 1.3305463790893555, "rewards/rejected": -0.6716625094413757, "step": 205 }, { "epoch": 0.04, "learning_rate": 1.9218487394957985e-05, "logits/chosen": -2.300837278366089, "logits/rejected": -1.6157662868499756, "logps/chosen": -377.23388671875, "logps/rejected": -244.01519775390625, "loss": 0.5409, "rewards/accuracies": 0.625, "rewards/chosen": 1.2487529516220093, "rewards/margins": 1.6235289573669434, "rewards/rejected": -0.3747760057449341, "step": 206 }, { "epoch": 0.04, "learning_rate": 1.9214285714285716e-05, "logits/chosen": -2.1403493881225586, "logits/rejected": -1.8367571830749512, "logps/chosen": -359.39410400390625, "logps/rejected": -320.2593994140625, "loss": 0.2947, "rewards/accuracies": 0.875, "rewards/chosen": 0.9880273938179016, "rewards/margins": 2.1673243045806885, "rewards/rejected": -1.1792969703674316, "step": 207 }, { "epoch": 0.04, "learning_rate": 1.9210084033613446e-05, "logits/chosen": -2.1622567176818848, "logits/rejected": -1.9735891819000244, "logps/chosen": -348.40472412109375, "logps/rejected": -389.960205078125, "loss": 0.5641, "rewards/accuracies": 0.75, "rewards/chosen": 0.7891466617584229, "rewards/margins": 1.1746538877487183, "rewards/rejected": -0.38550716638565063, "step": 208 }, { "epoch": 0.04, "learning_rate": 1.920588235294118e-05, "logits/chosen": -2.1938869953155518, "logits/rejected": -1.7905843257904053, "logps/chosen": -353.5918273925781, "logps/rejected": -348.0268859863281, "loss": 0.5327, "rewards/accuracies": 0.6875, "rewards/chosen": 1.1191864013671875, "rewards/margins": 1.3848307132720947, "rewards/rejected": -0.26564449071884155, "step": 209 }, { "epoch": 0.04, "learning_rate": 1.920168067226891e-05, "logits/chosen": -2.2892205715179443, "logits/rejected": -2.0363335609436035, "logps/chosen": -300.6004638671875, "logps/rejected": -274.6438903808594, "loss": 0.2904, "rewards/accuracies": 0.8125, "rewards/chosen": 0.7398238182067871, "rewards/margins": 1.7600440979003906, "rewards/rejected": -1.020220160484314, "step": 210 }, { "epoch": 0.04, "learning_rate": 1.919747899159664e-05, "logits/chosen": -2.097750186920166, "logits/rejected": -1.3871397972106934, "logps/chosen": -276.9554443359375, "logps/rejected": -232.28392028808594, "loss": 0.2275, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6810877323150635, "rewards/margins": 2.238926887512207, "rewards/rejected": -1.5578391551971436, "step": 211 }, { "epoch": 0.04, "learning_rate": 1.9193277310924374e-05, "logits/chosen": -2.3199985027313232, "logits/rejected": -1.8059470653533936, "logps/chosen": -392.4477233886719, "logps/rejected": -366.4036560058594, "loss": 0.2247, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8302945494651794, "rewards/margins": 2.384056806564331, "rewards/rejected": -1.5537623167037964, "step": 212 }, { "epoch": 0.04, "learning_rate": 1.9189075630252104e-05, "logits/chosen": -1.8848183155059814, "logits/rejected": -1.9005846977233887, "logps/chosen": -222.84994506835938, "logps/rejected": -350.2418518066406, "loss": 0.5068, "rewards/accuracies": 0.6875, "rewards/chosen": 0.389511376619339, "rewards/margins": 1.5237209796905518, "rewards/rejected": -1.1342095136642456, "step": 213 }, { "epoch": 0.04, "learning_rate": 1.9184873949579834e-05, "logits/chosen": -1.7811338901519775, "logits/rejected": -1.7196321487426758, "logps/chosen": -317.6466979980469, "logps/rejected": -279.0945739746094, "loss": 0.2893, "rewards/accuracies": 0.8125, "rewards/chosen": 0.8474152088165283, "rewards/margins": 1.9977939128875732, "rewards/rejected": -1.1503784656524658, "step": 214 }, { "epoch": 0.04, "learning_rate": 1.9180672268907564e-05, "logits/chosen": -2.217418909072876, "logits/rejected": -2.235569715499878, "logps/chosen": -209.42669677734375, "logps/rejected": -303.7484436035156, "loss": 0.3552, "rewards/accuracies": 0.8125, "rewards/chosen": 0.4288293719291687, "rewards/margins": 1.933406114578247, "rewards/rejected": -1.5045769214630127, "step": 215 }, { "epoch": 0.05, "learning_rate": 1.9176470588235298e-05, "logits/chosen": -2.2915008068084717, "logits/rejected": -2.000013828277588, "logps/chosen": -305.03656005859375, "logps/rejected": -240.19345092773438, "loss": 0.6803, "rewards/accuracies": 0.6875, "rewards/chosen": -0.02194342017173767, "rewards/margins": 1.5948190689086914, "rewards/rejected": -1.6167625188827515, "step": 216 }, { "epoch": 0.05, "learning_rate": 1.9172268907563028e-05, "logits/chosen": -2.33363676071167, "logits/rejected": -2.2526702880859375, "logps/chosen": -283.939697265625, "logps/rejected": -242.2597198486328, "loss": 0.3365, "rewards/accuracies": 0.8125, "rewards/chosen": 0.585136890411377, "rewards/margins": 2.0768470764160156, "rewards/rejected": -1.4917103052139282, "step": 217 }, { "epoch": 0.05, "learning_rate": 1.9168067226890758e-05, "logits/chosen": -1.9670711755752563, "logits/rejected": -1.3943191766738892, "logps/chosen": -300.0361328125, "logps/rejected": -274.5738525390625, "loss": 0.4034, "rewards/accuracies": 0.875, "rewards/chosen": 0.20370739698410034, "rewards/margins": 1.436436653137207, "rewards/rejected": -1.232729196548462, "step": 218 }, { "epoch": 0.05, "learning_rate": 1.916386554621849e-05, "logits/chosen": -1.8724455833435059, "logits/rejected": -1.7746968269348145, "logps/chosen": -256.06634521484375, "logps/rejected": -269.5330810546875, "loss": 0.3588, "rewards/accuracies": 0.8125, "rewards/chosen": 0.3698304295539856, "rewards/margins": 1.6467514038085938, "rewards/rejected": -1.2769211530685425, "step": 219 }, { "epoch": 0.05, "learning_rate": 1.9159663865546222e-05, "logits/chosen": -1.8914530277252197, "logits/rejected": -1.9324146509170532, "logps/chosen": -282.6627502441406, "logps/rejected": -291.2701721191406, "loss": 0.1739, "rewards/accuracies": 1.0, "rewards/chosen": 0.34912729263305664, "rewards/margins": 3.305083751678467, "rewards/rejected": -2.95595645904541, "step": 220 }, { "epoch": 0.05, "learning_rate": 1.9155462184873952e-05, "logits/chosen": -2.0394859313964844, "logits/rejected": -1.800555944442749, "logps/chosen": -289.435791015625, "logps/rejected": -235.32615661621094, "loss": 0.5441, "rewards/accuracies": 0.6875, "rewards/chosen": 0.4788973033428192, "rewards/margins": 1.236483097076416, "rewards/rejected": -0.7575857639312744, "step": 221 }, { "epoch": 0.05, "learning_rate": 1.9151260504201683e-05, "logits/chosen": -2.2122366428375244, "logits/rejected": -2.196902275085449, "logps/chosen": -299.8267517089844, "logps/rejected": -268.9872741699219, "loss": 0.4434, "rewards/accuracies": 0.75, "rewards/chosen": 0.1474180668592453, "rewards/margins": 1.3896268606185913, "rewards/rejected": -1.24220871925354, "step": 222 }, { "epoch": 0.05, "learning_rate": 1.9147058823529413e-05, "logits/chosen": -2.2167115211486816, "logits/rejected": -2.136651039123535, "logps/chosen": -445.08013916015625, "logps/rejected": -368.6702880859375, "loss": 0.271, "rewards/accuracies": 0.875, "rewards/chosen": 0.4029645323753357, "rewards/margins": 2.2742044925689697, "rewards/rejected": -1.8712400197982788, "step": 223 }, { "epoch": 0.05, "learning_rate": 1.9142857142857146e-05, "logits/chosen": -2.292498826980591, "logits/rejected": -1.9991729259490967, "logps/chosen": -290.1639404296875, "logps/rejected": -351.75665283203125, "loss": 0.176, "rewards/accuracies": 1.0, "rewards/chosen": 0.6641996502876282, "rewards/margins": 3.4761962890625, "rewards/rejected": -2.8119964599609375, "step": 224 }, { "epoch": 0.05, "learning_rate": 1.9138655462184877e-05, "logits/chosen": -2.0794904232025146, "logits/rejected": -2.17048978805542, "logps/chosen": -167.24432373046875, "logps/rejected": -274.07122802734375, "loss": 0.204, "rewards/accuracies": 0.875, "rewards/chosen": -0.022808387875556946, "rewards/margins": 3.534047842025757, "rewards/rejected": -3.556856155395508, "step": 225 }, { "epoch": 0.05, "learning_rate": 1.9134453781512607e-05, "logits/chosen": -1.8843648433685303, "logits/rejected": -2.0126261711120605, "logps/chosen": -180.7335968017578, "logps/rejected": -201.5263671875, "loss": 0.4142, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1616390347480774, "rewards/margins": 1.5557749271392822, "rewards/rejected": -1.7174140214920044, "step": 226 }, { "epoch": 0.05, "learning_rate": 1.9130252100840337e-05, "logits/chosen": -1.9826205968856812, "logits/rejected": -1.7967023849487305, "logps/chosen": -349.6240234375, "logps/rejected": -337.2738037109375, "loss": 0.2759, "rewards/accuracies": 0.8125, "rewards/chosen": 0.321922242641449, "rewards/margins": 2.146087646484375, "rewards/rejected": -1.8241654634475708, "step": 227 }, { "epoch": 0.05, "learning_rate": 1.912605042016807e-05, "logits/chosen": -2.325867176055908, "logits/rejected": -2.057299852371216, "logps/chosen": -391.8019714355469, "logps/rejected": -323.2319641113281, "loss": 0.1588, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3355216681957245, "rewards/margins": 3.1053762435913086, "rewards/rejected": -2.7698545455932617, "step": 228 }, { "epoch": 0.05, "learning_rate": 1.91218487394958e-05, "logits/chosen": -2.1116814613342285, "logits/rejected": -1.9139583110809326, "logps/chosen": -378.53472900390625, "logps/rejected": -291.7130126953125, "loss": 0.6155, "rewards/accuracies": 0.75, "rewards/chosen": -0.35140061378479004, "rewards/margins": 2.5469369888305664, "rewards/rejected": -2.8983376026153564, "step": 229 }, { "epoch": 0.05, "learning_rate": 1.911764705882353e-05, "logits/chosen": -2.312340259552002, "logits/rejected": -2.018899440765381, "logps/chosen": -227.06011962890625, "logps/rejected": -285.7034606933594, "loss": 0.4459, "rewards/accuracies": 0.6875, "rewards/chosen": -0.29365068674087524, "rewards/margins": 2.335214614868164, "rewards/rejected": -2.6288652420043945, "step": 230 }, { "epoch": 0.05, "learning_rate": 1.911344537815126e-05, "logits/chosen": -2.029402017593384, "logits/rejected": -1.6517996788024902, "logps/chosen": -214.90447998046875, "logps/rejected": -220.51390075683594, "loss": 0.1437, "rewards/accuracies": 1.0, "rewards/chosen": 0.18353751301765442, "rewards/margins": 3.010864496231079, "rewards/rejected": -2.827326774597168, "step": 231 }, { "epoch": 0.05, "learning_rate": 1.9109243697478995e-05, "logits/chosen": -2.1900229454040527, "logits/rejected": -2.079422950744629, "logps/chosen": -266.41741943359375, "logps/rejected": -246.25958251953125, "loss": 0.2645, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3034582734107971, "rewards/margins": 3.1612443923950195, "rewards/rejected": -3.46470308303833, "step": 232 }, { "epoch": 0.05, "learning_rate": 1.9105042016806725e-05, "logits/chosen": -2.1312716007232666, "logits/rejected": -2.0581488609313965, "logps/chosen": -330.78875732421875, "logps/rejected": -368.5685119628906, "loss": 0.4995, "rewards/accuracies": 0.875, "rewards/chosen": 0.09492186456918716, "rewards/margins": 3.064556121826172, "rewards/rejected": -2.9696342945098877, "step": 233 }, { "epoch": 0.05, "learning_rate": 1.9100840336134455e-05, "logits/chosen": -2.068007469177246, "logits/rejected": -1.6838291883468628, "logps/chosen": -257.7823181152344, "logps/rejected": -202.9130859375, "loss": 1.0677, "rewards/accuracies": 0.5625, "rewards/chosen": -0.6025550365447998, "rewards/margins": 0.6695663928985596, "rewards/rejected": -1.272121548652649, "step": 234 }, { "epoch": 0.05, "learning_rate": 1.909663865546219e-05, "logits/chosen": -1.8926564455032349, "logits/rejected": -1.7910258769989014, "logps/chosen": -267.2583312988281, "logps/rejected": -335.667236328125, "loss": 0.1903, "rewards/accuracies": 0.9375, "rewards/chosen": -0.09820468723773956, "rewards/margins": 2.3982319831848145, "rewards/rejected": -2.496436595916748, "step": 235 }, { "epoch": 0.05, "learning_rate": 1.909243697478992e-05, "logits/chosen": -2.1656434535980225, "logits/rejected": -1.5117740631103516, "logps/chosen": -323.4513854980469, "logps/rejected": -210.25413513183594, "loss": 0.2304, "rewards/accuracies": 0.8125, "rewards/chosen": 0.12024146318435669, "rewards/margins": 2.3969602584838867, "rewards/rejected": -2.2767186164855957, "step": 236 }, { "epoch": 0.05, "learning_rate": 1.908823529411765e-05, "logits/chosen": -2.2615199089050293, "logits/rejected": -2.3321115970611572, "logps/chosen": -391.67791748046875, "logps/rejected": -341.98870849609375, "loss": 0.4762, "rewards/accuracies": 0.875, "rewards/chosen": 0.07293607294559479, "rewards/margins": 2.3192949295043945, "rewards/rejected": -2.246358871459961, "step": 237 }, { "epoch": 0.05, "learning_rate": 1.908403361344538e-05, "logits/chosen": -2.0217580795288086, "logits/rejected": -2.2776317596435547, "logps/chosen": -242.20521545410156, "logps/rejected": -317.45501708984375, "loss": 0.5189, "rewards/accuracies": 0.75, "rewards/chosen": 0.13696017861366272, "rewards/margins": 1.7944462299346924, "rewards/rejected": -1.6574859619140625, "step": 238 }, { "epoch": 0.05, "learning_rate": 1.9079831932773113e-05, "logits/chosen": -1.7655162811279297, "logits/rejected": -1.946895956993103, "logps/chosen": -250.6832733154297, "logps/rejected": -407.4606628417969, "loss": 0.3513, "rewards/accuracies": 0.75, "rewards/chosen": -0.013535760343074799, "rewards/margins": 3.0488224029541016, "rewards/rejected": -3.0623579025268555, "step": 239 }, { "epoch": 0.05, "learning_rate": 1.9075630252100844e-05, "logits/chosen": -2.2607085704803467, "logits/rejected": -2.2937917709350586, "logps/chosen": -302.1427001953125, "logps/rejected": -371.2340087890625, "loss": 0.2655, "rewards/accuracies": 0.875, "rewards/chosen": 0.45845815539360046, "rewards/margins": 2.677032232284546, "rewards/rejected": -2.218574047088623, "step": 240 }, { "epoch": 0.05, "learning_rate": 1.9071428571428574e-05, "logits/chosen": -2.1637423038482666, "logits/rejected": -1.8416534662246704, "logps/chosen": -308.8120422363281, "logps/rejected": -242.09458923339844, "loss": 0.3804, "rewards/accuracies": 0.8125, "rewards/chosen": -0.37554559111595154, "rewards/margins": 1.8725324869155884, "rewards/rejected": -2.2480781078338623, "step": 241 }, { "epoch": 0.05, "learning_rate": 1.9067226890756304e-05, "logits/chosen": -2.151689052581787, "logits/rejected": -2.1586365699768066, "logps/chosen": -376.4841003417969, "logps/rejected": -369.2525634765625, "loss": 0.2161, "rewards/accuracies": 0.875, "rewards/chosen": -0.03918618708848953, "rewards/margins": 3.1665728092193604, "rewards/rejected": -3.205759048461914, "step": 242 }, { "epoch": 0.05, "learning_rate": 1.9063025210084038e-05, "logits/chosen": -1.8786776065826416, "logits/rejected": -1.9437311887741089, "logps/chosen": -249.69354248046875, "logps/rejected": -200.59603881835938, "loss": 0.4053, "rewards/accuracies": 0.875, "rewards/chosen": -0.7313644289970398, "rewards/margins": 1.9353132247924805, "rewards/rejected": -2.666677474975586, "step": 243 }, { "epoch": 0.05, "learning_rate": 1.9058823529411764e-05, "logits/chosen": -2.0630462169647217, "logits/rejected": -1.9633992910385132, "logps/chosen": -329.28411865234375, "logps/rejected": -286.7142639160156, "loss": 0.5749, "rewards/accuracies": 0.75, "rewards/chosen": -0.43454915285110474, "rewards/margins": 1.681921362876892, "rewards/rejected": -2.1164703369140625, "step": 244 }, { "epoch": 0.05, "learning_rate": 1.9054621848739495e-05, "logits/chosen": -2.1442770957946777, "logits/rejected": -1.7254831790924072, "logps/chosen": -375.1960754394531, "logps/rejected": -285.4134521484375, "loss": 0.3865, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5157605409622192, "rewards/margins": 2.7622878551483154, "rewards/rejected": -3.278048515319824, "step": 245 }, { "epoch": 0.05, "learning_rate": 1.9050420168067228e-05, "logits/chosen": -1.7018543481826782, "logits/rejected": -2.0866665840148926, "logps/chosen": -331.94183349609375, "logps/rejected": -319.7412109375, "loss": 0.4157, "rewards/accuracies": 0.75, "rewards/chosen": -0.6551017761230469, "rewards/margins": 2.117560625076294, "rewards/rejected": -2.7726621627807617, "step": 246 }, { "epoch": 0.05, "learning_rate": 1.904621848739496e-05, "logits/chosen": -2.163576126098633, "logits/rejected": -1.554231882095337, "logps/chosen": -452.3487548828125, "logps/rejected": -313.6382751464844, "loss": 0.4994, "rewards/accuracies": 0.75, "rewards/chosen": -0.30681225657463074, "rewards/margins": 2.259860038757324, "rewards/rejected": -2.5666723251342773, "step": 247 }, { "epoch": 0.05, "learning_rate": 1.904201680672269e-05, "logits/chosen": -2.1186232566833496, "logits/rejected": -2.1142830848693848, "logps/chosen": -409.2235107421875, "logps/rejected": -347.56829833984375, "loss": 0.2788, "rewards/accuracies": 1.0, "rewards/chosen": -0.06957337260246277, "rewards/margins": 1.7370926141738892, "rewards/rejected": -1.8066661357879639, "step": 248 }, { "epoch": 0.05, "learning_rate": 1.9037815126050422e-05, "logits/chosen": -2.2889556884765625, "logits/rejected": -1.9796427488327026, "logps/chosen": -308.3511962890625, "logps/rejected": -284.92462158203125, "loss": 0.2701, "rewards/accuracies": 0.9375, "rewards/chosen": 0.16298097372055054, "rewards/margins": 2.3532967567443848, "rewards/rejected": -2.1903157234191895, "step": 249 }, { "epoch": 0.05, "learning_rate": 1.9033613445378152e-05, "logits/chosen": -1.8164410591125488, "logits/rejected": -1.902475357055664, "logps/chosen": -380.65618896484375, "logps/rejected": -288.0320739746094, "loss": 0.3634, "rewards/accuracies": 0.75, "rewards/chosen": -0.5005645155906677, "rewards/margins": 2.498218059539795, "rewards/rejected": -2.9987828731536865, "step": 250 }, { "epoch": 0.05, "learning_rate": 1.9029411764705883e-05, "logits/chosen": -2.247981071472168, "logits/rejected": -1.8208394050598145, "logps/chosen": -311.01983642578125, "logps/rejected": -251.33236694335938, "loss": 0.3057, "rewards/accuracies": 0.8125, "rewards/chosen": -0.45743709802627563, "rewards/margins": 2.6454529762268066, "rewards/rejected": -3.1028902530670166, "step": 251 }, { "epoch": 0.05, "learning_rate": 1.9025210084033613e-05, "logits/chosen": -2.20320463180542, "logits/rejected": -2.198298454284668, "logps/chosen": -422.0494384765625, "logps/rejected": -486.51641845703125, "loss": 0.2731, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4743783473968506, "rewards/margins": 2.0090861320495605, "rewards/rejected": -2.483464241027832, "step": 252 }, { "epoch": 0.05, "learning_rate": 1.9021008403361347e-05, "logits/chosen": -2.2514636516571045, "logits/rejected": -1.893636703491211, "logps/chosen": -314.3566589355469, "logps/rejected": -224.54420471191406, "loss": 0.3205, "rewards/accuracies": 0.875, "rewards/chosen": 0.48091644048690796, "rewards/margins": 2.4646921157836914, "rewards/rejected": -1.9837758541107178, "step": 253 }, { "epoch": 0.05, "learning_rate": 1.9016806722689077e-05, "logits/chosen": -2.4021365642547607, "logits/rejected": -2.1228184700012207, "logps/chosen": -343.2098388671875, "logps/rejected": -316.01507568359375, "loss": 0.3059, "rewards/accuracies": 0.75, "rewards/chosen": -0.37482625246047974, "rewards/margins": 2.382723569869995, "rewards/rejected": -2.757550001144409, "step": 254 }, { "epoch": 0.05, "learning_rate": 1.9012605042016807e-05, "logits/chosen": -2.2218337059020996, "logits/rejected": -2.286167860031128, "logps/chosen": -253.73788452148438, "logps/rejected": -309.34893798828125, "loss": 0.2949, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7315078973770142, "rewards/margins": 3.1146249771118164, "rewards/rejected": -3.846132755279541, "step": 255 }, { "epoch": 0.05, "learning_rate": 1.9008403361344537e-05, "logits/chosen": -2.080099582672119, "logits/rejected": -1.8542243242263794, "logps/chosen": -402.4940185546875, "logps/rejected": -320.7508850097656, "loss": 0.2761, "rewards/accuracies": 0.9375, "rewards/chosen": -0.09946957230567932, "rewards/margins": 2.6404869556427, "rewards/rejected": -2.7399566173553467, "step": 256 }, { "epoch": 0.05, "learning_rate": 1.900420168067227e-05, "logits/chosen": -1.9595483541488647, "logits/rejected": -1.8734363317489624, "logps/chosen": -272.85577392578125, "logps/rejected": -249.11328125, "loss": 2.2891, "rewards/accuracies": 0.5625, "rewards/chosen": -0.9585080742835999, "rewards/margins": 0.6084715723991394, "rewards/rejected": -1.5669796466827393, "step": 257 }, { "epoch": 0.05, "learning_rate": 1.9e-05, "logits/chosen": -1.998580813407898, "logits/rejected": -2.021963596343994, "logps/chosen": -220.8612060546875, "logps/rejected": -326.28802490234375, "loss": 0.7624, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8799558877944946, "rewards/margins": 2.375614881515503, "rewards/rejected": -3.255570888519287, "step": 258 }, { "epoch": 0.05, "learning_rate": 1.899579831932773e-05, "logits/chosen": -2.2134571075439453, "logits/rejected": -2.075129270553589, "logps/chosen": -236.2484130859375, "logps/rejected": -276.21527099609375, "loss": 0.1764, "rewards/accuracies": 0.9375, "rewards/chosen": 0.06507375091314316, "rewards/margins": 3.29520320892334, "rewards/rejected": -3.2301292419433594, "step": 259 }, { "epoch": 0.05, "learning_rate": 1.899159663865546e-05, "logits/chosen": -2.4684386253356934, "logits/rejected": -2.0644631385803223, "logps/chosen": -332.96466064453125, "logps/rejected": -267.83953857421875, "loss": 0.2236, "rewards/accuracies": 0.875, "rewards/chosen": 0.35851046442985535, "rewards/margins": 3.5825247764587402, "rewards/rejected": -3.2240142822265625, "step": 260 }, { "epoch": 0.05, "learning_rate": 1.8987394957983195e-05, "logits/chosen": -2.151036262512207, "logits/rejected": -2.0170624256134033, "logps/chosen": -303.647216796875, "logps/rejected": -343.37701416015625, "loss": 0.7025, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6481125950813293, "rewards/margins": 1.8401405811309814, "rewards/rejected": -2.488253116607666, "step": 261 }, { "epoch": 0.05, "learning_rate": 1.8983193277310925e-05, "logits/chosen": -1.9874547719955444, "logits/rejected": -1.628706693649292, "logps/chosen": -212.7614288330078, "logps/rejected": -187.1929473876953, "loss": 0.553, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8250977993011475, "rewards/margins": 1.5300283432006836, "rewards/rejected": -2.355126142501831, "step": 262 }, { "epoch": 0.06, "learning_rate": 1.8978991596638656e-05, "logits/chosen": -1.567716360092163, "logits/rejected": -1.9890470504760742, "logps/chosen": -278.9992370605469, "logps/rejected": -370.65118408203125, "loss": 0.3707, "rewards/accuracies": 0.6875, "rewards/chosen": -0.41911080479621887, "rewards/margins": 1.344163179397583, "rewards/rejected": -1.763274073600769, "step": 263 }, { "epoch": 0.06, "learning_rate": 1.8974789915966386e-05, "logits/chosen": -2.0037648677825928, "logits/rejected": -1.9156556129455566, "logps/chosen": -332.451171875, "logps/rejected": -446.02532958984375, "loss": 0.2825, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6619440913200378, "rewards/margins": 2.5153517723083496, "rewards/rejected": -3.177295684814453, "step": 264 }, { "epoch": 0.06, "learning_rate": 1.897058823529412e-05, "logits/chosen": -2.224169969558716, "logits/rejected": -1.6302118301391602, "logps/chosen": -336.26580810546875, "logps/rejected": -236.43392944335938, "loss": 0.4961, "rewards/accuracies": 0.75, "rewards/chosen": -0.29110512137413025, "rewards/margins": 2.6822776794433594, "rewards/rejected": -2.9733829498291016, "step": 265 }, { "epoch": 0.06, "learning_rate": 1.896638655462185e-05, "logits/chosen": -2.2972166538238525, "logits/rejected": -1.8663579225540161, "logps/chosen": -349.3228759765625, "logps/rejected": -323.1174621582031, "loss": 0.3396, "rewards/accuracies": 0.75, "rewards/chosen": -0.28448155522346497, "rewards/margins": 3.315586566925049, "rewards/rejected": -3.6000683307647705, "step": 266 }, { "epoch": 0.06, "learning_rate": 1.896218487394958e-05, "logits/chosen": -2.286379814147949, "logits/rejected": -1.89598548412323, "logps/chosen": -326.5625305175781, "logps/rejected": -281.0647277832031, "loss": 0.3079, "rewards/accuracies": 0.875, "rewards/chosen": -0.38871973752975464, "rewards/margins": 2.892576217651367, "rewards/rejected": -3.2812960147857666, "step": 267 }, { "epoch": 0.06, "learning_rate": 1.8957983193277313e-05, "logits/chosen": -2.266174077987671, "logits/rejected": -2.196779727935791, "logps/chosen": -225.83517456054688, "logps/rejected": -273.6953430175781, "loss": 0.8555, "rewards/accuracies": 0.6875, "rewards/chosen": -1.102109670639038, "rewards/margins": 3.273979425430298, "rewards/rejected": -4.376089096069336, "step": 268 }, { "epoch": 0.06, "learning_rate": 1.8953781512605044e-05, "logits/chosen": -1.931058645248413, "logits/rejected": -1.9792654514312744, "logps/chosen": -477.3355712890625, "logps/rejected": -296.3453674316406, "loss": 0.2875, "rewards/accuracies": 0.875, "rewards/chosen": -0.0060585737228393555, "rewards/margins": 2.8947529792785645, "rewards/rejected": -2.9008116722106934, "step": 269 }, { "epoch": 0.06, "learning_rate": 1.8949579831932774e-05, "logits/chosen": -2.336129665374756, "logits/rejected": -2.0650296211242676, "logps/chosen": -279.13299560546875, "logps/rejected": -314.7516174316406, "loss": 0.3795, "rewards/accuracies": 0.8125, "rewards/chosen": -0.09615585207939148, "rewards/margins": 3.437487840652466, "rewards/rejected": -3.5336437225341797, "step": 270 }, { "epoch": 0.06, "learning_rate": 1.8945378151260504e-05, "logits/chosen": -2.0036585330963135, "logits/rejected": -1.9419636726379395, "logps/chosen": -419.7447509765625, "logps/rejected": -381.2106018066406, "loss": 0.3541, "rewards/accuracies": 0.875, "rewards/chosen": -0.19862446188926697, "rewards/margins": 2.218693494796753, "rewards/rejected": -2.4173178672790527, "step": 271 }, { "epoch": 0.06, "learning_rate": 1.8941176470588238e-05, "logits/chosen": -2.2175207138061523, "logits/rejected": -1.7529559135437012, "logps/chosen": -252.12490844726562, "logps/rejected": -297.6517028808594, "loss": 0.1474, "rewards/accuracies": 0.875, "rewards/chosen": -0.5155524015426636, "rewards/margins": 4.058847904205322, "rewards/rejected": -4.574400424957275, "step": 272 }, { "epoch": 0.06, "learning_rate": 1.8936974789915968e-05, "logits/chosen": -2.0892577171325684, "logits/rejected": -1.3442286252975464, "logps/chosen": -254.39706420898438, "logps/rejected": -274.16485595703125, "loss": 0.2011, "rewards/accuracies": 0.875, "rewards/chosen": -0.5458801984786987, "rewards/margins": 3.198228597640991, "rewards/rejected": -3.7441086769104004, "step": 273 }, { "epoch": 0.06, "learning_rate": 1.8932773109243698e-05, "logits/chosen": -2.259138345718384, "logits/rejected": -2.2775609493255615, "logps/chosen": -521.3603515625, "logps/rejected": -478.9537353515625, "loss": 0.1142, "rewards/accuracies": 1.0, "rewards/chosen": -0.2233598828315735, "rewards/margins": 3.9802184104919434, "rewards/rejected": -4.203578948974609, "step": 274 }, { "epoch": 0.06, "learning_rate": 1.892857142857143e-05, "logits/chosen": -2.0820884704589844, "logits/rejected": -1.5601340532302856, "logps/chosen": -327.8568115234375, "logps/rejected": -256.6611022949219, "loss": 0.4629, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4417157769203186, "rewards/margins": 2.0913429260253906, "rewards/rejected": -2.5330586433410645, "step": 275 }, { "epoch": 0.06, "learning_rate": 1.8924369747899162e-05, "logits/chosen": -2.283698320388794, "logits/rejected": -2.0509748458862305, "logps/chosen": -211.555419921875, "logps/rejected": -264.49810791015625, "loss": 0.2272, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3787537217140198, "rewards/margins": 3.1879568099975586, "rewards/rejected": -3.5667107105255127, "step": 276 }, { "epoch": 0.06, "learning_rate": 1.8920168067226892e-05, "logits/chosen": -2.255230188369751, "logits/rejected": -2.3941588401794434, "logps/chosen": -323.314697265625, "logps/rejected": -339.1155090332031, "loss": 0.197, "rewards/accuracies": 1.0, "rewards/chosen": 0.19374454021453857, "rewards/margins": 2.264287233352661, "rewards/rejected": -2.070542812347412, "step": 277 }, { "epoch": 0.06, "learning_rate": 1.8915966386554622e-05, "logits/chosen": -2.122606039047241, "logits/rejected": -1.7213150262832642, "logps/chosen": -305.01373291015625, "logps/rejected": -372.7522277832031, "loss": 0.4153, "rewards/accuracies": 0.875, "rewards/chosen": -0.014068517833948135, "rewards/margins": 3.018646717071533, "rewards/rejected": -3.032715320587158, "step": 278 }, { "epoch": 0.06, "learning_rate": 1.8911764705882353e-05, "logits/chosen": -1.9908778667449951, "logits/rejected": -1.467978596687317, "logps/chosen": -272.57501220703125, "logps/rejected": -250.95005798339844, "loss": 0.4133, "rewards/accuracies": 0.8125, "rewards/chosen": -0.15837684273719788, "rewards/margins": 2.1676838397979736, "rewards/rejected": -2.3260607719421387, "step": 279 }, { "epoch": 0.06, "learning_rate": 1.8907563025210086e-05, "logits/chosen": -2.3047399520874023, "logits/rejected": -2.1433892250061035, "logps/chosen": -333.09716796875, "logps/rejected": -371.75286865234375, "loss": 0.2282, "rewards/accuracies": 0.875, "rewards/chosen": -0.47221025824546814, "rewards/margins": 3.88632869720459, "rewards/rejected": -4.358538627624512, "step": 280 }, { "epoch": 0.06, "learning_rate": 1.8903361344537816e-05, "logits/chosen": -2.0384902954101562, "logits/rejected": -1.8693532943725586, "logps/chosen": -351.90643310546875, "logps/rejected": -319.672607421875, "loss": 0.4755, "rewards/accuracies": 0.75, "rewards/chosen": -0.5231740474700928, "rewards/margins": 2.775447368621826, "rewards/rejected": -3.298621654510498, "step": 281 }, { "epoch": 0.06, "learning_rate": 1.8899159663865547e-05, "logits/chosen": -1.7592918872833252, "logits/rejected": -1.6648476123809814, "logps/chosen": -246.908935546875, "logps/rejected": -258.38177490234375, "loss": 0.2218, "rewards/accuracies": 0.9375, "rewards/chosen": -0.705916166305542, "rewards/margins": 3.2774713039398193, "rewards/rejected": -3.9833874702453613, "step": 282 }, { "epoch": 0.06, "learning_rate": 1.8894957983193277e-05, "logits/chosen": -2.456876039505005, "logits/rejected": -1.9175190925598145, "logps/chosen": -382.9876708984375, "logps/rejected": -289.6297607421875, "loss": 0.334, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8396276235580444, "rewards/margins": 2.283569574356079, "rewards/rejected": -3.123197317123413, "step": 283 }, { "epoch": 0.06, "learning_rate": 1.889075630252101e-05, "logits/chosen": -1.8148776292800903, "logits/rejected": -1.9756126403808594, "logps/chosen": -227.39370727539062, "logps/rejected": -298.6007995605469, "loss": 0.2814, "rewards/accuracies": 0.875, "rewards/chosen": -0.2220604121685028, "rewards/margins": 3.041797161102295, "rewards/rejected": -3.263857364654541, "step": 284 }, { "epoch": 0.06, "learning_rate": 1.888655462184874e-05, "logits/chosen": -2.056464195251465, "logits/rejected": -2.0410242080688477, "logps/chosen": -232.30111694335938, "logps/rejected": -307.7833251953125, "loss": 0.3806, "rewards/accuracies": 0.75, "rewards/chosen": -0.5008337497711182, "rewards/margins": 2.573366165161133, "rewards/rejected": -3.074199676513672, "step": 285 }, { "epoch": 0.06, "learning_rate": 1.888235294117647e-05, "logits/chosen": -2.2546117305755615, "logits/rejected": -2.1923813819885254, "logps/chosen": -347.1071472167969, "logps/rejected": -352.00140380859375, "loss": 0.3022, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2465924471616745, "rewards/margins": 3.0192251205444336, "rewards/rejected": -3.265817403793335, "step": 286 }, { "epoch": 0.06, "learning_rate": 1.88781512605042e-05, "logits/chosen": -2.086280584335327, "logits/rejected": -1.8711687326431274, "logps/chosen": -246.7713623046875, "logps/rejected": -322.1441650390625, "loss": 0.3882, "rewards/accuracies": 0.75, "rewards/chosen": -0.22017957270145416, "rewards/margins": 2.388051986694336, "rewards/rejected": -2.608231544494629, "step": 287 }, { "epoch": 0.06, "learning_rate": 1.8873949579831935e-05, "logits/chosen": -2.0063281059265137, "logits/rejected": -2.295560598373413, "logps/chosen": -278.6932067871094, "logps/rejected": -262.2654724121094, "loss": 0.2467, "rewards/accuracies": 0.875, "rewards/chosen": -0.07579703629016876, "rewards/margins": 3.047999143600464, "rewards/rejected": -3.1237964630126953, "step": 288 }, { "epoch": 0.06, "learning_rate": 1.8869747899159665e-05, "logits/chosen": -2.074051856994629, "logits/rejected": -2.1156082153320312, "logps/chosen": -220.39712524414062, "logps/rejected": -272.962890625, "loss": 0.2378, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6701609492301941, "rewards/margins": 2.572181224822998, "rewards/rejected": -3.242342472076416, "step": 289 }, { "epoch": 0.06, "learning_rate": 1.8865546218487395e-05, "logits/chosen": -1.7695175409317017, "logits/rejected": -1.9864470958709717, "logps/chosen": -281.2634582519531, "logps/rejected": -314.4126281738281, "loss": 0.6327, "rewards/accuracies": 0.6875, "rewards/chosen": -0.7106997966766357, "rewards/margins": 3.1313889026641846, "rewards/rejected": -3.842088222503662, "step": 290 }, { "epoch": 0.06, "learning_rate": 1.886134453781513e-05, "logits/chosen": -1.754223108291626, "logits/rejected": -2.0436923503875732, "logps/chosen": -244.408447265625, "logps/rejected": -292.4617919921875, "loss": 0.5628, "rewards/accuracies": 0.8125, "rewards/chosen": -1.268982172012329, "rewards/margins": 1.5875968933105469, "rewards/rejected": -2.856578826904297, "step": 291 }, { "epoch": 0.06, "learning_rate": 1.885714285714286e-05, "logits/chosen": -1.9902204275131226, "logits/rejected": -1.825770378112793, "logps/chosen": -278.1590270996094, "logps/rejected": -252.11109924316406, "loss": 0.3886, "rewards/accuracies": 0.75, "rewards/chosen": -0.9952681660652161, "rewards/margins": 2.203479051589966, "rewards/rejected": -3.198747158050537, "step": 292 }, { "epoch": 0.06, "learning_rate": 1.885294117647059e-05, "logits/chosen": -1.912260890007019, "logits/rejected": -2.068472146987915, "logps/chosen": -253.81625366210938, "logps/rejected": -272.1974182128906, "loss": 0.4716, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9447513818740845, "rewards/margins": 1.7583000659942627, "rewards/rejected": -2.7030515670776367, "step": 293 }, { "epoch": 0.06, "learning_rate": 1.884873949579832e-05, "logits/chosen": -2.188776969909668, "logits/rejected": -1.8033409118652344, "logps/chosen": -220.3309326171875, "logps/rejected": -269.0039978027344, "loss": 0.2543, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8321278095245361, "rewards/margins": 2.7947778701782227, "rewards/rejected": -3.626905679702759, "step": 294 }, { "epoch": 0.06, "learning_rate": 1.8844537815126053e-05, "logits/chosen": -2.291764736175537, "logits/rejected": -2.0348153114318848, "logps/chosen": -320.97943115234375, "logps/rejected": -295.7033386230469, "loss": 0.5771, "rewards/accuracies": 0.75, "rewards/chosen": -0.5811628699302673, "rewards/margins": 3.624668598175049, "rewards/rejected": -4.205831527709961, "step": 295 }, { "epoch": 0.06, "learning_rate": 1.8840336134453783e-05, "logits/chosen": -2.5415329933166504, "logits/rejected": -1.9511866569519043, "logps/chosen": -417.8042297363281, "logps/rejected": -382.7701416015625, "loss": 0.7825, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8851407766342163, "rewards/margins": 2.9997334480285645, "rewards/rejected": -3.8848745822906494, "step": 296 }, { "epoch": 0.06, "learning_rate": 1.8836134453781514e-05, "logits/chosen": -2.2607715129852295, "logits/rejected": -1.6191909313201904, "logps/chosen": -326.38037109375, "logps/rejected": -314.59716796875, "loss": 0.3887, "rewards/accuracies": 0.8125, "rewards/chosen": -0.36671337485313416, "rewards/margins": 3.2380211353302, "rewards/rejected": -3.6047346591949463, "step": 297 }, { "epoch": 0.06, "learning_rate": 1.8831932773109244e-05, "logits/chosen": -2.2181785106658936, "logits/rejected": -2.300800085067749, "logps/chosen": -255.30764770507812, "logps/rejected": -309.82220458984375, "loss": 0.5814, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5556820631027222, "rewards/margins": 2.1392624378204346, "rewards/rejected": -2.694944381713867, "step": 298 }, { "epoch": 0.06, "learning_rate": 1.8827731092436977e-05, "logits/chosen": -2.268632411956787, "logits/rejected": -1.9622522592544556, "logps/chosen": -249.33746337890625, "logps/rejected": -247.32687377929688, "loss": 0.326, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3187025785446167, "rewards/margins": 2.138777017593384, "rewards/rejected": -2.457479476928711, "step": 299 }, { "epoch": 0.06, "learning_rate": 1.8823529411764708e-05, "logits/chosen": -2.109956741333008, "logits/rejected": -1.9494073390960693, "logps/chosen": -323.4368591308594, "logps/rejected": -397.69049072265625, "loss": 0.3467, "rewards/accuracies": 0.75, "rewards/chosen": -0.34082433581352234, "rewards/margins": 2.7976865768432617, "rewards/rejected": -3.1385107040405273, "step": 300 }, { "epoch": 0.06, "learning_rate": 1.8819327731092438e-05, "logits/chosen": -2.0344057083129883, "logits/rejected": -2.1402249336242676, "logps/chosen": -217.72195434570312, "logps/rejected": -290.2239990234375, "loss": 0.4264, "rewards/accuracies": 0.625, "rewards/chosen": -0.4302474856376648, "rewards/margins": 2.074941873550415, "rewards/rejected": -2.5051894187927246, "step": 301 }, { "epoch": 0.06, "learning_rate": 1.8815126050420168e-05, "logits/chosen": -2.4479141235351562, "logits/rejected": -1.9408173561096191, "logps/chosen": -354.33929443359375, "logps/rejected": -286.9940185546875, "loss": 0.3129, "rewards/accuracies": 0.875, "rewards/chosen": 0.2552834153175354, "rewards/margins": 2.541254758834839, "rewards/rejected": -2.285971164703369, "step": 302 }, { "epoch": 0.06, "learning_rate": 1.88109243697479e-05, "logits/chosen": -2.211653709411621, "logits/rejected": -1.9729622602462769, "logps/chosen": -327.70196533203125, "logps/rejected": -297.71484375, "loss": 0.1917, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12104669958353043, "rewards/margins": 3.1653082370758057, "rewards/rejected": -3.2863550186157227, "step": 303 }, { "epoch": 0.06, "learning_rate": 1.8806722689075632e-05, "logits/chosen": -2.3373312950134277, "logits/rejected": -2.1898093223571777, "logps/chosen": -392.64312744140625, "logps/rejected": -428.7371826171875, "loss": 0.2219, "rewards/accuracies": 0.9375, "rewards/chosen": -0.31619569659233093, "rewards/margins": 2.3710060119628906, "rewards/rejected": -2.687201976776123, "step": 304 }, { "epoch": 0.06, "learning_rate": 1.8802521008403362e-05, "logits/chosen": -2.27375864982605, "logits/rejected": -2.2790725231170654, "logps/chosen": -332.09051513671875, "logps/rejected": -304.3880615234375, "loss": 0.4099, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4454871118068695, "rewards/margins": 2.11026668548584, "rewards/rejected": -2.555753707885742, "step": 305 }, { "epoch": 0.06, "learning_rate": 1.8798319327731092e-05, "logits/chosen": -1.9241347312927246, "logits/rejected": -1.7147314548492432, "logps/chosen": -312.8620300292969, "logps/rejected": -332.3287353515625, "loss": 0.3528, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6963294148445129, "rewards/margins": 3.4313244819641113, "rewards/rejected": -4.127654075622559, "step": 306 }, { "epoch": 0.06, "learning_rate": 1.8794117647058826e-05, "logits/chosen": -2.44700288772583, "logits/rejected": -2.201770782470703, "logps/chosen": -333.5986328125, "logps/rejected": -257.613037109375, "loss": 0.2882, "rewards/accuracies": 0.875, "rewards/chosen": 0.3046763241291046, "rewards/margins": 1.9939825534820557, "rewards/rejected": -1.6893062591552734, "step": 307 }, { "epoch": 0.06, "learning_rate": 1.8789915966386556e-05, "logits/chosen": -1.9482791423797607, "logits/rejected": -2.108036994934082, "logps/chosen": -239.434326171875, "logps/rejected": -284.23931884765625, "loss": 0.1588, "rewards/accuracies": 0.9375, "rewards/chosen": 0.07599126547574997, "rewards/margins": 3.667056083679199, "rewards/rejected": -3.591064453125, "step": 308 }, { "epoch": 0.06, "learning_rate": 1.8785714285714286e-05, "logits/chosen": -2.2287769317626953, "logits/rejected": -2.2077245712280273, "logps/chosen": -269.6983642578125, "logps/rejected": -325.807861328125, "loss": 0.4458, "rewards/accuracies": 0.875, "rewards/chosen": -0.7661252617835999, "rewards/margins": 2.473928928375244, "rewards/rejected": -3.2400546073913574, "step": 309 }, { "epoch": 0.06, "learning_rate": 1.8781512605042017e-05, "logits/chosen": -2.2159597873687744, "logits/rejected": -1.8691554069519043, "logps/chosen": -339.8736572265625, "logps/rejected": -294.8172302246094, "loss": 0.3963, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9596534967422485, "rewards/margins": 1.9829761981964111, "rewards/rejected": -2.942629814147949, "step": 310 }, { "epoch": 0.07, "learning_rate": 1.877731092436975e-05, "logits/chosen": -1.7915797233581543, "logits/rejected": -1.4641615152359009, "logps/chosen": -329.60443115234375, "logps/rejected": -242.4039764404297, "loss": 0.3085, "rewards/accuracies": 0.8125, "rewards/chosen": -0.020845927298069, "rewards/margins": 2.6320667266845703, "rewards/rejected": -2.6529126167297363, "step": 311 }, { "epoch": 0.07, "learning_rate": 1.877310924369748e-05, "logits/chosen": -2.1414425373077393, "logits/rejected": -2.300830364227295, "logps/chosen": -269.4566650390625, "logps/rejected": -341.6777038574219, "loss": 0.4163, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6433926820755005, "rewards/margins": 2.5839271545410156, "rewards/rejected": -3.2273197174072266, "step": 312 }, { "epoch": 0.07, "learning_rate": 1.876890756302521e-05, "logits/chosen": -1.96380615234375, "logits/rejected": -1.8046247959136963, "logps/chosen": -348.15655517578125, "logps/rejected": -293.32647705078125, "loss": 0.3273, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0788097381591797, "rewards/margins": 2.217039108276367, "rewards/rejected": -3.295849084854126, "step": 313 }, { "epoch": 0.07, "learning_rate": 1.8764705882352944e-05, "logits/chosen": -2.4041481018066406, "logits/rejected": -1.9352842569351196, "logps/chosen": -407.56939697265625, "logps/rejected": -310.6763000488281, "loss": 0.311, "rewards/accuracies": 0.75, "rewards/chosen": 0.20415282249450684, "rewards/margins": 3.2726519107818604, "rewards/rejected": -3.0684990882873535, "step": 314 }, { "epoch": 0.07, "learning_rate": 1.8760504201680674e-05, "logits/chosen": -2.2820425033569336, "logits/rejected": -2.1718294620513916, "logps/chosen": -331.9612731933594, "logps/rejected": -334.3269348144531, "loss": 0.2979, "rewards/accuracies": 0.9375, "rewards/chosen": -0.43394508957862854, "rewards/margins": 2.825974225997925, "rewards/rejected": -3.2599191665649414, "step": 315 }, { "epoch": 0.07, "learning_rate": 1.8756302521008405e-05, "logits/chosen": -2.4595770835876465, "logits/rejected": -2.195279598236084, "logps/chosen": -308.5310363769531, "logps/rejected": -235.79022216796875, "loss": 0.5167, "rewards/accuracies": 0.6875, "rewards/chosen": 0.16394472122192383, "rewards/margins": 1.5328145027160645, "rewards/rejected": -1.3688697814941406, "step": 316 }, { "epoch": 0.07, "learning_rate": 1.8752100840336135e-05, "logits/chosen": -1.913292646408081, "logits/rejected": -1.69960618019104, "logps/chosen": -340.7834777832031, "logps/rejected": -300.80084228515625, "loss": 0.3818, "rewards/accuracies": 0.625, "rewards/chosen": 0.1078362911939621, "rewards/margins": 3.6973748207092285, "rewards/rejected": -3.58953857421875, "step": 317 }, { "epoch": 0.07, "learning_rate": 1.874789915966387e-05, "logits/chosen": -2.0673024654388428, "logits/rejected": -1.9299508333206177, "logps/chosen": -373.1454772949219, "logps/rejected": -371.3177185058594, "loss": 0.2814, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4277085065841675, "rewards/margins": 3.9399666786193848, "rewards/rejected": -4.367674827575684, "step": 318 }, { "epoch": 0.07, "learning_rate": 1.87436974789916e-05, "logits/chosen": -2.0578806400299072, "logits/rejected": -2.324023485183716, "logps/chosen": -254.89859008789062, "logps/rejected": -359.4049072265625, "loss": 0.1694, "rewards/accuracies": 0.875, "rewards/chosen": 0.6514156460762024, "rewards/margins": 3.6118431091308594, "rewards/rejected": -2.9604272842407227, "step": 319 }, { "epoch": 0.07, "learning_rate": 1.873949579831933e-05, "logits/chosen": -1.7575109004974365, "logits/rejected": -1.914217472076416, "logps/chosen": -233.79579162597656, "logps/rejected": -274.8104553222656, "loss": 0.3963, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5326837301254272, "rewards/margins": 2.6236653327941895, "rewards/rejected": -3.1563491821289062, "step": 320 }, { "epoch": 0.07, "learning_rate": 1.873529411764706e-05, "logits/chosen": -1.8810131549835205, "logits/rejected": -2.008780002593994, "logps/chosen": -419.2375183105469, "logps/rejected": -430.22003173828125, "loss": 0.2468, "rewards/accuracies": 0.75, "rewards/chosen": -0.6155575513839722, "rewards/margins": 4.04728889465332, "rewards/rejected": -4.662846565246582, "step": 321 }, { "epoch": 0.07, "learning_rate": 1.8731092436974793e-05, "logits/chosen": -1.6235010623931885, "logits/rejected": -1.586295247077942, "logps/chosen": -266.53515625, "logps/rejected": -328.22259521484375, "loss": 0.3211, "rewards/accuracies": 0.875, "rewards/chosen": -0.9877183437347412, "rewards/margins": 3.058164358139038, "rewards/rejected": -4.045882225036621, "step": 322 }, { "epoch": 0.07, "learning_rate": 1.8726890756302523e-05, "logits/chosen": -2.0542685985565186, "logits/rejected": -2.024887800216675, "logps/chosen": -233.3220977783203, "logps/rejected": -302.41998291015625, "loss": 0.3254, "rewards/accuracies": 0.875, "rewards/chosen": -0.3559727370738983, "rewards/margins": 2.621556043624878, "rewards/rejected": -2.9775285720825195, "step": 323 }, { "epoch": 0.07, "learning_rate": 1.8722689075630253e-05, "logits/chosen": -2.036128520965576, "logits/rejected": -1.8352479934692383, "logps/chosen": -313.8388977050781, "logps/rejected": -397.1602783203125, "loss": 0.3042, "rewards/accuracies": 0.8125, "rewards/chosen": -0.001976490020751953, "rewards/margins": 2.834493637084961, "rewards/rejected": -2.836470127105713, "step": 324 }, { "epoch": 0.07, "learning_rate": 1.8718487394957983e-05, "logits/chosen": -2.0788910388946533, "logits/rejected": -1.8667393922805786, "logps/chosen": -268.1964111328125, "logps/rejected": -296.2843017578125, "loss": 0.6825, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8272196650505066, "rewards/margins": 1.1636391878128052, "rewards/rejected": -1.9908589124679565, "step": 325 }, { "epoch": 0.07, "learning_rate": 1.8714285714285717e-05, "logits/chosen": -2.3844246864318848, "logits/rejected": -1.6167030334472656, "logps/chosen": -407.9466552734375, "logps/rejected": -231.8106231689453, "loss": 0.3028, "rewards/accuracies": 0.8125, "rewards/chosen": 0.042908817529678345, "rewards/margins": 3.6082944869995117, "rewards/rejected": -3.565385341644287, "step": 326 }, { "epoch": 0.07, "learning_rate": 1.8710084033613447e-05, "logits/chosen": -2.2293574810028076, "logits/rejected": -1.669480800628662, "logps/chosen": -249.737060546875, "logps/rejected": -264.0361328125, "loss": 0.2597, "rewards/accuracies": 0.8125, "rewards/chosen": -0.14727619290351868, "rewards/margins": 2.817415952682495, "rewards/rejected": -2.9646918773651123, "step": 327 }, { "epoch": 0.07, "learning_rate": 1.8705882352941178e-05, "logits/chosen": -1.481929063796997, "logits/rejected": -1.774039626121521, "logps/chosen": -241.51197814941406, "logps/rejected": -283.94915771484375, "loss": 0.5201, "rewards/accuracies": 0.6875, "rewards/chosen": -1.1028923988342285, "rewards/margins": 2.2659027576446533, "rewards/rejected": -3.368795156478882, "step": 328 }, { "epoch": 0.07, "learning_rate": 1.8701680672268908e-05, "logits/chosen": -2.4668219089508057, "logits/rejected": -2.148750066757202, "logps/chosen": -373.35162353515625, "logps/rejected": -303.9354248046875, "loss": 0.2006, "rewards/accuracies": 0.9375, "rewards/chosen": 0.15432801842689514, "rewards/margins": 3.2531533241271973, "rewards/rejected": -3.098825454711914, "step": 329 }, { "epoch": 0.07, "learning_rate": 1.869747899159664e-05, "logits/chosen": -1.9888383150100708, "logits/rejected": -1.939383625984192, "logps/chosen": -262.57318115234375, "logps/rejected": -344.1221923828125, "loss": 0.5699, "rewards/accuracies": 0.75, "rewards/chosen": -0.4157131314277649, "rewards/margins": 2.8421761989593506, "rewards/rejected": -3.2578892707824707, "step": 330 }, { "epoch": 0.07, "learning_rate": 1.869327731092437e-05, "logits/chosen": -1.9494271278381348, "logits/rejected": -2.0966906547546387, "logps/chosen": -312.8277587890625, "logps/rejected": -350.18145751953125, "loss": 0.7105, "rewards/accuracies": 0.75, "rewards/chosen": -0.9800263047218323, "rewards/margins": 1.9737093448638916, "rewards/rejected": -2.953735828399658, "step": 331 }, { "epoch": 0.07, "learning_rate": 1.8689075630252102e-05, "logits/chosen": -2.0298752784729004, "logits/rejected": -1.875227928161621, "logps/chosen": -216.7428741455078, "logps/rejected": -203.33787536621094, "loss": 0.3845, "rewards/accuracies": 0.875, "rewards/chosen": -0.5260271430015564, "rewards/margins": 1.8222005367279053, "rewards/rejected": -2.3482275009155273, "step": 332 }, { "epoch": 0.07, "learning_rate": 1.8684873949579832e-05, "logits/chosen": -2.2422947883605957, "logits/rejected": -1.9197068214416504, "logps/chosen": -341.98828125, "logps/rejected": -315.65814208984375, "loss": 0.2355, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4491279423236847, "rewards/margins": 2.794490098953247, "rewards/rejected": -3.2436182498931885, "step": 333 }, { "epoch": 0.07, "learning_rate": 1.8680672268907566e-05, "logits/chosen": -2.3078951835632324, "logits/rejected": -1.8790884017944336, "logps/chosen": -396.8863830566406, "logps/rejected": -309.27490234375, "loss": 0.5343, "rewards/accuracies": 0.8125, "rewards/chosen": -0.007239677011966705, "rewards/margins": 2.369271993637085, "rewards/rejected": -2.376511573791504, "step": 334 }, { "epoch": 0.07, "learning_rate": 1.8676470588235296e-05, "logits/chosen": -2.066531181335449, "logits/rejected": -1.5957533121109009, "logps/chosen": -365.20709228515625, "logps/rejected": -291.71044921875, "loss": 0.3208, "rewards/accuracies": 0.875, "rewards/chosen": -1.17337965965271, "rewards/margins": 3.090036630630493, "rewards/rejected": -4.263416290283203, "step": 335 }, { "epoch": 0.07, "learning_rate": 1.8672268907563026e-05, "logits/chosen": -2.06984281539917, "logits/rejected": -2.126033306121826, "logps/chosen": -347.90960693359375, "logps/rejected": -309.54217529296875, "loss": 0.5627, "rewards/accuracies": 0.75, "rewards/chosen": -0.6166422963142395, "rewards/margins": 1.3381564617156982, "rewards/rejected": -1.9547988176345825, "step": 336 }, { "epoch": 0.07, "learning_rate": 1.866806722689076e-05, "logits/chosen": -1.8469455242156982, "logits/rejected": -1.8469024896621704, "logps/chosen": -352.01727294921875, "logps/rejected": -376.5155029296875, "loss": 0.6246, "rewards/accuracies": 0.6875, "rewards/chosen": -0.32721078395843506, "rewards/margins": 1.8597333431243896, "rewards/rejected": -2.186944007873535, "step": 337 }, { "epoch": 0.07, "learning_rate": 1.866386554621849e-05, "logits/chosen": -2.183018445968628, "logits/rejected": -1.9800773859024048, "logps/chosen": -341.5853576660156, "logps/rejected": -325.4683532714844, "loss": 0.3829, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0828176736831665, "rewards/margins": 1.801600456237793, "rewards/rejected": -2.884418249130249, "step": 338 }, { "epoch": 0.07, "learning_rate": 1.865966386554622e-05, "logits/chosen": -2.216343879699707, "logits/rejected": -1.7860909700393677, "logps/chosen": -324.175537109375, "logps/rejected": -295.8573913574219, "loss": 0.4519, "rewards/accuracies": 0.8125, "rewards/chosen": -0.10657677054405212, "rewards/margins": 1.616962194442749, "rewards/rejected": -1.723538875579834, "step": 339 }, { "epoch": 0.07, "learning_rate": 1.865546218487395e-05, "logits/chosen": -2.2852091789245605, "logits/rejected": -2.203788995742798, "logps/chosen": -263.1763610839844, "logps/rejected": -265.32757568359375, "loss": 0.2124, "rewards/accuracies": 0.8125, "rewards/chosen": 0.000583946704864502, "rewards/margins": 3.75418758392334, "rewards/rejected": -3.753603458404541, "step": 340 }, { "epoch": 0.07, "learning_rate": 1.8651260504201684e-05, "logits/chosen": -2.134643316268921, "logits/rejected": -1.8497560024261475, "logps/chosen": -256.9386291503906, "logps/rejected": -291.808349609375, "loss": 0.7313, "rewards/accuracies": 0.75, "rewards/chosen": -0.5955746173858643, "rewards/margins": 2.1250791549682617, "rewards/rejected": -2.720653772354126, "step": 341 }, { "epoch": 0.07, "learning_rate": 1.8647058823529414e-05, "logits/chosen": -2.129666805267334, "logits/rejected": -2.147286891937256, "logps/chosen": -297.31158447265625, "logps/rejected": -316.26409912109375, "loss": 0.4878, "rewards/accuracies": 0.8125, "rewards/chosen": -0.024975964799523354, "rewards/margins": 2.073550224304199, "rewards/rejected": -2.0985262393951416, "step": 342 }, { "epoch": 0.07, "learning_rate": 1.8642857142857144e-05, "logits/chosen": -2.2581629753112793, "logits/rejected": -2.1744561195373535, "logps/chosen": -371.3339538574219, "logps/rejected": -305.3000183105469, "loss": 0.406, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0708468109369278, "rewards/margins": 2.916726589202881, "rewards/rejected": -2.9875733852386475, "step": 343 }, { "epoch": 0.07, "learning_rate": 1.8638655462184875e-05, "logits/chosen": -1.987851619720459, "logits/rejected": -1.8836939334869385, "logps/chosen": -264.5604248046875, "logps/rejected": -257.2608337402344, "loss": 0.2367, "rewards/accuracies": 0.875, "rewards/chosen": -0.3610621690750122, "rewards/margins": 2.3745124340057373, "rewards/rejected": -2.735574722290039, "step": 344 }, { "epoch": 0.07, "learning_rate": 1.8634453781512608e-05, "logits/chosen": -1.753413438796997, "logits/rejected": -1.6945829391479492, "logps/chosen": -200.69012451171875, "logps/rejected": -283.2491149902344, "loss": 0.5487, "rewards/accuracies": 0.75, "rewards/chosen": -0.46275192499160767, "rewards/margins": 1.767363429069519, "rewards/rejected": -2.2301154136657715, "step": 345 }, { "epoch": 0.07, "learning_rate": 1.863025210084034e-05, "logits/chosen": -2.334855079650879, "logits/rejected": -2.088732957839966, "logps/chosen": -397.83343505859375, "logps/rejected": -338.78179931640625, "loss": 0.1275, "rewards/accuracies": 1.0, "rewards/chosen": 0.19868981838226318, "rewards/margins": 3.2203967571258545, "rewards/rejected": -3.0217068195343018, "step": 346 }, { "epoch": 0.07, "learning_rate": 1.862605042016807e-05, "logits/chosen": -2.3031864166259766, "logits/rejected": -1.9945582151412964, "logps/chosen": -385.1195068359375, "logps/rejected": -343.19866943359375, "loss": 0.5877, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5145111680030823, "rewards/margins": 2.5496606826782227, "rewards/rejected": -3.0641720294952393, "step": 347 }, { "epoch": 0.07, "learning_rate": 1.86218487394958e-05, "logits/chosen": -2.096303701400757, "logits/rejected": -2.223142623901367, "logps/chosen": -273.1221008300781, "logps/rejected": -258.70733642578125, "loss": 0.4627, "rewards/accuracies": 0.875, "rewards/chosen": -0.5140308737754822, "rewards/margins": 1.6974841356277466, "rewards/rejected": -2.211515188217163, "step": 348 }, { "epoch": 0.07, "learning_rate": 1.8617647058823533e-05, "logits/chosen": -1.9672343730926514, "logits/rejected": -2.240957498550415, "logps/chosen": -270.3798522949219, "logps/rejected": -333.7206726074219, "loss": 0.3858, "rewards/accuracies": 0.8125, "rewards/chosen": -0.17106646299362183, "rewards/margins": 2.6529464721679688, "rewards/rejected": -2.8240127563476562, "step": 349 }, { "epoch": 0.07, "learning_rate": 1.8613445378151263e-05, "logits/chosen": -2.130988359451294, "logits/rejected": -1.7660956382751465, "logps/chosen": -269.3900451660156, "logps/rejected": -189.3504638671875, "loss": 0.4339, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2162952721118927, "rewards/margins": 1.6339480876922607, "rewards/rejected": -1.850243330001831, "step": 350 }, { "epoch": 0.07, "learning_rate": 1.8609243697478993e-05, "logits/chosen": -2.2752649784088135, "logits/rejected": -1.899603247642517, "logps/chosen": -398.2548522949219, "logps/rejected": -360.26324462890625, "loss": 0.229, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7394152283668518, "rewards/margins": 2.6867666244506836, "rewards/rejected": -3.4261817932128906, "step": 351 }, { "epoch": 0.07, "learning_rate": 1.8605042016806723e-05, "logits/chosen": -2.5080926418304443, "logits/rejected": -2.2032487392425537, "logps/chosen": -452.4739685058594, "logps/rejected": -370.524658203125, "loss": 0.4107, "rewards/accuracies": 0.8125, "rewards/chosen": -0.21627482771873474, "rewards/margins": 2.1269168853759766, "rewards/rejected": -2.343191623687744, "step": 352 }, { "epoch": 0.07, "learning_rate": 1.8600840336134457e-05, "logits/chosen": -2.0756988525390625, "logits/rejected": -1.8713806867599487, "logps/chosen": -221.3297882080078, "logps/rejected": -252.8258056640625, "loss": 0.6293, "rewards/accuracies": 0.75, "rewards/chosen": -1.1484546661376953, "rewards/margins": 1.8259799480438232, "rewards/rejected": -2.9744346141815186, "step": 353 }, { "epoch": 0.07, "learning_rate": 1.8596638655462187e-05, "logits/chosen": -2.014289140701294, "logits/rejected": -1.9472618103027344, "logps/chosen": -299.89471435546875, "logps/rejected": -335.1524658203125, "loss": 0.494, "rewards/accuracies": 0.8125, "rewards/chosen": -0.978775143623352, "rewards/margins": 1.995316743850708, "rewards/rejected": -2.9740920066833496, "step": 354 }, { "epoch": 0.07, "learning_rate": 1.8592436974789917e-05, "logits/chosen": -1.694716453552246, "logits/rejected": -1.962080478668213, "logps/chosen": -222.0389404296875, "logps/rejected": -271.0969543457031, "loss": 0.4964, "rewards/accuracies": 0.75, "rewards/chosen": -0.03180364519357681, "rewards/margins": 2.024240016937256, "rewards/rejected": -2.0560436248779297, "step": 355 }, { "epoch": 0.07, "learning_rate": 1.8588235294117647e-05, "logits/chosen": -2.2815263271331787, "logits/rejected": -1.7262376546859741, "logps/chosen": -459.6976623535156, "logps/rejected": -336.3739013671875, "loss": 0.251, "rewards/accuracies": 0.875, "rewards/chosen": -0.8403612375259399, "rewards/margins": 2.8347840309143066, "rewards/rejected": -3.675145149230957, "step": 356 }, { "epoch": 0.07, "learning_rate": 1.858403361344538e-05, "logits/chosen": -1.8447967767715454, "logits/rejected": -1.6927156448364258, "logps/chosen": -329.1031494140625, "logps/rejected": -462.51922607421875, "loss": 0.3266, "rewards/accuracies": 0.875, "rewards/chosen": -0.17482687532901764, "rewards/margins": 4.396930694580078, "rewards/rejected": -4.5717573165893555, "step": 357 }, { "epoch": 0.07, "learning_rate": 1.857983193277311e-05, "logits/chosen": -2.336015224456787, "logits/rejected": -2.1152777671813965, "logps/chosen": -381.9425354003906, "logps/rejected": -426.5019836425781, "loss": 0.2882, "rewards/accuracies": 0.875, "rewards/chosen": -0.517792820930481, "rewards/margins": 1.9772413969039917, "rewards/rejected": -2.4950342178344727, "step": 358 }, { "epoch": 0.08, "learning_rate": 1.857563025210084e-05, "logits/chosen": -2.1697511672973633, "logits/rejected": -1.469695806503296, "logps/chosen": -297.51885986328125, "logps/rejected": -271.3316345214844, "loss": 0.4923, "rewards/accuracies": 0.75, "rewards/chosen": -0.37828758358955383, "rewards/margins": 2.0601744651794434, "rewards/rejected": -2.438462018966675, "step": 359 }, { "epoch": 0.08, "learning_rate": 1.8571428571428575e-05, "logits/chosen": -2.045991897583008, "logits/rejected": -1.8669451475143433, "logps/chosen": -364.60028076171875, "logps/rejected": -256.4580993652344, "loss": 0.2367, "rewards/accuracies": 0.875, "rewards/chosen": -0.22408923506736755, "rewards/margins": 2.64922833442688, "rewards/rejected": -2.8733177185058594, "step": 360 }, { "epoch": 0.08, "learning_rate": 1.8567226890756305e-05, "logits/chosen": -2.367703914642334, "logits/rejected": -1.7133262157440186, "logps/chosen": -325.2849426269531, "logps/rejected": -233.791259765625, "loss": 0.4509, "rewards/accuracies": 0.75, "rewards/chosen": -0.3806113600730896, "rewards/margins": 1.185697078704834, "rewards/rejected": -1.5663084983825684, "step": 361 }, { "epoch": 0.08, "learning_rate": 1.8563025210084036e-05, "logits/chosen": -1.9064981937408447, "logits/rejected": -1.80112886428833, "logps/chosen": -266.93218994140625, "logps/rejected": -295.7356872558594, "loss": 0.3796, "rewards/accuracies": 0.6875, "rewards/chosen": 0.09755747765302658, "rewards/margins": 2.2983222007751465, "rewards/rejected": -2.2007646560668945, "step": 362 }, { "epoch": 0.08, "learning_rate": 1.8558823529411766e-05, "logits/chosen": -2.099736213684082, "logits/rejected": -1.7851954698562622, "logps/chosen": -284.7494812011719, "logps/rejected": -312.81378173828125, "loss": 0.2357, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5242412686347961, "rewards/margins": 2.6215972900390625, "rewards/rejected": -3.1458382606506348, "step": 363 }, { "epoch": 0.08, "learning_rate": 1.85546218487395e-05, "logits/chosen": -2.456117630004883, "logits/rejected": -2.2640092372894287, "logps/chosen": -333.8551025390625, "logps/rejected": -326.3819580078125, "loss": 0.9617, "rewards/accuracies": 0.75, "rewards/chosen": -0.24694955348968506, "rewards/margins": 1.4985586404800415, "rewards/rejected": -1.7455081939697266, "step": 364 }, { "epoch": 0.08, "learning_rate": 1.855042016806723e-05, "logits/chosen": -1.9883825778961182, "logits/rejected": -1.817191481590271, "logps/chosen": -229.99081420898438, "logps/rejected": -208.41629028320312, "loss": 0.2795, "rewards/accuracies": 0.75, "rewards/chosen": -0.0852956771850586, "rewards/margins": 2.571378231048584, "rewards/rejected": -2.6566736698150635, "step": 365 }, { "epoch": 0.08, "learning_rate": 1.854621848739496e-05, "logits/chosen": -1.9803918600082397, "logits/rejected": -1.8723807334899902, "logps/chosen": -386.2872314453125, "logps/rejected": -357.6424255371094, "loss": 0.3321, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3730486333370209, "rewards/margins": 2.036879301071167, "rewards/rejected": -2.4099278450012207, "step": 366 }, { "epoch": 0.08, "learning_rate": 1.854201680672269e-05, "logits/chosen": -1.9376304149627686, "logits/rejected": -1.6873728036880493, "logps/chosen": -266.780029296875, "logps/rejected": -243.25033569335938, "loss": 0.1883, "rewards/accuracies": 0.9375, "rewards/chosen": 0.0850076824426651, "rewards/margins": 2.605006694793701, "rewards/rejected": -2.5199992656707764, "step": 367 }, { "epoch": 0.08, "learning_rate": 1.8537815126050424e-05, "logits/chosen": -2.0228683948516846, "logits/rejected": -2.041003942489624, "logps/chosen": -204.14617919921875, "logps/rejected": -217.356689453125, "loss": 0.4402, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6097668409347534, "rewards/margins": 1.8481462001800537, "rewards/rejected": -2.4579131603240967, "step": 368 }, { "epoch": 0.08, "learning_rate": 1.8533613445378154e-05, "logits/chosen": -2.023157835006714, "logits/rejected": -1.913124442100525, "logps/chosen": -273.25335693359375, "logps/rejected": -218.37193298339844, "loss": 0.2036, "rewards/accuracies": 0.875, "rewards/chosen": -0.01626184582710266, "rewards/margins": 3.1192469596862793, "rewards/rejected": -3.1355087757110596, "step": 369 }, { "epoch": 0.08, "learning_rate": 1.8529411764705884e-05, "logits/chosen": -2.0578420162200928, "logits/rejected": -2.2603094577789307, "logps/chosen": -323.858642578125, "logps/rejected": -400.9482421875, "loss": 0.1523, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2392684519290924, "rewards/margins": 3.3774380683898926, "rewards/rejected": -3.616706371307373, "step": 370 }, { "epoch": 0.08, "learning_rate": 1.8525210084033614e-05, "logits/chosen": -1.761908769607544, "logits/rejected": -1.8511838912963867, "logps/chosen": -310.01739501953125, "logps/rejected": -336.5382080078125, "loss": 0.2275, "rewards/accuracies": 0.875, "rewards/chosen": -0.773158073425293, "rewards/margins": 2.381699562072754, "rewards/rejected": -3.154857635498047, "step": 371 }, { "epoch": 0.08, "learning_rate": 1.8521008403361348e-05, "logits/chosen": -2.2980709075927734, "logits/rejected": -2.1128406524658203, "logps/chosen": -313.9911804199219, "logps/rejected": -297.58551025390625, "loss": 0.3405, "rewards/accuracies": 0.875, "rewards/chosen": 0.003889426589012146, "rewards/margins": 2.741454839706421, "rewards/rejected": -2.737565517425537, "step": 372 }, { "epoch": 0.08, "learning_rate": 1.8516806722689078e-05, "logits/chosen": -1.8783695697784424, "logits/rejected": -1.7485692501068115, "logps/chosen": -190.28128051757812, "logps/rejected": -228.3871307373047, "loss": 0.4097, "rewards/accuracies": 0.75, "rewards/chosen": -0.7916634678840637, "rewards/margins": 1.9011632204055786, "rewards/rejected": -2.692826747894287, "step": 373 }, { "epoch": 0.08, "learning_rate": 1.851260504201681e-05, "logits/chosen": -1.8537722826004028, "logits/rejected": -1.660403847694397, "logps/chosen": -255.82949829101562, "logps/rejected": -304.01934814453125, "loss": 0.5173, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1927258968353271, "rewards/margins": 2.1019415855407715, "rewards/rejected": -3.2946677207946777, "step": 374 }, { "epoch": 0.08, "learning_rate": 1.850840336134454e-05, "logits/chosen": -2.033233642578125, "logits/rejected": -1.7248446941375732, "logps/chosen": -342.703857421875, "logps/rejected": -289.2439880371094, "loss": 0.3215, "rewards/accuracies": 0.8125, "rewards/chosen": -0.33285123109817505, "rewards/margins": 2.8797762393951416, "rewards/rejected": -3.212627410888672, "step": 375 }, { "epoch": 0.08, "learning_rate": 1.8504201680672272e-05, "logits/chosen": -2.2549326419830322, "logits/rejected": -1.728357195854187, "logps/chosen": -347.8545837402344, "logps/rejected": -270.393798828125, "loss": 0.0825, "rewards/accuracies": 1.0, "rewards/chosen": -0.4776824712753296, "rewards/margins": 4.028282165527344, "rewards/rejected": -4.505964756011963, "step": 376 }, { "epoch": 0.08, "learning_rate": 1.8500000000000002e-05, "logits/chosen": -1.7460956573486328, "logits/rejected": -1.9304550886154175, "logps/chosen": -372.11505126953125, "logps/rejected": -441.93463134765625, "loss": 0.3237, "rewards/accuracies": 0.875, "rewards/chosen": -0.5257360935211182, "rewards/margins": 3.7753725051879883, "rewards/rejected": -4.301108360290527, "step": 377 }, { "epoch": 0.08, "learning_rate": 1.8495798319327733e-05, "logits/chosen": -2.3899779319763184, "logits/rejected": -1.9155993461608887, "logps/chosen": -304.52337646484375, "logps/rejected": -241.9775848388672, "loss": 0.4843, "rewards/accuracies": 0.8125, "rewards/chosen": 0.13882781565189362, "rewards/margins": 2.850095748901367, "rewards/rejected": -2.711268186569214, "step": 378 }, { "epoch": 0.08, "learning_rate": 1.8491596638655466e-05, "logits/chosen": -1.9093596935272217, "logits/rejected": -1.9763383865356445, "logps/chosen": -250.72787475585938, "logps/rejected": -354.8570556640625, "loss": 0.5871, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9904338121414185, "rewards/margins": 1.7757227420806885, "rewards/rejected": -2.7661566734313965, "step": 379 }, { "epoch": 0.08, "learning_rate": 1.8487394957983196e-05, "logits/chosen": -2.28853702545166, "logits/rejected": -2.0587716102600098, "logps/chosen": -429.751220703125, "logps/rejected": -351.5947570800781, "loss": 0.5801, "rewards/accuracies": 0.8125, "rewards/chosen": -1.238794207572937, "rewards/margins": 2.1691994667053223, "rewards/rejected": -3.407993793487549, "step": 380 }, { "epoch": 0.08, "learning_rate": 1.8483193277310927e-05, "logits/chosen": -2.10351824760437, "logits/rejected": -1.8710891008377075, "logps/chosen": -342.17236328125, "logps/rejected": -328.80499267578125, "loss": 0.2729, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3526344895362854, "rewards/margins": 2.7666478157043457, "rewards/rejected": -3.1192824840545654, "step": 381 }, { "epoch": 0.08, "learning_rate": 1.8478991596638657e-05, "logits/chosen": -2.0367956161499023, "logits/rejected": -2.171997547149658, "logps/chosen": -391.1765441894531, "logps/rejected": -394.83306884765625, "loss": 0.3867, "rewards/accuracies": 0.8125, "rewards/chosen": -0.61182701587677, "rewards/margins": 2.4131217002868652, "rewards/rejected": -3.024948835372925, "step": 382 }, { "epoch": 0.08, "learning_rate": 1.847478991596639e-05, "logits/chosen": -2.070009469985962, "logits/rejected": -2.1090354919433594, "logps/chosen": -337.0970153808594, "logps/rejected": -356.8552551269531, "loss": 0.1785, "rewards/accuracies": 0.9375, "rewards/chosen": -0.47881823778152466, "rewards/margins": 3.552011013031006, "rewards/rejected": -4.030829429626465, "step": 383 }, { "epoch": 0.08, "learning_rate": 1.847058823529412e-05, "logits/chosen": -2.2249088287353516, "logits/rejected": -2.104274034500122, "logps/chosen": -302.1180114746094, "logps/rejected": -384.06268310546875, "loss": 0.3293, "rewards/accuracies": 0.875, "rewards/chosen": -0.7090399861335754, "rewards/margins": 2.8228683471679688, "rewards/rejected": -3.5319085121154785, "step": 384 }, { "epoch": 0.08, "learning_rate": 1.846638655462185e-05, "logits/chosen": -2.3999202251434326, "logits/rejected": -2.082932710647583, "logps/chosen": -319.16180419921875, "logps/rejected": -283.59246826171875, "loss": 0.254, "rewards/accuracies": 0.875, "rewards/chosen": -0.5637760162353516, "rewards/margins": 2.9321329593658447, "rewards/rejected": -3.4959089756011963, "step": 385 }, { "epoch": 0.08, "learning_rate": 1.846218487394958e-05, "logits/chosen": -2.296931028366089, "logits/rejected": -1.7105531692504883, "logps/chosen": -335.31927490234375, "logps/rejected": -289.3534240722656, "loss": 0.5521, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9079472422599792, "rewards/margins": 1.7789301872253418, "rewards/rejected": -2.6868772506713867, "step": 386 }, { "epoch": 0.08, "learning_rate": 1.8457983193277315e-05, "logits/chosen": -1.9932727813720703, "logits/rejected": -2.1460158824920654, "logps/chosen": -510.9311218261719, "logps/rejected": -411.8099060058594, "loss": 0.2199, "rewards/accuracies": 0.875, "rewards/chosen": -0.21243184804916382, "rewards/margins": 3.4859776496887207, "rewards/rejected": -3.69840931892395, "step": 387 }, { "epoch": 0.08, "learning_rate": 1.8453781512605045e-05, "logits/chosen": -2.1076436042785645, "logits/rejected": -1.8722412586212158, "logps/chosen": -278.708251953125, "logps/rejected": -431.20684814453125, "loss": 0.528, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4809589982032776, "rewards/margins": 2.700566291809082, "rewards/rejected": -3.181525230407715, "step": 388 }, { "epoch": 0.08, "learning_rate": 1.8449579831932775e-05, "logits/chosen": -2.1607489585876465, "logits/rejected": -1.726609468460083, "logps/chosen": -378.8289794921875, "logps/rejected": -346.7107238769531, "loss": 0.1821, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8186622858047485, "rewards/margins": 2.972604274749756, "rewards/rejected": -3.791266441345215, "step": 389 }, { "epoch": 0.08, "learning_rate": 1.8445378151260505e-05, "logits/chosen": -2.319326162338257, "logits/rejected": -1.7188987731933594, "logps/chosen": -342.0713806152344, "logps/rejected": -288.20916748046875, "loss": 0.4432, "rewards/accuracies": 0.875, "rewards/chosen": -1.0290842056274414, "rewards/margins": 2.874004364013672, "rewards/rejected": -3.9030888080596924, "step": 390 }, { "epoch": 0.08, "learning_rate": 1.844117647058824e-05, "logits/chosen": -2.2356772422790527, "logits/rejected": -2.220611333847046, "logps/chosen": -351.1100769042969, "logps/rejected": -338.7408447265625, "loss": 0.5559, "rewards/accuracies": 0.8125, "rewards/chosen": -0.503207266330719, "rewards/margins": 1.8040645122528076, "rewards/rejected": -2.307271718978882, "step": 391 }, { "epoch": 0.08, "learning_rate": 1.8436974789915966e-05, "logits/chosen": -2.1344399452209473, "logits/rejected": -2.0783677101135254, "logps/chosen": -261.3807678222656, "logps/rejected": -330.52557373046875, "loss": 0.2463, "rewards/accuracies": 0.875, "rewards/chosen": -1.0167301893234253, "rewards/margins": 2.9774012565612793, "rewards/rejected": -3.994131565093994, "step": 392 }, { "epoch": 0.08, "learning_rate": 1.84327731092437e-05, "logits/chosen": -1.9403150081634521, "logits/rejected": -1.5085800886154175, "logps/chosen": -249.44728088378906, "logps/rejected": -210.45999145507812, "loss": 0.8026, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2809702157974243, "rewards/margins": 1.0851925611495972, "rewards/rejected": -2.3661627769470215, "step": 393 }, { "epoch": 0.08, "learning_rate": 1.842857142857143e-05, "logits/chosen": -1.9822087287902832, "logits/rejected": -2.161910057067871, "logps/chosen": -254.12217712402344, "logps/rejected": -345.6323547363281, "loss": 0.1703, "rewards/accuracies": 1.0, "rewards/chosen": -0.8024090528488159, "rewards/margins": 2.9197912216186523, "rewards/rejected": -3.722200393676758, "step": 394 }, { "epoch": 0.08, "learning_rate": 1.842436974789916e-05, "logits/chosen": -1.6290154457092285, "logits/rejected": -1.6783397197723389, "logps/chosen": -340.3009948730469, "logps/rejected": -310.3421936035156, "loss": 0.1576, "rewards/accuracies": 0.9375, "rewards/chosen": -0.35950857400894165, "rewards/margins": 3.5517892837524414, "rewards/rejected": -3.9112980365753174, "step": 395 }, { "epoch": 0.08, "learning_rate": 1.842016806722689e-05, "logits/chosen": -2.0145533084869385, "logits/rejected": -1.870767593383789, "logps/chosen": -257.0691223144531, "logps/rejected": -346.18621826171875, "loss": 0.147, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5434238314628601, "rewards/margins": 4.360313415527344, "rewards/rejected": -4.903737545013428, "step": 396 }, { "epoch": 0.08, "learning_rate": 1.8415966386554624e-05, "logits/chosen": -2.049251079559326, "logits/rejected": -1.7490243911743164, "logps/chosen": -301.72601318359375, "logps/rejected": -335.76373291015625, "loss": 0.48, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6389510035514832, "rewards/margins": 2.212846279144287, "rewards/rejected": -2.851797103881836, "step": 397 }, { "epoch": 0.08, "learning_rate": 1.8411764705882354e-05, "logits/chosen": -2.0430452823638916, "logits/rejected": -2.023911952972412, "logps/chosen": -321.87933349609375, "logps/rejected": -410.43609619140625, "loss": 0.5061, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4173829555511475, "rewards/margins": 2.592723846435547, "rewards/rejected": -4.010106563568115, "step": 398 }, { "epoch": 0.08, "learning_rate": 1.8407563025210084e-05, "logits/chosen": -1.9393033981323242, "logits/rejected": -2.15283203125, "logps/chosen": -218.0631866455078, "logps/rejected": -263.5722351074219, "loss": 0.306, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2121131420135498, "rewards/margins": 2.416569948196411, "rewards/rejected": -3.628683090209961, "step": 399 }, { "epoch": 0.08, "learning_rate": 1.8403361344537814e-05, "logits/chosen": -2.161271095275879, "logits/rejected": -2.035555124282837, "logps/chosen": -225.12344360351562, "logps/rejected": -254.11572265625, "loss": 0.212, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8717067241668701, "rewards/margins": 3.1039505004882812, "rewards/rejected": -3.9756572246551514, "step": 400 }, { "epoch": 0.08, "learning_rate": 1.8399159663865548e-05, "logits/chosen": -2.0231268405914307, "logits/rejected": -1.8708224296569824, "logps/chosen": -254.9494171142578, "logps/rejected": -279.9722900390625, "loss": 0.4026, "rewards/accuracies": 0.875, "rewards/chosen": -0.8832752704620361, "rewards/margins": 3.6449685096740723, "rewards/rejected": -4.528243541717529, "step": 401 }, { "epoch": 0.08, "learning_rate": 1.839495798319328e-05, "logits/chosen": -1.9824044704437256, "logits/rejected": -2.199185371398926, "logps/chosen": -332.078125, "logps/rejected": -417.3800354003906, "loss": 0.6519, "rewards/accuracies": 0.625, "rewards/chosen": -1.5442051887512207, "rewards/margins": 1.857395887374878, "rewards/rejected": -3.4016010761260986, "step": 402 }, { "epoch": 0.08, "learning_rate": 1.839075630252101e-05, "logits/chosen": -1.8881011009216309, "logits/rejected": -2.0036232471466064, "logps/chosen": -272.0240783691406, "logps/rejected": -298.3892822265625, "loss": 0.4006, "rewards/accuracies": 0.75, "rewards/chosen": -1.4687628746032715, "rewards/margins": 3.4236550331115723, "rewards/rejected": -4.892417907714844, "step": 403 }, { "epoch": 0.08, "learning_rate": 1.838655462184874e-05, "logits/chosen": -2.3152854442596436, "logits/rejected": -1.3793350458145142, "logps/chosen": -408.720947265625, "logps/rejected": -259.06597900390625, "loss": 0.2866, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6584242582321167, "rewards/margins": 3.0207679271698, "rewards/rejected": -4.679192066192627, "step": 404 }, { "epoch": 0.08, "learning_rate": 1.8382352941176472e-05, "logits/chosen": -2.289783477783203, "logits/rejected": -1.8729841709136963, "logps/chosen": -449.4166259765625, "logps/rejected": -440.9778747558594, "loss": 0.3789, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3342485427856445, "rewards/margins": 2.9533896446228027, "rewards/rejected": -4.287638187408447, "step": 405 }, { "epoch": 0.08, "learning_rate": 1.8378151260504203e-05, "logits/chosen": -2.270742416381836, "logits/rejected": -2.2102785110473633, "logps/chosen": -264.51678466796875, "logps/rejected": -236.71121215820312, "loss": 0.4186, "rewards/accuracies": 0.875, "rewards/chosen": -0.9885307550430298, "rewards/margins": 1.7186174392700195, "rewards/rejected": -2.7071480751037598, "step": 406 }, { "epoch": 0.09, "learning_rate": 1.8373949579831933e-05, "logits/chosen": -2.402445077896118, "logits/rejected": -1.7826098203659058, "logps/chosen": -416.5032043457031, "logps/rejected": -298.052001953125, "loss": 0.3506, "rewards/accuracies": 0.875, "rewards/chosen": -0.5602412223815918, "rewards/margins": 2.2944812774658203, "rewards/rejected": -2.854722499847412, "step": 407 }, { "epoch": 0.09, "learning_rate": 1.8369747899159663e-05, "logits/chosen": -2.239947557449341, "logits/rejected": -2.2207236289978027, "logps/chosen": -381.25054931640625, "logps/rejected": -301.436767578125, "loss": 0.4853, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1972332000732422, "rewards/margins": 2.953791856765747, "rewards/rejected": -4.15102481842041, "step": 408 }, { "epoch": 0.09, "learning_rate": 1.8365546218487397e-05, "logits/chosen": -2.1951308250427246, "logits/rejected": -1.747607707977295, "logps/chosen": -286.4284973144531, "logps/rejected": -260.4895324707031, "loss": 0.503, "rewards/accuracies": 0.6875, "rewards/chosen": -1.4415708780288696, "rewards/margins": 1.7284908294677734, "rewards/rejected": -3.1700613498687744, "step": 409 }, { "epoch": 0.09, "learning_rate": 1.8361344537815127e-05, "logits/chosen": -1.9372808933258057, "logits/rejected": -2.1047611236572266, "logps/chosen": -199.15914916992188, "logps/rejected": -289.2158203125, "loss": 0.605, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5658094882965088, "rewards/margins": 2.872337818145752, "rewards/rejected": -3.43814754486084, "step": 410 }, { "epoch": 0.09, "learning_rate": 1.8357142857142857e-05, "logits/chosen": -2.3187527656555176, "logits/rejected": -1.9968873262405396, "logps/chosen": -387.2622375488281, "logps/rejected": -331.4651794433594, "loss": 0.1481, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3018587827682495, "rewards/margins": 3.6443562507629395, "rewards/rejected": -3.9462149143218994, "step": 411 }, { "epoch": 0.09, "learning_rate": 1.8352941176470587e-05, "logits/chosen": -2.2263684272766113, "logits/rejected": -1.7158617973327637, "logps/chosen": -283.41741943359375, "logps/rejected": -374.18658447265625, "loss": 0.3547, "rewards/accuracies": 0.6875, "rewards/chosen": 0.10880038887262344, "rewards/margins": 2.185023069381714, "rewards/rejected": -2.0762226581573486, "step": 412 }, { "epoch": 0.09, "learning_rate": 1.834873949579832e-05, "logits/chosen": -2.1612541675567627, "logits/rejected": -2.051351547241211, "logps/chosen": -413.21234130859375, "logps/rejected": -358.8020324707031, "loss": 0.4937, "rewards/accuracies": 0.75, "rewards/chosen": 0.3436061143875122, "rewards/margins": 3.0096888542175293, "rewards/rejected": -2.6660826206207275, "step": 413 }, { "epoch": 0.09, "learning_rate": 1.834453781512605e-05, "logits/chosen": -2.178018569946289, "logits/rejected": -1.7989444732666016, "logps/chosen": -366.8251953125, "logps/rejected": -301.50555419921875, "loss": 0.1768, "rewards/accuracies": 0.9375, "rewards/chosen": 0.47563356161117554, "rewards/margins": 2.7578303813934326, "rewards/rejected": -2.282196521759033, "step": 414 }, { "epoch": 0.09, "learning_rate": 1.834033613445378e-05, "logits/chosen": -2.2351861000061035, "logits/rejected": -1.9579159021377563, "logps/chosen": -255.4895782470703, "logps/rejected": -261.81390380859375, "loss": 0.3326, "rewards/accuracies": 0.8125, "rewards/chosen": -0.47969090938568115, "rewards/margins": 2.6430249214172363, "rewards/rejected": -3.122715950012207, "step": 415 }, { "epoch": 0.09, "learning_rate": 1.8336134453781515e-05, "logits/chosen": -1.8611608743667603, "logits/rejected": -2.120920419692993, "logps/chosen": -257.4296875, "logps/rejected": -257.55029296875, "loss": 0.192, "rewards/accuracies": 1.0, "rewards/chosen": -0.24374975264072418, "rewards/margins": 3.0062265396118164, "rewards/rejected": -3.24997615814209, "step": 416 }, { "epoch": 0.09, "learning_rate": 1.8331932773109245e-05, "logits/chosen": -1.9984084367752075, "logits/rejected": -1.6272146701812744, "logps/chosen": -280.01177978515625, "logps/rejected": -275.6769714355469, "loss": 0.4414, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8667396306991577, "rewards/margins": 2.1595852375030518, "rewards/rejected": -3.02632474899292, "step": 417 }, { "epoch": 0.09, "learning_rate": 1.8327731092436975e-05, "logits/chosen": -1.8926503658294678, "logits/rejected": -1.1714746952056885, "logps/chosen": -479.9905700683594, "logps/rejected": -263.75341796875, "loss": 0.4008, "rewards/accuracies": 0.6875, "rewards/chosen": 1.0290093421936035, "rewards/margins": 2.638532876968384, "rewards/rejected": -1.6095231771469116, "step": 418 }, { "epoch": 0.09, "learning_rate": 1.8323529411764706e-05, "logits/chosen": -2.300912380218506, "logits/rejected": -2.214662551879883, "logps/chosen": -399.84088134765625, "logps/rejected": -403.6318664550781, "loss": 0.2769, "rewards/accuracies": 0.875, "rewards/chosen": 0.5819576978683472, "rewards/margins": 2.674020767211914, "rewards/rejected": -2.0920627117156982, "step": 419 }, { "epoch": 0.09, "learning_rate": 1.831932773109244e-05, "logits/chosen": -1.975816011428833, "logits/rejected": -2.032226085662842, "logps/chosen": -272.3787841796875, "logps/rejected": -283.6316833496094, "loss": 0.4659, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2719263732433319, "rewards/margins": 1.9665776491165161, "rewards/rejected": -2.238503932952881, "step": 420 }, { "epoch": 0.09, "learning_rate": 1.831512605042017e-05, "logits/chosen": -2.149604558944702, "logits/rejected": -1.8828272819519043, "logps/chosen": -262.6552734375, "logps/rejected": -235.02587890625, "loss": 0.2882, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3024459183216095, "rewards/margins": 3.7423324584960938, "rewards/rejected": -4.044778823852539, "step": 421 }, { "epoch": 0.09, "learning_rate": 1.83109243697479e-05, "logits/chosen": -2.101395845413208, "logits/rejected": -1.5884850025177002, "logps/chosen": -332.09197998046875, "logps/rejected": -277.72662353515625, "loss": 0.3119, "rewards/accuracies": 0.875, "rewards/chosen": 0.133995920419693, "rewards/margins": 2.948085069656372, "rewards/rejected": -2.81408953666687, "step": 422 }, { "epoch": 0.09, "learning_rate": 1.830672268907563e-05, "logits/chosen": -1.9242068529129028, "logits/rejected": -2.0045411586761475, "logps/chosen": -353.66900634765625, "logps/rejected": -406.51416015625, "loss": 0.3477, "rewards/accuracies": 0.75, "rewards/chosen": -0.5238945484161377, "rewards/margins": 2.9817824363708496, "rewards/rejected": -3.5056772232055664, "step": 423 }, { "epoch": 0.09, "learning_rate": 1.8302521008403364e-05, "logits/chosen": -2.0353612899780273, "logits/rejected": -1.8938074111938477, "logps/chosen": -286.2059631347656, "logps/rejected": -292.628173828125, "loss": 0.5462, "rewards/accuracies": 0.625, "rewards/chosen": 0.02371799945831299, "rewards/margins": 2.6758546829223633, "rewards/rejected": -2.65213680267334, "step": 424 }, { "epoch": 0.09, "learning_rate": 1.8298319327731094e-05, "logits/chosen": -2.05151629447937, "logits/rejected": -1.985163688659668, "logps/chosen": -400.56103515625, "logps/rejected": -320.9740295410156, "loss": 0.4048, "rewards/accuracies": 0.75, "rewards/chosen": 0.5659486055374146, "rewards/margins": 2.076845645904541, "rewards/rejected": -1.5108968019485474, "step": 425 }, { "epoch": 0.09, "learning_rate": 1.8294117647058824e-05, "logits/chosen": -2.1431612968444824, "logits/rejected": -2.0062804222106934, "logps/chosen": -378.94451904296875, "logps/rejected": -320.79449462890625, "loss": 0.2322, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05997392535209656, "rewards/margins": 2.9121718406677246, "rewards/rejected": -2.8521978855133057, "step": 426 }, { "epoch": 0.09, "learning_rate": 1.8289915966386554e-05, "logits/chosen": -1.6912953853607178, "logits/rejected": -1.7182713747024536, "logps/chosen": -303.0714111328125, "logps/rejected": -350.28192138671875, "loss": 0.2803, "rewards/accuracies": 0.8125, "rewards/chosen": -0.1679016798734665, "rewards/margins": 2.768211603164673, "rewards/rejected": -2.936113119125366, "step": 427 }, { "epoch": 0.09, "learning_rate": 1.8285714285714288e-05, "logits/chosen": -2.379438877105713, "logits/rejected": -2.1407203674316406, "logps/chosen": -374.27264404296875, "logps/rejected": -381.09228515625, "loss": 0.2139, "rewards/accuracies": 0.875, "rewards/chosen": 0.26474815607070923, "rewards/margins": 2.9242446422576904, "rewards/rejected": -2.659496307373047, "step": 428 }, { "epoch": 0.09, "learning_rate": 1.8281512605042018e-05, "logits/chosen": -1.895861268043518, "logits/rejected": -2.0332565307617188, "logps/chosen": -337.95281982421875, "logps/rejected": -378.36053466796875, "loss": 0.2621, "rewards/accuracies": 0.875, "rewards/chosen": 0.4939233660697937, "rewards/margins": 2.6256091594696045, "rewards/rejected": -2.131685733795166, "step": 429 }, { "epoch": 0.09, "learning_rate": 1.8277310924369748e-05, "logits/chosen": -1.9959743022918701, "logits/rejected": -1.8283321857452393, "logps/chosen": -230.7371826171875, "logps/rejected": -399.9266052246094, "loss": 0.2077, "rewards/accuracies": 0.875, "rewards/chosen": -0.5777879357337952, "rewards/margins": 3.7699341773986816, "rewards/rejected": -4.347722053527832, "step": 430 }, { "epoch": 0.09, "learning_rate": 1.827310924369748e-05, "logits/chosen": -2.3392908573150635, "logits/rejected": -2.0552642345428467, "logps/chosen": -420.57196044921875, "logps/rejected": -356.7542419433594, "loss": 0.4148, "rewards/accuracies": 0.875, "rewards/chosen": 0.16800589859485626, "rewards/margins": 3.497415542602539, "rewards/rejected": -3.3294098377227783, "step": 431 }, { "epoch": 0.09, "learning_rate": 1.8268907563025212e-05, "logits/chosen": -2.1076271533966064, "logits/rejected": -1.7760194540023804, "logps/chosen": -438.2403869628906, "logps/rejected": -340.4200134277344, "loss": 0.2498, "rewards/accuracies": 0.875, "rewards/chosen": 0.29134923219680786, "rewards/margins": 3.066493034362793, "rewards/rejected": -2.77514386177063, "step": 432 }, { "epoch": 0.09, "learning_rate": 1.8264705882352942e-05, "logits/chosen": -1.9294633865356445, "logits/rejected": -1.7531614303588867, "logps/chosen": -268.9462890625, "logps/rejected": -265.12286376953125, "loss": 0.4439, "rewards/accuracies": 0.75, "rewards/chosen": 0.32981252670288086, "rewards/margins": 2.1313040256500244, "rewards/rejected": -1.8014914989471436, "step": 433 }, { "epoch": 0.09, "learning_rate": 1.8260504201680673e-05, "logits/chosen": -2.2013320922851562, "logits/rejected": -2.003314971923828, "logps/chosen": -243.44723510742188, "logps/rejected": -226.79901123046875, "loss": 0.4459, "rewards/accuracies": 0.75, "rewards/chosen": 0.1876690834760666, "rewards/margins": 2.4671425819396973, "rewards/rejected": -2.279473304748535, "step": 434 }, { "epoch": 0.09, "learning_rate": 1.8256302521008403e-05, "logits/chosen": -2.0985069274902344, "logits/rejected": -2.01230788230896, "logps/chosen": -327.3497009277344, "logps/rejected": -303.96533203125, "loss": 0.3102, "rewards/accuracies": 0.9375, "rewards/chosen": 0.7715959548950195, "rewards/margins": 3.2752957344055176, "rewards/rejected": -2.503699779510498, "step": 435 }, { "epoch": 0.09, "learning_rate": 1.8252100840336136e-05, "logits/chosen": -2.1409311294555664, "logits/rejected": -1.8834095001220703, "logps/chosen": -344.905029296875, "logps/rejected": -325.209716796875, "loss": 0.3905, "rewards/accuracies": 0.8125, "rewards/chosen": 0.20048293471336365, "rewards/margins": 1.9339041709899902, "rewards/rejected": -1.7334210872650146, "step": 436 }, { "epoch": 0.09, "learning_rate": 1.8247899159663867e-05, "logits/chosen": -2.15683650970459, "logits/rejected": -1.881533145904541, "logps/chosen": -356.26531982421875, "logps/rejected": -272.372314453125, "loss": 0.4086, "rewards/accuracies": 0.75, "rewards/chosen": -0.31405243277549744, "rewards/margins": 2.866708755493164, "rewards/rejected": -3.1807610988616943, "step": 437 }, { "epoch": 0.09, "learning_rate": 1.8243697478991597e-05, "logits/chosen": -2.448089599609375, "logits/rejected": -1.938776969909668, "logps/chosen": -255.0645294189453, "logps/rejected": -228.92654418945312, "loss": 0.4238, "rewards/accuracies": 0.875, "rewards/chosen": 0.18938884139060974, "rewards/margins": 2.685791254043579, "rewards/rejected": -2.4964022636413574, "step": 438 }, { "epoch": 0.09, "learning_rate": 1.823949579831933e-05, "logits/chosen": -2.3020715713500977, "logits/rejected": -1.7897909879684448, "logps/chosen": -369.67706298828125, "logps/rejected": -308.80322265625, "loss": 0.2207, "rewards/accuracies": 0.875, "rewards/chosen": -0.018297463655471802, "rewards/margins": 2.9154388904571533, "rewards/rejected": -2.9337363243103027, "step": 439 }, { "epoch": 0.09, "learning_rate": 1.823529411764706e-05, "logits/chosen": -2.04315185546875, "logits/rejected": -2.176276206970215, "logps/chosen": -344.2831115722656, "logps/rejected": -365.1346435546875, "loss": 0.2441, "rewards/accuracies": 0.875, "rewards/chosen": 0.8508395552635193, "rewards/margins": 3.6053500175476074, "rewards/rejected": -2.7545104026794434, "step": 440 }, { "epoch": 0.09, "learning_rate": 1.823109243697479e-05, "logits/chosen": -1.9498186111450195, "logits/rejected": -1.7883284091949463, "logps/chosen": -214.4251708984375, "logps/rejected": -277.6398620605469, "loss": 0.3085, "rewards/accuracies": 0.75, "rewards/chosen": 0.42159321904182434, "rewards/margins": 2.0666284561157227, "rewards/rejected": -1.6450350284576416, "step": 441 }, { "epoch": 0.09, "learning_rate": 1.822689075630252e-05, "logits/chosen": -2.3132596015930176, "logits/rejected": -2.1059367656707764, "logps/chosen": -330.3541564941406, "logps/rejected": -354.6991271972656, "loss": 0.5807, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4959174692630768, "rewards/margins": 2.7793803215026855, "rewards/rejected": -3.2752978801727295, "step": 442 }, { "epoch": 0.09, "learning_rate": 1.8222689075630255e-05, "logits/chosen": -1.8083010911941528, "logits/rejected": -1.611409306526184, "logps/chosen": -264.4505920410156, "logps/rejected": -268.5172424316406, "loss": 0.2615, "rewards/accuracies": 0.9375, "rewards/chosen": -0.11492379009723663, "rewards/margins": 3.0617222785949707, "rewards/rejected": -3.1766459941864014, "step": 443 }, { "epoch": 0.09, "learning_rate": 1.8218487394957985e-05, "logits/chosen": -2.0951881408691406, "logits/rejected": -1.8586359024047852, "logps/chosen": -311.2127990722656, "logps/rejected": -386.22259521484375, "loss": 0.5194, "rewards/accuracies": 0.8125, "rewards/chosen": 0.17915694415569305, "rewards/margins": 3.9140496253967285, "rewards/rejected": -3.7348928451538086, "step": 444 }, { "epoch": 0.09, "learning_rate": 1.8214285714285715e-05, "logits/chosen": -2.2957074642181396, "logits/rejected": -2.258702278137207, "logps/chosen": -312.64788818359375, "logps/rejected": -272.65411376953125, "loss": 0.3874, "rewards/accuracies": 0.75, "rewards/chosen": -0.40231961011886597, "rewards/margins": 2.468316078186035, "rewards/rejected": -2.870635747909546, "step": 445 }, { "epoch": 0.09, "learning_rate": 1.8210084033613445e-05, "logits/chosen": -2.11244535446167, "logits/rejected": -1.9430699348449707, "logps/chosen": -316.4300537109375, "logps/rejected": -272.95037841796875, "loss": 0.3922, "rewards/accuracies": 0.8125, "rewards/chosen": 0.45706817507743835, "rewards/margins": 2.1827540397644043, "rewards/rejected": -1.7256858348846436, "step": 446 }, { "epoch": 0.09, "learning_rate": 1.820588235294118e-05, "logits/chosen": -1.9815583229064941, "logits/rejected": -1.6495671272277832, "logps/chosen": -288.887451171875, "logps/rejected": -277.43377685546875, "loss": 0.2881, "rewards/accuracies": 0.875, "rewards/chosen": -0.031989581882953644, "rewards/margins": 2.7415804862976074, "rewards/rejected": -2.7735700607299805, "step": 447 }, { "epoch": 0.09, "learning_rate": 1.820168067226891e-05, "logits/chosen": -1.7236675024032593, "logits/rejected": -1.9960949420928955, "logps/chosen": -238.47738647460938, "logps/rejected": -283.5688781738281, "loss": 0.7905, "rewards/accuracies": 0.5625, "rewards/chosen": -0.5538796782493591, "rewards/margins": 0.8750126361846924, "rewards/rejected": -1.4288922548294067, "step": 448 }, { "epoch": 0.09, "learning_rate": 1.819747899159664e-05, "logits/chosen": -1.5379657745361328, "logits/rejected": -1.375888705253601, "logps/chosen": -241.681396484375, "logps/rejected": -278.0999755859375, "loss": 0.4768, "rewards/accuracies": 0.75, "rewards/chosen": -0.11749270558357239, "rewards/margins": 2.251631498336792, "rewards/rejected": -2.369123935699463, "step": 449 }, { "epoch": 0.09, "learning_rate": 1.819327731092437e-05, "logits/chosen": -1.990583062171936, "logits/rejected": -2.044398546218872, "logps/chosen": -333.5953369140625, "logps/rejected": -397.26483154296875, "loss": 0.4174, "rewards/accuracies": 0.75, "rewards/chosen": -0.16924190521240234, "rewards/margins": 1.9998054504394531, "rewards/rejected": -2.1690473556518555, "step": 450 }, { "epoch": 0.09, "learning_rate": 1.8189075630252103e-05, "logits/chosen": -2.0269851684570312, "logits/rejected": -2.1474108695983887, "logps/chosen": -273.5993957519531, "logps/rejected": -385.10882568359375, "loss": 0.2793, "rewards/accuracies": 0.875, "rewards/chosen": -0.12756304442882538, "rewards/margins": 2.1246533393859863, "rewards/rejected": -2.2522165775299072, "step": 451 }, { "epoch": 0.09, "learning_rate": 1.8184873949579833e-05, "logits/chosen": -2.1646690368652344, "logits/rejected": -1.8509190082550049, "logps/chosen": -248.11123657226562, "logps/rejected": -261.6076965332031, "loss": 0.2691, "rewards/accuracies": 0.8125, "rewards/chosen": 0.18941614031791687, "rewards/margins": 2.55245304107666, "rewards/rejected": -2.363036632537842, "step": 452 }, { "epoch": 0.09, "learning_rate": 1.8180672268907564e-05, "logits/chosen": -2.0878331661224365, "logits/rejected": -1.8863886594772339, "logps/chosen": -250.40542602539062, "logps/rejected": -275.64508056640625, "loss": 0.406, "rewards/accuracies": 0.6875, "rewards/chosen": -0.1500859409570694, "rewards/margins": 2.256984233856201, "rewards/rejected": -2.4070701599121094, "step": 453 }, { "epoch": 0.09, "learning_rate": 1.8176470588235294e-05, "logits/chosen": -1.9686896800994873, "logits/rejected": -1.6723570823669434, "logps/chosen": -274.258544921875, "logps/rejected": -314.515869140625, "loss": 0.4388, "rewards/accuracies": 0.6875, "rewards/chosen": 0.009008590131998062, "rewards/margins": 1.8709897994995117, "rewards/rejected": -1.8619811534881592, "step": 454 }, { "epoch": 0.1, "learning_rate": 1.8172268907563027e-05, "logits/chosen": -2.16030216217041, "logits/rejected": -1.8089762926101685, "logps/chosen": -468.20416259765625, "logps/rejected": -275.9560546875, "loss": 0.2804, "rewards/accuracies": 0.875, "rewards/chosen": 1.0918165445327759, "rewards/margins": 3.3607137203216553, "rewards/rejected": -2.26889705657959, "step": 455 }, { "epoch": 0.1, "learning_rate": 1.8168067226890758e-05, "logits/chosen": -1.7544831037521362, "logits/rejected": -1.9637465476989746, "logps/chosen": -186.10647583007812, "logps/rejected": -273.6681213378906, "loss": 0.477, "rewards/accuracies": 0.8125, "rewards/chosen": 0.006809163838624954, "rewards/margins": 1.667669415473938, "rewards/rejected": -1.660860300064087, "step": 456 }, { "epoch": 0.1, "learning_rate": 1.8163865546218488e-05, "logits/chosen": -2.226810932159424, "logits/rejected": -1.9543696641921997, "logps/chosen": -408.641845703125, "logps/rejected": -340.444580078125, "loss": 0.2819, "rewards/accuracies": 0.875, "rewards/chosen": 0.40470558404922485, "rewards/margins": 2.5785889625549316, "rewards/rejected": -2.1738831996917725, "step": 457 }, { "epoch": 0.1, "learning_rate": 1.815966386554622e-05, "logits/chosen": -1.8382527828216553, "logits/rejected": -1.8056399822235107, "logps/chosen": -353.8582763671875, "logps/rejected": -348.326171875, "loss": 0.5055, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05595725029706955, "rewards/margins": 1.7954365015029907, "rewards/rejected": -1.851393699645996, "step": 458 }, { "epoch": 0.1, "learning_rate": 1.8155462184873952e-05, "logits/chosen": -2.136549472808838, "logits/rejected": -1.4456157684326172, "logps/chosen": -251.87380981445312, "logps/rejected": -241.96295166015625, "loss": 0.3531, "rewards/accuracies": 0.875, "rewards/chosen": -0.05324000120162964, "rewards/margins": 2.4561290740966797, "rewards/rejected": -2.509368896484375, "step": 459 }, { "epoch": 0.1, "learning_rate": 1.8151260504201682e-05, "logits/chosen": -1.7990928888320923, "logits/rejected": -1.5568106174468994, "logps/chosen": -321.53485107421875, "logps/rejected": -309.19940185546875, "loss": 0.5021, "rewards/accuracies": 0.75, "rewards/chosen": -0.378719300031662, "rewards/margins": 2.2479031085968018, "rewards/rejected": -2.626622438430786, "step": 460 }, { "epoch": 0.1, "learning_rate": 1.8147058823529412e-05, "logits/chosen": -2.1900739669799805, "logits/rejected": -1.9575533866882324, "logps/chosen": -321.0294189453125, "logps/rejected": -284.5794677734375, "loss": 0.3111, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2660277485847473, "rewards/margins": 1.9817547798156738, "rewards/rejected": -2.2477824687957764, "step": 461 }, { "epoch": 0.1, "learning_rate": 1.8142857142857146e-05, "logits/chosen": -2.106145143508911, "logits/rejected": -1.78156316280365, "logps/chosen": -314.9206848144531, "logps/rejected": -266.0330810546875, "loss": 0.3376, "rewards/accuracies": 0.8125, "rewards/chosen": 0.03151342272758484, "rewards/margins": 2.3189806938171387, "rewards/rejected": -2.2874674797058105, "step": 462 }, { "epoch": 0.1, "learning_rate": 1.8138655462184876e-05, "logits/chosen": -2.070035219192505, "logits/rejected": -1.6459128856658936, "logps/chosen": -354.7567138671875, "logps/rejected": -307.1123352050781, "loss": 0.3962, "rewards/accuracies": 0.75, "rewards/chosen": 0.32077494263648987, "rewards/margins": 1.6847877502441406, "rewards/rejected": -1.3640128374099731, "step": 463 }, { "epoch": 0.1, "learning_rate": 1.8134453781512606e-05, "logits/chosen": -2.2774393558502197, "logits/rejected": -1.9349925518035889, "logps/chosen": -282.53900146484375, "logps/rejected": -297.27093505859375, "loss": 0.3103, "rewards/accuracies": 0.875, "rewards/chosen": 0.005994489416480064, "rewards/margins": 2.17958664894104, "rewards/rejected": -2.1735920906066895, "step": 464 }, { "epoch": 0.1, "learning_rate": 1.8130252100840336e-05, "logits/chosen": -2.030294895172119, "logits/rejected": -1.8782742023468018, "logps/chosen": -285.802001953125, "logps/rejected": -292.132568359375, "loss": 0.3012, "rewards/accuracies": 0.8125, "rewards/chosen": 0.04754617065191269, "rewards/margins": 1.7985600233078003, "rewards/rejected": -1.7510137557983398, "step": 465 }, { "epoch": 0.1, "learning_rate": 1.812605042016807e-05, "logits/chosen": -1.79093599319458, "logits/rejected": -1.7948484420776367, "logps/chosen": -288.230224609375, "logps/rejected": -319.9525146484375, "loss": 0.2704, "rewards/accuracies": 0.875, "rewards/chosen": -0.11549008637666702, "rewards/margins": 2.987337112426758, "rewards/rejected": -3.102827310562134, "step": 466 }, { "epoch": 0.1, "learning_rate": 1.81218487394958e-05, "logits/chosen": -2.0728960037231445, "logits/rejected": -2.0928308963775635, "logps/chosen": -272.58770751953125, "logps/rejected": -307.96490478515625, "loss": 0.1698, "rewards/accuracies": 0.875, "rewards/chosen": 0.3147255480289459, "rewards/margins": 3.7766573429107666, "rewards/rejected": -3.4619317054748535, "step": 467 }, { "epoch": 0.1, "learning_rate": 1.811764705882353e-05, "logits/chosen": -2.0510151386260986, "logits/rejected": -1.9429408311843872, "logps/chosen": -323.97467041015625, "logps/rejected": -315.77252197265625, "loss": 0.2078, "rewards/accuracies": 0.875, "rewards/chosen": 0.547203540802002, "rewards/margins": 3.029226303100586, "rewards/rejected": -2.482023000717163, "step": 468 }, { "epoch": 0.1, "learning_rate": 1.811344537815126e-05, "logits/chosen": -2.0363616943359375, "logits/rejected": -1.8428747653961182, "logps/chosen": -343.7933654785156, "logps/rejected": -302.945068359375, "loss": 0.2906, "rewards/accuracies": 0.875, "rewards/chosen": -0.2114700973033905, "rewards/margins": 3.440668821334839, "rewards/rejected": -3.6521387100219727, "step": 469 }, { "epoch": 0.1, "learning_rate": 1.8109243697478994e-05, "logits/chosen": -2.177489757537842, "logits/rejected": -1.9540671110153198, "logps/chosen": -345.616455078125, "logps/rejected": -389.21484375, "loss": 0.2852, "rewards/accuracies": 0.9375, "rewards/chosen": 0.23222655057907104, "rewards/margins": 2.5556788444519043, "rewards/rejected": -2.3234524726867676, "step": 470 }, { "epoch": 0.1, "learning_rate": 1.8105042016806725e-05, "logits/chosen": -2.24438214302063, "logits/rejected": -2.225999593734741, "logps/chosen": -306.83306884765625, "logps/rejected": -363.3036804199219, "loss": 0.5031, "rewards/accuracies": 0.75, "rewards/chosen": -0.5617220997810364, "rewards/margins": 2.1759278774261475, "rewards/rejected": -2.737650156021118, "step": 471 }, { "epoch": 0.1, "learning_rate": 1.8100840336134455e-05, "logits/chosen": -2.2119107246398926, "logits/rejected": -2.1409425735473633, "logps/chosen": -456.27923583984375, "logps/rejected": -367.75433349609375, "loss": 0.237, "rewards/accuracies": 0.875, "rewards/chosen": -0.6669508218765259, "rewards/margins": 2.9997236728668213, "rewards/rejected": -3.6666743755340576, "step": 472 }, { "epoch": 0.1, "learning_rate": 1.8096638655462185e-05, "logits/chosen": -1.991231918334961, "logits/rejected": -1.4948508739471436, "logps/chosen": -297.85968017578125, "logps/rejected": -290.2069396972656, "loss": 0.3433, "rewards/accuracies": 0.875, "rewards/chosen": -0.5998925566673279, "rewards/margins": 1.7855578660964966, "rewards/rejected": -2.3854503631591797, "step": 473 }, { "epoch": 0.1, "learning_rate": 1.809243697478992e-05, "logits/chosen": -2.2086966037750244, "logits/rejected": -2.0955843925476074, "logps/chosen": -221.19500732421875, "logps/rejected": -242.82528686523438, "loss": 0.5517, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9625298380851746, "rewards/margins": 2.123574733734131, "rewards/rejected": -3.08610463142395, "step": 474 }, { "epoch": 0.1, "learning_rate": 1.808823529411765e-05, "logits/chosen": -2.027224063873291, "logits/rejected": -1.9769313335418701, "logps/chosen": -160.25396728515625, "logps/rejected": -188.99205017089844, "loss": 0.425, "rewards/accuracies": 0.75, "rewards/chosen": -0.7655349969863892, "rewards/margins": 2.5798983573913574, "rewards/rejected": -3.345433473587036, "step": 475 }, { "epoch": 0.1, "learning_rate": 1.808403361344538e-05, "logits/chosen": -1.9810261726379395, "logits/rejected": -1.7768886089324951, "logps/chosen": -362.0196533203125, "logps/rejected": -354.4373779296875, "loss": 0.5212, "rewards/accuracies": 0.75, "rewards/chosen": -0.6842369437217712, "rewards/margins": 1.4541566371917725, "rewards/rejected": -2.1383936405181885, "step": 476 }, { "epoch": 0.1, "learning_rate": 1.807983193277311e-05, "logits/chosen": -1.9409639835357666, "logits/rejected": -1.6685116291046143, "logps/chosen": -352.915283203125, "logps/rejected": -366.5359191894531, "loss": 0.3444, "rewards/accuracies": 0.8125, "rewards/chosen": 0.1256626546382904, "rewards/margins": 3.746347665786743, "rewards/rejected": -3.62068510055542, "step": 477 }, { "epoch": 0.1, "learning_rate": 1.8075630252100843e-05, "logits/chosen": -1.9778801202774048, "logits/rejected": -1.526355266571045, "logps/chosen": -396.0119934082031, "logps/rejected": -335.4312438964844, "loss": 0.1799, "rewards/accuracies": 0.9375, "rewards/chosen": 0.05994391441345215, "rewards/margins": 4.086123943328857, "rewards/rejected": -4.026180267333984, "step": 478 }, { "epoch": 0.1, "learning_rate": 1.8071428571428573e-05, "logits/chosen": -2.014559507369995, "logits/rejected": -1.9628279209136963, "logps/chosen": -287.56622314453125, "logps/rejected": -300.8746032714844, "loss": 0.2029, "rewards/accuracies": 0.9375, "rewards/chosen": -0.18346592783927917, "rewards/margins": 3.5677192211151123, "rewards/rejected": -3.751185178756714, "step": 479 }, { "epoch": 0.1, "learning_rate": 1.8067226890756303e-05, "logits/chosen": -2.129014015197754, "logits/rejected": -1.7606970071792603, "logps/chosen": -255.42312622070312, "logps/rejected": -274.4722900390625, "loss": 0.3294, "rewards/accuracies": 0.75, "rewards/chosen": -0.8223708271980286, "rewards/margins": 2.844892978668213, "rewards/rejected": -3.6672637462615967, "step": 480 }, { "epoch": 0.1, "learning_rate": 1.8063025210084037e-05, "logits/chosen": -2.204853057861328, "logits/rejected": -1.9907968044281006, "logps/chosen": -490.4839782714844, "logps/rejected": -380.83978271484375, "loss": 0.142, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12432041764259338, "rewards/margins": 3.2527475357055664, "rewards/rejected": -3.377067804336548, "step": 481 }, { "epoch": 0.1, "learning_rate": 1.8058823529411767e-05, "logits/chosen": -2.3475499153137207, "logits/rejected": -1.9604389667510986, "logps/chosen": -304.75970458984375, "logps/rejected": -287.97637939453125, "loss": 0.2535, "rewards/accuracies": 0.875, "rewards/chosen": -0.9087990522384644, "rewards/margins": 3.345973491668701, "rewards/rejected": -4.254772186279297, "step": 482 }, { "epoch": 0.1, "learning_rate": 1.8054621848739497e-05, "logits/chosen": -2.079780340194702, "logits/rejected": -2.1623263359069824, "logps/chosen": -299.0748596191406, "logps/rejected": -359.23858642578125, "loss": 0.3861, "rewards/accuracies": 0.875, "rewards/chosen": -0.4798990786075592, "rewards/margins": 3.2230770587921143, "rewards/rejected": -3.7029762268066406, "step": 483 }, { "epoch": 0.1, "learning_rate": 1.8050420168067228e-05, "logits/chosen": -1.9328267574310303, "logits/rejected": -1.971990942955017, "logps/chosen": -213.33895874023438, "logps/rejected": -288.5398864746094, "loss": 0.4663, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7918972373008728, "rewards/margins": 2.957124710083008, "rewards/rejected": -3.749021530151367, "step": 484 }, { "epoch": 0.1, "learning_rate": 1.804621848739496e-05, "logits/chosen": -2.017963171005249, "logits/rejected": -1.8460841178894043, "logps/chosen": -289.8305969238281, "logps/rejected": -289.7749938964844, "loss": 1.0281, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8532503843307495, "rewards/margins": 1.8140392303466797, "rewards/rejected": -3.6672897338867188, "step": 485 }, { "epoch": 0.1, "learning_rate": 1.804201680672269e-05, "logits/chosen": -2.2805466651916504, "logits/rejected": -1.931815266609192, "logps/chosen": -319.70196533203125, "logps/rejected": -238.758056640625, "loss": 0.3343, "rewards/accuracies": 0.8125, "rewards/chosen": -0.938738226890564, "rewards/margins": 3.0914947986602783, "rewards/rejected": -4.030232906341553, "step": 486 }, { "epoch": 0.1, "learning_rate": 1.803781512605042e-05, "logits/chosen": -2.1851680278778076, "logits/rejected": -2.0835630893707275, "logps/chosen": -274.6766052246094, "logps/rejected": -432.77618408203125, "loss": 0.2779, "rewards/accuracies": 0.875, "rewards/chosen": -0.8493441939353943, "rewards/margins": 3.7885684967041016, "rewards/rejected": -4.637912750244141, "step": 487 }, { "epoch": 0.1, "learning_rate": 1.8033613445378152e-05, "logits/chosen": -1.6215686798095703, "logits/rejected": -1.8605685234069824, "logps/chosen": -204.21066284179688, "logps/rejected": -253.59251403808594, "loss": 0.1575, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0725224018096924, "rewards/margins": 3.6719846725463867, "rewards/rejected": -4.744507312774658, "step": 488 }, { "epoch": 0.1, "learning_rate": 1.8029411764705886e-05, "logits/chosen": -1.9747123718261719, "logits/rejected": -1.6317942142486572, "logps/chosen": -457.21624755859375, "logps/rejected": -340.9676208496094, "loss": 0.3963, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4580353796482086, "rewards/margins": 2.5620951652526855, "rewards/rejected": -3.0201303958892822, "step": 489 }, { "epoch": 0.1, "learning_rate": 1.8025210084033616e-05, "logits/chosen": -2.1669583320617676, "logits/rejected": -1.611142635345459, "logps/chosen": -335.9529113769531, "logps/rejected": -296.7880554199219, "loss": 0.2719, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3707687556743622, "rewards/margins": 2.8880085945129395, "rewards/rejected": -3.258777141571045, "step": 490 }, { "epoch": 0.1, "learning_rate": 1.8021008403361346e-05, "logits/chosen": -2.119159460067749, "logits/rejected": -1.8584749698638916, "logps/chosen": -445.0024108886719, "logps/rejected": -498.0438232421875, "loss": 0.2557, "rewards/accuracies": 0.875, "rewards/chosen": 0.10292220115661621, "rewards/margins": 3.0323965549468994, "rewards/rejected": -2.929474353790283, "step": 491 }, { "epoch": 0.1, "learning_rate": 1.8016806722689076e-05, "logits/chosen": -2.186915874481201, "logits/rejected": -2.149617910385132, "logps/chosen": -254.51690673828125, "logps/rejected": -338.875244140625, "loss": 0.3422, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8599196672439575, "rewards/margins": 3.1991493701934814, "rewards/rejected": -4.0590691566467285, "step": 492 }, { "epoch": 0.1, "learning_rate": 1.801260504201681e-05, "logits/chosen": -2.3135881423950195, "logits/rejected": -2.0308024883270264, "logps/chosen": -365.0280456542969, "logps/rejected": -309.6629638671875, "loss": 0.3966, "rewards/accuracies": 0.875, "rewards/chosen": -0.1584479808807373, "rewards/margins": 3.358734607696533, "rewards/rejected": -3.5171828269958496, "step": 493 }, { "epoch": 0.1, "learning_rate": 1.800840336134454e-05, "logits/chosen": -1.8752378225326538, "logits/rejected": -1.6269867420196533, "logps/chosen": -290.3400573730469, "logps/rejected": -318.9837951660156, "loss": 0.5858, "rewards/accuracies": 0.625, "rewards/chosen": -1.4499101638793945, "rewards/margins": 1.7428889274597168, "rewards/rejected": -3.1927988529205322, "step": 494 }, { "epoch": 0.1, "learning_rate": 1.800420168067227e-05, "logits/chosen": -2.2372565269470215, "logits/rejected": -2.0063397884368896, "logps/chosen": -354.43402099609375, "logps/rejected": -352.0789489746094, "loss": 0.6456, "rewards/accuracies": 0.5625, "rewards/chosen": -0.7475398778915405, "rewards/margins": 1.893190622329712, "rewards/rejected": -2.640730381011963, "step": 495 }, { "epoch": 0.1, "learning_rate": 1.8e-05, "logits/chosen": -1.9513461589813232, "logits/rejected": -1.806603193283081, "logps/chosen": -261.93560791015625, "logps/rejected": -308.74566650390625, "loss": 0.6531, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3341524600982666, "rewards/margins": 1.4642958641052246, "rewards/rejected": -2.798448085784912, "step": 496 }, { "epoch": 0.1, "learning_rate": 1.7995798319327734e-05, "logits/chosen": -1.8942919969558716, "logits/rejected": -1.8682172298431396, "logps/chosen": -290.3580017089844, "logps/rejected": -304.41546630859375, "loss": 0.1651, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6255410313606262, "rewards/margins": 3.0988292694091797, "rewards/rejected": -3.7243704795837402, "step": 497 }, { "epoch": 0.1, "learning_rate": 1.7991596638655464e-05, "logits/chosen": -2.0446813106536865, "logits/rejected": -1.956098198890686, "logps/chosen": -370.5096740722656, "logps/rejected": -436.359130859375, "loss": 0.3016, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5994164943695068, "rewards/margins": 2.8163557052612305, "rewards/rejected": -3.4157724380493164, "step": 498 }, { "epoch": 0.1, "learning_rate": 1.7987394957983195e-05, "logits/chosen": -2.373622417449951, "logits/rejected": -2.1277596950531006, "logps/chosen": -410.5650634765625, "logps/rejected": -336.9373474121094, "loss": 0.1102, "rewards/accuracies": 0.9375, "rewards/chosen": 0.22633305191993713, "rewards/margins": 3.778165340423584, "rewards/rejected": -3.5518321990966797, "step": 499 }, { "epoch": 0.1, "learning_rate": 1.7983193277310925e-05, "logits/chosen": -2.2056350708007812, "logits/rejected": -1.6106029748916626, "logps/chosen": -333.8369140625, "logps/rejected": -255.19503784179688, "loss": 0.1935, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2770302891731262, "rewards/margins": 3.305591106414795, "rewards/rejected": -3.5826215744018555, "step": 500 }, { "epoch": 0.1, "learning_rate": 1.797899159663866e-05, "logits/chosen": -2.2231571674346924, "logits/rejected": -2.1447365283966064, "logps/chosen": -351.8971862792969, "logps/rejected": -331.91748046875, "loss": 0.6916, "rewards/accuracies": 0.75, "rewards/chosen": -0.7122281789779663, "rewards/margins": 2.016400098800659, "rewards/rejected": -2.728628396987915, "step": 501 }, { "epoch": 0.11, "learning_rate": 1.797478991596639e-05, "logits/chosen": -2.379485607147217, "logits/rejected": -1.968024492263794, "logps/chosen": -301.2986145019531, "logps/rejected": -281.73876953125, "loss": 0.3369, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6425855159759521, "rewards/margins": 3.2059216499328613, "rewards/rejected": -3.8485074043273926, "step": 502 }, { "epoch": 0.11, "learning_rate": 1.797058823529412e-05, "logits/chosen": -2.228400468826294, "logits/rejected": -1.5500074625015259, "logps/chosen": -419.002685546875, "logps/rejected": -314.27691650390625, "loss": 0.3732, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3209814429283142, "rewards/margins": 2.987415075302124, "rewards/rejected": -3.308396339416504, "step": 503 }, { "epoch": 0.11, "learning_rate": 1.7966386554621852e-05, "logits/chosen": -1.9893072843551636, "logits/rejected": -2.028350353240967, "logps/chosen": -378.90264892578125, "logps/rejected": -348.1063232421875, "loss": 0.3905, "rewards/accuracies": 0.875, "rewards/chosen": 0.0967445820569992, "rewards/margins": 2.305095911026001, "rewards/rejected": -2.2083513736724854, "step": 504 }, { "epoch": 0.11, "learning_rate": 1.7962184873949583e-05, "logits/chosen": -2.348510265350342, "logits/rejected": -2.255265712738037, "logps/chosen": -321.7799377441406, "logps/rejected": -277.0984191894531, "loss": 0.2913, "rewards/accuracies": 0.875, "rewards/chosen": -0.3429746627807617, "rewards/margins": 3.408268928527832, "rewards/rejected": -3.7512435913085938, "step": 505 }, { "epoch": 0.11, "learning_rate": 1.7957983193277313e-05, "logits/chosen": -2.207750082015991, "logits/rejected": -2.218050003051758, "logps/chosen": -267.4234313964844, "logps/rejected": -365.1224365234375, "loss": 0.4753, "rewards/accuracies": 0.6875, "rewards/chosen": -0.36581629514694214, "rewards/margins": 1.6360876560211182, "rewards/rejected": -2.001904010772705, "step": 506 }, { "epoch": 0.11, "learning_rate": 1.7953781512605043e-05, "logits/chosen": -2.389249086380005, "logits/rejected": -2.1676321029663086, "logps/chosen": -297.28271484375, "logps/rejected": -332.85797119140625, "loss": 0.1328, "rewards/accuracies": 1.0, "rewards/chosen": -0.2257842868566513, "rewards/margins": 3.5806705951690674, "rewards/rejected": -3.806454658508301, "step": 507 }, { "epoch": 0.11, "learning_rate": 1.7949579831932777e-05, "logits/chosen": -2.205676317214966, "logits/rejected": -2.053122043609619, "logps/chosen": -449.7177734375, "logps/rejected": -374.27874755859375, "loss": 0.5052, "rewards/accuracies": 0.8125, "rewards/chosen": -0.2652510106563568, "rewards/margins": 2.2145097255706787, "rewards/rejected": -2.4797608852386475, "step": 508 }, { "epoch": 0.11, "learning_rate": 1.7945378151260507e-05, "logits/chosen": -2.2396016120910645, "logits/rejected": -1.7451249361038208, "logps/chosen": -240.06060791015625, "logps/rejected": -221.070068359375, "loss": 0.1928, "rewards/accuracies": 0.875, "rewards/chosen": -0.20131206512451172, "rewards/margins": 3.187617778778076, "rewards/rejected": -3.388929843902588, "step": 509 }, { "epoch": 0.11, "learning_rate": 1.7941176470588237e-05, "logits/chosen": -2.279101848602295, "logits/rejected": -1.6466021537780762, "logps/chosen": -363.7454528808594, "logps/rejected": -322.2547607421875, "loss": 0.1723, "rewards/accuracies": 0.9375, "rewards/chosen": -0.05343270301818848, "rewards/margins": 4.262198448181152, "rewards/rejected": -4.315630912780762, "step": 510 }, { "epoch": 0.11, "learning_rate": 1.7936974789915967e-05, "logits/chosen": -2.101118803024292, "logits/rejected": -2.1309876441955566, "logps/chosen": -376.3255920410156, "logps/rejected": -325.61444091796875, "loss": 0.7477, "rewards/accuracies": 0.75, "rewards/chosen": -0.6300605535507202, "rewards/margins": 1.8778836727142334, "rewards/rejected": -2.507944107055664, "step": 511 }, { "epoch": 0.11, "learning_rate": 1.79327731092437e-05, "logits/chosen": -1.7612611055374146, "logits/rejected": -1.7461483478546143, "logps/chosen": -290.1326904296875, "logps/rejected": -362.45892333984375, "loss": 0.2199, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9684104323387146, "rewards/margins": 3.9241747856140137, "rewards/rejected": -4.892585277557373, "step": 512 }, { "epoch": 0.11, "learning_rate": 1.792857142857143e-05, "logits/chosen": -2.28304123878479, "logits/rejected": -2.103008270263672, "logps/chosen": -384.8870849609375, "logps/rejected": -371.3544616699219, "loss": 0.562, "rewards/accuracies": 0.75, "rewards/chosen": -0.7352423667907715, "rewards/margins": 2.027202606201172, "rewards/rejected": -2.7624449729919434, "step": 513 }, { "epoch": 0.11, "learning_rate": 1.792436974789916e-05, "logits/chosen": -2.233563184738159, "logits/rejected": -2.147578001022339, "logps/chosen": -337.4376525878906, "logps/rejected": -295.6685485839844, "loss": 0.2794, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6074300408363342, "rewards/margins": 3.869246006011963, "rewards/rejected": -4.476675987243652, "step": 514 }, { "epoch": 0.11, "learning_rate": 1.792016806722689e-05, "logits/chosen": -1.6828869581222534, "logits/rejected": -1.786900281906128, "logps/chosen": -221.24606323242188, "logps/rejected": -391.76629638671875, "loss": 0.4184, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0562387704849243, "rewards/margins": 3.1828885078430176, "rewards/rejected": -4.239127159118652, "step": 515 }, { "epoch": 0.11, "learning_rate": 1.7915966386554625e-05, "logits/chosen": -2.1881747245788574, "logits/rejected": -2.139512777328491, "logps/chosen": -307.0408020019531, "logps/rejected": -322.06536865234375, "loss": 0.2386, "rewards/accuracies": 0.9375, "rewards/chosen": -0.27792808413505554, "rewards/margins": 2.9610986709594727, "rewards/rejected": -3.2390267848968506, "step": 516 }, { "epoch": 0.11, "learning_rate": 1.7911764705882355e-05, "logits/chosen": -2.1356711387634277, "logits/rejected": -2.504804849624634, "logps/chosen": -209.44581604003906, "logps/rejected": -273.0061950683594, "loss": 0.3096, "rewards/accuracies": 0.75, "rewards/chosen": -0.7350555062294006, "rewards/margins": 2.5185976028442383, "rewards/rejected": -3.253653049468994, "step": 517 }, { "epoch": 0.11, "learning_rate": 1.7907563025210086e-05, "logits/chosen": -2.1660633087158203, "logits/rejected": -2.1959242820739746, "logps/chosen": -333.2518615722656, "logps/rejected": -337.03033447265625, "loss": 0.4121, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2149726152420044, "rewards/margins": 1.8880847692489624, "rewards/rejected": -3.103057384490967, "step": 518 }, { "epoch": 0.11, "learning_rate": 1.7903361344537816e-05, "logits/chosen": -2.237673044204712, "logits/rejected": -2.227508544921875, "logps/chosen": -384.73095703125, "logps/rejected": -402.16607666015625, "loss": 0.7928, "rewards/accuracies": 0.6875, "rewards/chosen": -0.4252049922943115, "rewards/margins": 2.3365488052368164, "rewards/rejected": -2.761753559112549, "step": 519 }, { "epoch": 0.11, "learning_rate": 1.789915966386555e-05, "logits/chosen": -2.355433464050293, "logits/rejected": -2.2755563259124756, "logps/chosen": -283.7584533691406, "logps/rejected": -316.8488464355469, "loss": 0.2614, "rewards/accuracies": 0.9375, "rewards/chosen": -0.29245471954345703, "rewards/margins": 2.292234420776367, "rewards/rejected": -2.584689140319824, "step": 520 }, { "epoch": 0.11, "learning_rate": 1.789495798319328e-05, "logits/chosen": -2.184380292892456, "logits/rejected": -1.882248878479004, "logps/chosen": -272.8014221191406, "logps/rejected": -222.315673828125, "loss": 0.2975, "rewards/accuracies": 0.75, "rewards/chosen": -0.46726298332214355, "rewards/margins": 2.8321585655212402, "rewards/rejected": -3.299421548843384, "step": 521 }, { "epoch": 0.11, "learning_rate": 1.789075630252101e-05, "logits/chosen": -2.010697841644287, "logits/rejected": -1.7495272159576416, "logps/chosen": -349.9699401855469, "logps/rejected": -330.77490234375, "loss": 0.7439, "rewards/accuracies": 0.625, "rewards/chosen": -0.896056056022644, "rewards/margins": 1.1062815189361572, "rewards/rejected": -2.0023374557495117, "step": 522 }, { "epoch": 0.11, "learning_rate": 1.788655462184874e-05, "logits/chosen": -2.327357530593872, "logits/rejected": -1.9403951168060303, "logps/chosen": -405.6573486328125, "logps/rejected": -302.9195861816406, "loss": 0.5474, "rewards/accuracies": 0.875, "rewards/chosen": -0.7912900447845459, "rewards/margins": 3.085538625717163, "rewards/rejected": -3.876828670501709, "step": 523 }, { "epoch": 0.11, "learning_rate": 1.7882352941176474e-05, "logits/chosen": -1.9623956680297852, "logits/rejected": -1.873540997505188, "logps/chosen": -307.3180847167969, "logps/rejected": -324.9881286621094, "loss": 0.5614, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1170825958251953, "rewards/margins": 2.3464694023132324, "rewards/rejected": -3.463552236557007, "step": 524 }, { "epoch": 0.11, "learning_rate": 1.7878151260504204e-05, "logits/chosen": -2.4614877700805664, "logits/rejected": -2.22268009185791, "logps/chosen": -344.09326171875, "logps/rejected": -350.6828918457031, "loss": 0.2951, "rewards/accuracies": 0.875, "rewards/chosen": -0.10511809587478638, "rewards/margins": 2.6819238662719727, "rewards/rejected": -2.787041664123535, "step": 525 }, { "epoch": 0.11, "learning_rate": 1.7873949579831934e-05, "logits/chosen": -1.8780057430267334, "logits/rejected": -1.9672216176986694, "logps/chosen": -231.87127685546875, "logps/rejected": -419.1820373535156, "loss": 0.4756, "rewards/accuracies": 0.875, "rewards/chosen": -0.7887938022613525, "rewards/margins": 1.867379069328308, "rewards/rejected": -2.65617299079895, "step": 526 }, { "epoch": 0.11, "learning_rate": 1.7869747899159668e-05, "logits/chosen": -2.363224744796753, "logits/rejected": -1.1982814073562622, "logps/chosen": -480.41229248046875, "logps/rejected": -321.1180419921875, "loss": 0.6134, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0522228479385376, "rewards/margins": 2.965284824371338, "rewards/rejected": -4.017507553100586, "step": 527 }, { "epoch": 0.11, "learning_rate": 1.7865546218487398e-05, "logits/chosen": -1.6889622211456299, "logits/rejected": -1.8687525987625122, "logps/chosen": -249.06980895996094, "logps/rejected": -330.18609619140625, "loss": 0.1438, "rewards/accuracies": 1.0, "rewards/chosen": 0.12231326103210449, "rewards/margins": 4.1892313957214355, "rewards/rejected": -4.066917896270752, "step": 528 }, { "epoch": 0.11, "learning_rate": 1.7861344537815128e-05, "logits/chosen": -2.195996046066284, "logits/rejected": -2.0173168182373047, "logps/chosen": -237.76255798339844, "logps/rejected": -277.70904541015625, "loss": 0.1484, "rewards/accuracies": 0.9375, "rewards/chosen": -0.40628090500831604, "rewards/margins": 4.171627998352051, "rewards/rejected": -4.577908515930176, "step": 529 }, { "epoch": 0.11, "learning_rate": 1.785714285714286e-05, "logits/chosen": -2.086538553237915, "logits/rejected": -2.025927782058716, "logps/chosen": -330.7870788574219, "logps/rejected": -331.86041259765625, "loss": 0.202, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0680372714996338, "rewards/margins": 2.367128372192383, "rewards/rejected": -3.4351658821105957, "step": 530 }, { "epoch": 0.11, "learning_rate": 1.7852941176470592e-05, "logits/chosen": -2.1370768547058105, "logits/rejected": -1.6344835758209229, "logps/chosen": -359.1519775390625, "logps/rejected": -354.7419738769531, "loss": 0.4844, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3396291434764862, "rewards/margins": 2.7243471145629883, "rewards/rejected": -3.063976287841797, "step": 531 }, { "epoch": 0.11, "learning_rate": 1.7848739495798322e-05, "logits/chosen": -2.042631149291992, "logits/rejected": -1.798729658126831, "logps/chosen": -284.9844665527344, "logps/rejected": -292.60137939453125, "loss": 0.4142, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0338668823242188, "rewards/margins": 2.046664237976074, "rewards/rejected": -3.080531120300293, "step": 532 }, { "epoch": 0.11, "learning_rate": 1.7844537815126053e-05, "logits/chosen": -2.2620251178741455, "logits/rejected": -2.0029969215393066, "logps/chosen": -419.5677490234375, "logps/rejected": -338.8554992675781, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/chosen": -0.45283305644989014, "rewards/margins": 4.5514655113220215, "rewards/rejected": -5.004298686981201, "step": 533 }, { "epoch": 0.11, "learning_rate": 1.7840336134453783e-05, "logits/chosen": -2.1992368698120117, "logits/rejected": -1.9105582237243652, "logps/chosen": -324.9189453125, "logps/rejected": -299.4775085449219, "loss": 0.1979, "rewards/accuracies": 0.875, "rewards/chosen": -1.2988405227661133, "rewards/margins": 4.087227821350098, "rewards/rejected": -5.386068344116211, "step": 534 }, { "epoch": 0.11, "learning_rate": 1.7836134453781516e-05, "logits/chosen": -2.2962145805358887, "logits/rejected": -2.087346076965332, "logps/chosen": -348.236572265625, "logps/rejected": -322.84429931640625, "loss": 0.5034, "rewards/accuracies": 0.8125, "rewards/chosen": -1.693610668182373, "rewards/margins": 2.676043748855591, "rewards/rejected": -4.369654655456543, "step": 535 }, { "epoch": 0.11, "learning_rate": 1.7831932773109247e-05, "logits/chosen": -1.9971843957901, "logits/rejected": -1.7039964199066162, "logps/chosen": -224.19512939453125, "logps/rejected": -237.74679565429688, "loss": 0.3386, "rewards/accuracies": 0.875, "rewards/chosen": -0.9335631132125854, "rewards/margins": 3.0680341720581055, "rewards/rejected": -4.0015974044799805, "step": 536 }, { "epoch": 0.11, "learning_rate": 1.7827731092436977e-05, "logits/chosen": -2.22658109664917, "logits/rejected": -1.9773976802825928, "logps/chosen": -395.08038330078125, "logps/rejected": -408.44561767578125, "loss": 0.2763, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8340373039245605, "rewards/margins": 4.448478698730469, "rewards/rejected": -5.2825164794921875, "step": 537 }, { "epoch": 0.11, "learning_rate": 1.7823529411764707e-05, "logits/chosen": -2.2323849201202393, "logits/rejected": -1.7051142454147339, "logps/chosen": -393.07769775390625, "logps/rejected": -348.2321472167969, "loss": 0.4894, "rewards/accuracies": 0.75, "rewards/chosen": -1.5521345138549805, "rewards/margins": 3.7498769760131836, "rewards/rejected": -5.302011489868164, "step": 538 }, { "epoch": 0.11, "learning_rate": 1.781932773109244e-05, "logits/chosen": -2.0877904891967773, "logits/rejected": -2.138853073120117, "logps/chosen": -446.02630615234375, "logps/rejected": -451.53948974609375, "loss": 0.6178, "rewards/accuracies": 0.75, "rewards/chosen": -2.223884344100952, "rewards/margins": 2.2587437629699707, "rewards/rejected": -4.482627868652344, "step": 539 }, { "epoch": 0.11, "learning_rate": 1.781512605042017e-05, "logits/chosen": -2.0213232040405273, "logits/rejected": -1.9649556875228882, "logps/chosen": -306.36065673828125, "logps/rejected": -388.1372375488281, "loss": 0.1717, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5134642124176025, "rewards/margins": 3.3441970348358154, "rewards/rejected": -4.857661247253418, "step": 540 }, { "epoch": 0.11, "learning_rate": 1.78109243697479e-05, "logits/chosen": -2.0425426959991455, "logits/rejected": -2.32568621635437, "logps/chosen": -323.7206726074219, "logps/rejected": -368.44854736328125, "loss": 0.3465, "rewards/accuracies": 0.875, "rewards/chosen": -2.2193355560302734, "rewards/margins": 3.305173397064209, "rewards/rejected": -5.524508953094482, "step": 541 }, { "epoch": 0.11, "learning_rate": 1.780672268907563e-05, "logits/chosen": -1.8869844675064087, "logits/rejected": -1.5312440395355225, "logps/chosen": -273.87091064453125, "logps/rejected": -291.2333984375, "loss": 0.4641, "rewards/accuracies": 0.875, "rewards/chosen": -1.3759486675262451, "rewards/margins": 2.814509630203247, "rewards/rejected": -4.190458297729492, "step": 542 }, { "epoch": 0.11, "learning_rate": 1.780252100840336e-05, "logits/chosen": -2.1331233978271484, "logits/rejected": -1.5030254125595093, "logps/chosen": -392.5062561035156, "logps/rejected": -298.1678161621094, "loss": 0.5079, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5962162017822266, "rewards/margins": 2.7842936515808105, "rewards/rejected": -4.380509853363037, "step": 543 }, { "epoch": 0.11, "learning_rate": 1.7798319327731092e-05, "logits/chosen": -2.0721113681793213, "logits/rejected": -2.161677598953247, "logps/chosen": -285.09503173828125, "logps/rejected": -439.75732421875, "loss": 0.1517, "rewards/accuracies": 0.875, "rewards/chosen": -1.3982731103897095, "rewards/margins": 4.4617719650268555, "rewards/rejected": -5.860044956207275, "step": 544 }, { "epoch": 0.11, "learning_rate": 1.7794117647058825e-05, "logits/chosen": -2.1232032775878906, "logits/rejected": -2.0298287868499756, "logps/chosen": -457.37432861328125, "logps/rejected": -397.9346923828125, "loss": 0.5972, "rewards/accuracies": 0.8125, "rewards/chosen": -1.329658031463623, "rewards/margins": 2.617422103881836, "rewards/rejected": -3.947080135345459, "step": 545 }, { "epoch": 0.11, "learning_rate": 1.7789915966386556e-05, "logits/chosen": -1.8775863647460938, "logits/rejected": -1.6165963411331177, "logps/chosen": -399.70208740234375, "logps/rejected": -358.88043212890625, "loss": 0.7381, "rewards/accuracies": 0.6875, "rewards/chosen": -2.5014736652374268, "rewards/margins": 2.4977059364318848, "rewards/rejected": -4.999179840087891, "step": 546 }, { "epoch": 0.11, "learning_rate": 1.7785714285714286e-05, "logits/chosen": -1.9926302433013916, "logits/rejected": -1.9439363479614258, "logps/chosen": -380.2137756347656, "logps/rejected": -404.1746826171875, "loss": 0.309, "rewards/accuracies": 0.875, "rewards/chosen": -2.624013900756836, "rewards/margins": 3.4728505611419678, "rewards/rejected": -6.096864700317383, "step": 547 }, { "epoch": 0.11, "learning_rate": 1.7781512605042016e-05, "logits/chosen": -2.035512685775757, "logits/rejected": -1.6787346601486206, "logps/chosen": -229.11582946777344, "logps/rejected": -218.19442749023438, "loss": 0.5592, "rewards/accuracies": 0.75, "rewards/chosen": -2.526768207550049, "rewards/margins": 2.394801616668701, "rewards/rejected": -4.921569347381592, "step": 548 }, { "epoch": 0.11, "learning_rate": 1.777731092436975e-05, "logits/chosen": -1.9025330543518066, "logits/rejected": -2.224428176879883, "logps/chosen": -271.4559631347656, "logps/rejected": -312.6767578125, "loss": 0.1611, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7880305051803589, "rewards/margins": 4.676723003387451, "rewards/rejected": -6.4647536277771, "step": 549 }, { "epoch": 0.12, "learning_rate": 1.777310924369748e-05, "logits/chosen": -1.925479531288147, "logits/rejected": -1.9021575450897217, "logps/chosen": -276.25048828125, "logps/rejected": -224.90701293945312, "loss": 0.3973, "rewards/accuracies": 0.8125, "rewards/chosen": -3.0463192462921143, "rewards/margins": 2.7438788414001465, "rewards/rejected": -5.790197372436523, "step": 550 }, { "epoch": 0.12, "learning_rate": 1.776890756302521e-05, "logits/chosen": -2.0473976135253906, "logits/rejected": -1.4948766231536865, "logps/chosen": -350.2103271484375, "logps/rejected": -359.45599365234375, "loss": 0.2173, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7156624794006348, "rewards/margins": 3.4159865379333496, "rewards/rejected": -5.131649017333984, "step": 551 }, { "epoch": 0.12, "learning_rate": 1.776470588235294e-05, "logits/chosen": -1.8817487955093384, "logits/rejected": -1.5503897666931152, "logps/chosen": -342.044189453125, "logps/rejected": -267.8489990234375, "loss": 0.297, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5470385551452637, "rewards/margins": 3.4172656536102295, "rewards/rejected": -4.964304447174072, "step": 552 }, { "epoch": 0.12, "learning_rate": 1.7760504201680674e-05, "logits/chosen": -2.0708799362182617, "logits/rejected": -1.9367434978485107, "logps/chosen": -333.230712890625, "logps/rejected": -265.77862548828125, "loss": 0.2934, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9380170106887817, "rewards/margins": 2.589078187942505, "rewards/rejected": -4.527094841003418, "step": 553 }, { "epoch": 0.12, "learning_rate": 1.7756302521008404e-05, "logits/chosen": -1.7775119543075562, "logits/rejected": -1.761277675628662, "logps/chosen": -295.5238037109375, "logps/rejected": -280.0086364746094, "loss": 0.4768, "rewards/accuracies": 0.75, "rewards/chosen": -1.8351261615753174, "rewards/margins": 3.1918423175811768, "rewards/rejected": -5.026968002319336, "step": 554 }, { "epoch": 0.12, "learning_rate": 1.7752100840336134e-05, "logits/chosen": -1.835569143295288, "logits/rejected": -1.7730984687805176, "logps/chosen": -369.357666015625, "logps/rejected": -363.00396728515625, "loss": 0.3492, "rewards/accuracies": 0.8125, "rewards/chosen": -2.367192029953003, "rewards/margins": 2.3578941822052, "rewards/rejected": -4.725086212158203, "step": 555 }, { "epoch": 0.12, "learning_rate": 1.7747899159663865e-05, "logits/chosen": -1.9479680061340332, "logits/rejected": -1.6829419136047363, "logps/chosen": -450.7618713378906, "logps/rejected": -423.020751953125, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": -1.9476662874221802, "rewards/margins": 3.371506690979004, "rewards/rejected": -5.319173336029053, "step": 556 }, { "epoch": 0.12, "learning_rate": 1.7743697478991598e-05, "logits/chosen": -2.0395281314849854, "logits/rejected": -1.7483015060424805, "logps/chosen": -455.34112548828125, "logps/rejected": -335.931640625, "loss": 0.7752, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4735007286071777, "rewards/margins": 1.4180452823638916, "rewards/rejected": -3.8915462493896484, "step": 557 }, { "epoch": 0.12, "learning_rate": 1.773949579831933e-05, "logits/chosen": -2.1122360229492188, "logits/rejected": -1.830791711807251, "logps/chosen": -376.23486328125, "logps/rejected": -285.22601318359375, "loss": 0.5763, "rewards/accuracies": 0.875, "rewards/chosen": -2.0258774757385254, "rewards/margins": 2.2812435626983643, "rewards/rejected": -4.3071208000183105, "step": 558 }, { "epoch": 0.12, "learning_rate": 1.773529411764706e-05, "logits/chosen": -2.440333366394043, "logits/rejected": -1.9048335552215576, "logps/chosen": -411.12677001953125, "logps/rejected": -333.35699462890625, "loss": 0.0741, "rewards/accuracies": 1.0, "rewards/chosen": -0.7845588326454163, "rewards/margins": 4.232915878295898, "rewards/rejected": -5.017475128173828, "step": 559 }, { "epoch": 0.12, "learning_rate": 1.7731092436974792e-05, "logits/chosen": -2.020054340362549, "logits/rejected": -2.4066474437713623, "logps/chosen": -221.4602813720703, "logps/rejected": -356.1713562011719, "loss": 0.1724, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1847712993621826, "rewards/margins": 4.0312042236328125, "rewards/rejected": -6.215975761413574, "step": 560 }, { "epoch": 0.12, "learning_rate": 1.7726890756302522e-05, "logits/chosen": -2.2433838844299316, "logits/rejected": -1.7616820335388184, "logps/chosen": -411.79901123046875, "logps/rejected": -398.7806701660156, "loss": 0.7543, "rewards/accuracies": 0.625, "rewards/chosen": -2.376258611679077, "rewards/margins": 2.3949642181396484, "rewards/rejected": -4.7712225914001465, "step": 561 }, { "epoch": 0.12, "learning_rate": 1.7722689075630253e-05, "logits/chosen": -2.210214853286743, "logits/rejected": -1.9913015365600586, "logps/chosen": -373.44744873046875, "logps/rejected": -340.349609375, "loss": 0.5858, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6961324214935303, "rewards/margins": 1.6905564069747925, "rewards/rejected": -3.3866889476776123, "step": 562 }, { "epoch": 0.12, "learning_rate": 1.7718487394957983e-05, "logits/chosen": -2.246351480484009, "logits/rejected": -1.8784517049789429, "logps/chosen": -427.2381286621094, "logps/rejected": -321.5286865234375, "loss": 0.2151, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4049296975135803, "rewards/margins": 3.590799331665039, "rewards/rejected": -3.9957292079925537, "step": 563 }, { "epoch": 0.12, "learning_rate": 1.7714285714285717e-05, "logits/chosen": -1.972480297088623, "logits/rejected": -1.6647220849990845, "logps/chosen": -322.11083984375, "logps/rejected": -264.3584289550781, "loss": 0.0943, "rewards/accuracies": 0.9375, "rewards/chosen": -1.063344120979309, "rewards/margins": 4.47200870513916, "rewards/rejected": -5.535353183746338, "step": 564 }, { "epoch": 0.12, "learning_rate": 1.7710084033613447e-05, "logits/chosen": -2.041421413421631, "logits/rejected": -1.713474988937378, "logps/chosen": -275.64727783203125, "logps/rejected": -248.07948303222656, "loss": 0.4807, "rewards/accuracies": 0.75, "rewards/chosen": -1.7853559255599976, "rewards/margins": 2.016425132751465, "rewards/rejected": -3.801781177520752, "step": 565 }, { "epoch": 0.12, "learning_rate": 1.7705882352941177e-05, "logits/chosen": -2.329613447189331, "logits/rejected": -2.1407434940338135, "logps/chosen": -287.83526611328125, "logps/rejected": -281.6817626953125, "loss": 0.7844, "rewards/accuracies": 0.75, "rewards/chosen": -1.984637975692749, "rewards/margins": 1.642470359802246, "rewards/rejected": -3.627108335494995, "step": 566 }, { "epoch": 0.12, "learning_rate": 1.7701680672268907e-05, "logits/chosen": -1.9577643871307373, "logits/rejected": -2.136990547180176, "logps/chosen": -217.54519653320312, "logps/rejected": -321.43487548828125, "loss": 0.2944, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3599539995193481, "rewards/margins": 2.861196994781494, "rewards/rejected": -4.221151351928711, "step": 567 }, { "epoch": 0.12, "learning_rate": 1.769747899159664e-05, "logits/chosen": -2.3956363201141357, "logits/rejected": -2.0991134643554688, "logps/chosen": -331.6529541015625, "logps/rejected": -287.12664794921875, "loss": 0.2447, "rewards/accuracies": 0.875, "rewards/chosen": -1.0588245391845703, "rewards/margins": 3.1444296836853027, "rewards/rejected": -4.203254222869873, "step": 568 }, { "epoch": 0.12, "learning_rate": 1.769327731092437e-05, "logits/chosen": -1.9715251922607422, "logits/rejected": -2.0345516204833984, "logps/chosen": -362.01776123046875, "logps/rejected": -347.6793518066406, "loss": 0.2147, "rewards/accuracies": 0.875, "rewards/chosen": -0.9462583661079407, "rewards/margins": 3.158165693283081, "rewards/rejected": -4.104423522949219, "step": 569 }, { "epoch": 0.12, "learning_rate": 1.76890756302521e-05, "logits/chosen": -2.0222535133361816, "logits/rejected": -1.746554970741272, "logps/chosen": -347.338134765625, "logps/rejected": -332.7222595214844, "loss": 0.6918, "rewards/accuracies": 0.75, "rewards/chosen": -2.6448588371276855, "rewards/margins": 2.356677532196045, "rewards/rejected": -5.0015363693237305, "step": 570 }, { "epoch": 0.12, "learning_rate": 1.768487394957983e-05, "logits/chosen": -2.1338720321655273, "logits/rejected": -2.0698888301849365, "logps/chosen": -310.0467224121094, "logps/rejected": -341.3106689453125, "loss": 0.3514, "rewards/accuracies": 0.875, "rewards/chosen": -1.5019992589950562, "rewards/margins": 3.719092607498169, "rewards/rejected": -5.221092224121094, "step": 571 }, { "epoch": 0.12, "learning_rate": 1.7680672268907565e-05, "logits/chosen": -2.1733458042144775, "logits/rejected": -2.0725340843200684, "logps/chosen": -337.7464599609375, "logps/rejected": -284.135986328125, "loss": 0.3166, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2706047296524048, "rewards/margins": 3.42317795753479, "rewards/rejected": -4.693782806396484, "step": 572 }, { "epoch": 0.12, "learning_rate": 1.7676470588235295e-05, "logits/chosen": -1.994964361190796, "logits/rejected": -2.023188352584839, "logps/chosen": -328.5353698730469, "logps/rejected": -343.7881774902344, "loss": 0.4296, "rewards/accuracies": 0.75, "rewards/chosen": -0.9336496591567993, "rewards/margins": 2.9438352584838867, "rewards/rejected": -3.8774852752685547, "step": 573 }, { "epoch": 0.12, "learning_rate": 1.7672268907563026e-05, "logits/chosen": -1.9034863710403442, "logits/rejected": -1.650263786315918, "logps/chosen": -322.71533203125, "logps/rejected": -297.9567565917969, "loss": 0.1074, "rewards/accuracies": 1.0, "rewards/chosen": -1.0696885585784912, "rewards/margins": 4.231565952301025, "rewards/rejected": -5.301254749298096, "step": 574 }, { "epoch": 0.12, "learning_rate": 1.7668067226890756e-05, "logits/chosen": -2.0441155433654785, "logits/rejected": -1.703157663345337, "logps/chosen": -284.8507385253906, "logps/rejected": -342.7308044433594, "loss": 0.6736, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4934263229370117, "rewards/margins": 2.0427916049957275, "rewards/rejected": -3.53621768951416, "step": 575 }, { "epoch": 0.12, "learning_rate": 1.766386554621849e-05, "logits/chosen": -2.3282277584075928, "logits/rejected": -1.7080771923065186, "logps/chosen": -300.5726318359375, "logps/rejected": -210.52487182617188, "loss": 0.2109, "rewards/accuracies": 0.9375, "rewards/chosen": -1.126968502998352, "rewards/margins": 2.4636807441711426, "rewards/rejected": -3.590648889541626, "step": 576 }, { "epoch": 0.12, "learning_rate": 1.765966386554622e-05, "logits/chosen": -2.173309326171875, "logits/rejected": -1.8335094451904297, "logps/chosen": -375.16552734375, "logps/rejected": -304.8026428222656, "loss": 0.3875, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5488709211349487, "rewards/margins": 2.5619935989379883, "rewards/rejected": -4.110864162445068, "step": 577 }, { "epoch": 0.12, "learning_rate": 1.765546218487395e-05, "logits/chosen": -1.9434499740600586, "logits/rejected": -1.5711898803710938, "logps/chosen": -274.57318115234375, "logps/rejected": -380.7144775390625, "loss": 0.4132, "rewards/accuracies": 0.75, "rewards/chosen": -1.1733087301254272, "rewards/margins": 2.676362991333008, "rewards/rejected": -3.8496716022491455, "step": 578 }, { "epoch": 0.12, "learning_rate": 1.765126050420168e-05, "logits/chosen": -1.9481709003448486, "logits/rejected": -2.0177364349365234, "logps/chosen": -151.82408142089844, "logps/rejected": -198.28517150878906, "loss": 0.23, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8105243444442749, "rewards/margins": 2.2585389614105225, "rewards/rejected": -3.069063186645508, "step": 579 }, { "epoch": 0.12, "learning_rate": 1.7647058823529414e-05, "logits/chosen": -2.138062000274658, "logits/rejected": -1.9974300861358643, "logps/chosen": -421.0310363769531, "logps/rejected": -362.08807373046875, "loss": 0.4583, "rewards/accuracies": 0.75, "rewards/chosen": -0.41938063502311707, "rewards/margins": 2.721446990966797, "rewards/rejected": -3.1408276557922363, "step": 580 }, { "epoch": 0.12, "learning_rate": 1.7642857142857144e-05, "logits/chosen": -2.303051710128784, "logits/rejected": -2.015049457550049, "logps/chosen": -326.5808410644531, "logps/rejected": -334.4703063964844, "loss": 0.4056, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1618808507919312, "rewards/margins": 2.3653814792633057, "rewards/rejected": -3.5272624492645264, "step": 581 }, { "epoch": 0.12, "learning_rate": 1.7638655462184874e-05, "logits/chosen": -1.76221764087677, "logits/rejected": -1.7318577766418457, "logps/chosen": -363.66455078125, "logps/rejected": -439.78448486328125, "loss": 0.3356, "rewards/accuracies": 0.9375, "rewards/chosen": -0.3488526940345764, "rewards/margins": 2.6168365478515625, "rewards/rejected": -2.965689182281494, "step": 582 }, { "epoch": 0.12, "learning_rate": 1.7634453781512608e-05, "logits/chosen": -1.7114746570587158, "logits/rejected": -1.5570493936538696, "logps/chosen": -279.304443359375, "logps/rejected": -313.8551330566406, "loss": 0.7882, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0155909061431885, "rewards/margins": 1.4448413848876953, "rewards/rejected": -3.4604320526123047, "step": 583 }, { "epoch": 0.12, "learning_rate": 1.7630252100840338e-05, "logits/chosen": -2.102205276489258, "logits/rejected": -2.0762243270874023, "logps/chosen": -368.6748046875, "logps/rejected": -386.53253173828125, "loss": 0.331, "rewards/accuracies": 0.875, "rewards/chosen": -0.8447445631027222, "rewards/margins": 2.411888599395752, "rewards/rejected": -3.2566332817077637, "step": 584 }, { "epoch": 0.12, "learning_rate": 1.7626050420168068e-05, "logits/chosen": -2.3064589500427246, "logits/rejected": -2.073137044906616, "logps/chosen": -448.7082824707031, "logps/rejected": -302.3025207519531, "loss": 0.3575, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8835690021514893, "rewards/margins": 1.9845120906829834, "rewards/rejected": -2.8680808544158936, "step": 585 }, { "epoch": 0.12, "learning_rate": 1.76218487394958e-05, "logits/chosen": -2.0600881576538086, "logits/rejected": -1.8609788417816162, "logps/chosen": -229.59158325195312, "logps/rejected": -287.3170166015625, "loss": 0.2262, "rewards/accuracies": 0.9375, "rewards/chosen": -1.468012809753418, "rewards/margins": 3.3571579456329346, "rewards/rejected": -4.825170516967773, "step": 586 }, { "epoch": 0.12, "learning_rate": 1.7617647058823532e-05, "logits/chosen": -1.8887302875518799, "logits/rejected": -1.94637131690979, "logps/chosen": -347.83673095703125, "logps/rejected": -359.481201171875, "loss": 0.1922, "rewards/accuracies": 0.875, "rewards/chosen": -0.9356111288070679, "rewards/margins": 3.4601073265075684, "rewards/rejected": -4.395718574523926, "step": 587 }, { "epoch": 0.12, "learning_rate": 1.7613445378151262e-05, "logits/chosen": -2.2874603271484375, "logits/rejected": -2.176391124725342, "logps/chosen": -313.77178955078125, "logps/rejected": -371.0591735839844, "loss": 0.359, "rewards/accuracies": 0.75, "rewards/chosen": -0.5926164388656616, "rewards/margins": 3.3008227348327637, "rewards/rejected": -3.893439292907715, "step": 588 }, { "epoch": 0.12, "learning_rate": 1.7609243697478992e-05, "logits/chosen": -2.123997211456299, "logits/rejected": -1.7127245664596558, "logps/chosen": -366.6797790527344, "logps/rejected": -322.2105712890625, "loss": 0.3332, "rewards/accuracies": 0.875, "rewards/chosen": -0.5245232582092285, "rewards/margins": 3.5848498344421387, "rewards/rejected": -4.109373092651367, "step": 589 }, { "epoch": 0.12, "learning_rate": 1.7605042016806723e-05, "logits/chosen": -2.1327927112579346, "logits/rejected": -1.6013575792312622, "logps/chosen": -393.24249267578125, "logps/rejected": -372.1217041015625, "loss": 0.1196, "rewards/accuracies": 1.0, "rewards/chosen": 0.17057377099990845, "rewards/margins": 4.495490550994873, "rewards/rejected": -4.324916839599609, "step": 590 }, { "epoch": 0.12, "learning_rate": 1.7600840336134456e-05, "logits/chosen": -2.1168289184570312, "logits/rejected": -1.937970757484436, "logps/chosen": -349.0628662109375, "logps/rejected": -370.396484375, "loss": 0.592, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3728522062301636, "rewards/margins": 1.8734581470489502, "rewards/rejected": -3.246310234069824, "step": 591 }, { "epoch": 0.12, "learning_rate": 1.7596638655462186e-05, "logits/chosen": -2.1187102794647217, "logits/rejected": -2.194070339202881, "logps/chosen": -336.94140625, "logps/rejected": -362.3601989746094, "loss": 0.3624, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5056046843528748, "rewards/margins": 2.729396343231201, "rewards/rejected": -3.2350008487701416, "step": 592 }, { "epoch": 0.12, "learning_rate": 1.7592436974789917e-05, "logits/chosen": -1.8995732069015503, "logits/rejected": -1.947818636894226, "logps/chosen": -327.80487060546875, "logps/rejected": -356.39263916015625, "loss": 0.1776, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5217307806015015, "rewards/margins": 3.156291961669922, "rewards/rejected": -4.678022384643555, "step": 593 }, { "epoch": 0.12, "learning_rate": 1.7588235294117647e-05, "logits/chosen": -1.85849130153656, "logits/rejected": -1.6905248165130615, "logps/chosen": -251.05442810058594, "logps/rejected": -282.79168701171875, "loss": 0.2727, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7055599689483643, "rewards/margins": 2.28725004196167, "rewards/rejected": -2.992809772491455, "step": 594 }, { "epoch": 0.12, "learning_rate": 1.758403361344538e-05, "logits/chosen": -1.76235830783844, "logits/rejected": -1.3964152336120605, "logps/chosen": -374.6567077636719, "logps/rejected": -350.7769470214844, "loss": 0.1998, "rewards/accuracies": 0.875, "rewards/chosen": -1.1414450407028198, "rewards/margins": 4.14404296875, "rewards/rejected": -5.285488128662109, "step": 595 }, { "epoch": 0.12, "learning_rate": 1.757983193277311e-05, "logits/chosen": -2.1745529174804688, "logits/rejected": -2.1625161170959473, "logps/chosen": -351.01861572265625, "logps/rejected": -325.62030029296875, "loss": 0.5352, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2620129585266113, "rewards/margins": 1.9831123352050781, "rewards/rejected": -3.2451255321502686, "step": 596 }, { "epoch": 0.12, "learning_rate": 1.757563025210084e-05, "logits/chosen": -2.062751531600952, "logits/rejected": -1.5992534160614014, "logps/chosen": -334.5269470214844, "logps/rejected": -334.413330078125, "loss": 0.2543, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2660620212554932, "rewards/margins": 2.692103385925293, "rewards/rejected": -3.958165168762207, "step": 597 }, { "epoch": 0.13, "learning_rate": 1.757142857142857e-05, "logits/chosen": -2.329374313354492, "logits/rejected": -1.8794341087341309, "logps/chosen": -413.318603515625, "logps/rejected": -356.1742248535156, "loss": 0.3024, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2969900369644165, "rewards/margins": 3.5236058235168457, "rewards/rejected": -4.820595741271973, "step": 598 }, { "epoch": 0.13, "learning_rate": 1.7567226890756305e-05, "logits/chosen": -2.0708303451538086, "logits/rejected": -1.8224103450775146, "logps/chosen": -296.98626708984375, "logps/rejected": -264.0754699707031, "loss": 0.4375, "rewards/accuracies": 0.75, "rewards/chosen": -1.3862814903259277, "rewards/margins": 3.3693251609802246, "rewards/rejected": -4.755606651306152, "step": 599 }, { "epoch": 0.13, "learning_rate": 1.7563025210084035e-05, "logits/chosen": -1.8793928623199463, "logits/rejected": -1.6184089183807373, "logps/chosen": -330.78729248046875, "logps/rejected": -307.60003662109375, "loss": 0.3772, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1317617893218994, "rewards/margins": 2.379058361053467, "rewards/rejected": -3.5108203887939453, "step": 600 }, { "epoch": 0.13, "learning_rate": 1.7558823529411765e-05, "logits/chosen": -2.0106217861175537, "logits/rejected": -1.9231221675872803, "logps/chosen": -381.6519775390625, "logps/rejected": -343.802490234375, "loss": 0.1988, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8961509466171265, "rewards/margins": 4.124539852142334, "rewards/rejected": -5.02069091796875, "step": 601 }, { "epoch": 0.13, "learning_rate": 1.7554621848739495e-05, "logits/chosen": -2.065702199935913, "logits/rejected": -1.61421799659729, "logps/chosen": -336.43902587890625, "logps/rejected": -274.4747314453125, "loss": 0.3495, "rewards/accuracies": 0.875, "rewards/chosen": -1.4075486660003662, "rewards/margins": 3.287841796875, "rewards/rejected": -4.695390701293945, "step": 602 }, { "epoch": 0.13, "learning_rate": 1.755042016806723e-05, "logits/chosen": -2.1952593326568604, "logits/rejected": -2.114231824874878, "logps/chosen": -265.59100341796875, "logps/rejected": -277.71368408203125, "loss": 0.5747, "rewards/accuracies": 0.625, "rewards/chosen": -1.651538372039795, "rewards/margins": 1.7672960758209229, "rewards/rejected": -3.4188344478607178, "step": 603 }, { "epoch": 0.13, "learning_rate": 1.754621848739496e-05, "logits/chosen": -1.7957954406738281, "logits/rejected": -1.6911286115646362, "logps/chosen": -237.08416748046875, "logps/rejected": -292.2733154296875, "loss": 0.1808, "rewards/accuracies": 0.875, "rewards/chosen": -1.252558946609497, "rewards/margins": 4.193234443664551, "rewards/rejected": -5.445793151855469, "step": 604 }, { "epoch": 0.13, "learning_rate": 1.754201680672269e-05, "logits/chosen": -2.3046746253967285, "logits/rejected": -1.6288542747497559, "logps/chosen": -378.6341857910156, "logps/rejected": -284.7864990234375, "loss": 0.3798, "rewards/accuracies": 0.75, "rewards/chosen": -1.3336551189422607, "rewards/margins": 3.163825273513794, "rewards/rejected": -4.497480392456055, "step": 605 }, { "epoch": 0.13, "learning_rate": 1.7537815126050423e-05, "logits/chosen": -2.25481915473938, "logits/rejected": -1.8874237537384033, "logps/chosen": -424.5133056640625, "logps/rejected": -318.4740295410156, "loss": 0.1827, "rewards/accuracies": 0.9375, "rewards/chosen": -1.140905737876892, "rewards/margins": 3.2062745094299316, "rewards/rejected": -4.347179889678955, "step": 606 }, { "epoch": 0.13, "learning_rate": 1.7533613445378153e-05, "logits/chosen": -2.022418260574341, "logits/rejected": -2.0001907348632812, "logps/chosen": -395.0960693359375, "logps/rejected": -314.1839294433594, "loss": 0.8382, "rewards/accuracies": 0.8125, "rewards/chosen": -1.50838041305542, "rewards/margins": 1.3427683115005493, "rewards/rejected": -2.8511486053466797, "step": 607 }, { "epoch": 0.13, "learning_rate": 1.7529411764705884e-05, "logits/chosen": -2.142299175262451, "logits/rejected": -1.4220865964889526, "logps/chosen": -392.3373107910156, "logps/rejected": -311.0252685546875, "loss": 0.2207, "rewards/accuracies": 0.875, "rewards/chosen": -0.12111552059650421, "rewards/margins": 3.1376452445983887, "rewards/rejected": -3.258760929107666, "step": 608 }, { "epoch": 0.13, "learning_rate": 1.7525210084033614e-05, "logits/chosen": -2.2132859230041504, "logits/rejected": -1.585438847541809, "logps/chosen": -351.05328369140625, "logps/rejected": -285.6705322265625, "loss": 0.1308, "rewards/accuracies": 1.0, "rewards/chosen": -0.3578495979309082, "rewards/margins": 3.9617502689361572, "rewards/rejected": -4.3196001052856445, "step": 609 }, { "epoch": 0.13, "learning_rate": 1.7521008403361347e-05, "logits/chosen": -2.41865873336792, "logits/rejected": -2.4663453102111816, "logps/chosen": -322.7279968261719, "logps/rejected": -354.6105041503906, "loss": 0.5306, "rewards/accuracies": 0.875, "rewards/chosen": -1.3983232975006104, "rewards/margins": 2.2518470287323, "rewards/rejected": -3.65017032623291, "step": 610 }, { "epoch": 0.13, "learning_rate": 1.7516806722689078e-05, "logits/chosen": -2.120915412902832, "logits/rejected": -2.240330696105957, "logps/chosen": -324.6904602050781, "logps/rejected": -527.383056640625, "loss": 0.3158, "rewards/accuracies": 0.75, "rewards/chosen": -0.266683429479599, "rewards/margins": 3.2078123092651367, "rewards/rejected": -3.4744958877563477, "step": 611 }, { "epoch": 0.13, "learning_rate": 1.7512605042016808e-05, "logits/chosen": -1.903422474861145, "logits/rejected": -2.035590648651123, "logps/chosen": -246.1698760986328, "logps/rejected": -290.1372375488281, "loss": 0.1029, "rewards/accuracies": 1.0, "rewards/chosen": -1.0302534103393555, "rewards/margins": 4.132861137390137, "rewards/rejected": -5.163114547729492, "step": 612 }, { "epoch": 0.13, "learning_rate": 1.7508403361344538e-05, "logits/chosen": -2.25305438041687, "logits/rejected": -1.8691236972808838, "logps/chosen": -447.41748046875, "logps/rejected": -392.9903564453125, "loss": 0.2915, "rewards/accuracies": 0.8125, "rewards/chosen": -0.10366980731487274, "rewards/margins": 3.635295867919922, "rewards/rejected": -3.7389655113220215, "step": 613 }, { "epoch": 0.13, "learning_rate": 1.750420168067227e-05, "logits/chosen": -1.9130713939666748, "logits/rejected": -1.7619062662124634, "logps/chosen": -299.79046630859375, "logps/rejected": -279.08502197265625, "loss": 0.3796, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2327542304992676, "rewards/margins": 2.162534713745117, "rewards/rejected": -3.3952889442443848, "step": 614 }, { "epoch": 0.13, "learning_rate": 1.7500000000000002e-05, "logits/chosen": -2.184145927429199, "logits/rejected": -1.763026237487793, "logps/chosen": -356.28399658203125, "logps/rejected": -326.8001708984375, "loss": 0.3708, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6756588816642761, "rewards/margins": 3.3722288608551025, "rewards/rejected": -4.047887802124023, "step": 615 }, { "epoch": 0.13, "learning_rate": 1.7495798319327732e-05, "logits/chosen": -2.224501609802246, "logits/rejected": -2.1770598888397217, "logps/chosen": -288.3049011230469, "logps/rejected": -297.07305908203125, "loss": 0.1933, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1636801958084106, "rewards/margins": 3.263641357421875, "rewards/rejected": -4.427321910858154, "step": 616 }, { "epoch": 0.13, "learning_rate": 1.7491596638655462e-05, "logits/chosen": -2.1870622634887695, "logits/rejected": -1.968467116355896, "logps/chosen": -302.8945617675781, "logps/rejected": -328.1313781738281, "loss": 0.3485, "rewards/accuracies": 0.875, "rewards/chosen": -1.3654404878616333, "rewards/margins": 2.6853227615356445, "rewards/rejected": -4.050763130187988, "step": 617 }, { "epoch": 0.13, "learning_rate": 1.7487394957983196e-05, "logits/chosen": -2.115370750427246, "logits/rejected": -2.1195783615112305, "logps/chosen": -218.62728881835938, "logps/rejected": -257.3113098144531, "loss": 0.4401, "rewards/accuracies": 0.6875, "rewards/chosen": -1.213369369506836, "rewards/margins": 1.8138105869293213, "rewards/rejected": -3.0271799564361572, "step": 618 }, { "epoch": 0.13, "learning_rate": 1.7483193277310926e-05, "logits/chosen": -1.9826796054840088, "logits/rejected": -1.8652582168579102, "logps/chosen": -309.51348876953125, "logps/rejected": -323.62872314453125, "loss": 0.5281, "rewards/accuracies": 0.6875, "rewards/chosen": -0.5351694822311401, "rewards/margins": 2.707502841949463, "rewards/rejected": -3.2426724433898926, "step": 619 }, { "epoch": 0.13, "learning_rate": 1.7478991596638656e-05, "logits/chosen": -1.883870244026184, "logits/rejected": -2.0177013874053955, "logps/chosen": -403.1067810058594, "logps/rejected": -446.9234924316406, "loss": 0.3197, "rewards/accuracies": 0.8125, "rewards/chosen": -0.6505222320556641, "rewards/margins": 2.757884979248047, "rewards/rejected": -3.408407211303711, "step": 620 }, { "epoch": 0.13, "learning_rate": 1.7474789915966387e-05, "logits/chosen": -1.7034316062927246, "logits/rejected": -1.8480875492095947, "logps/chosen": -245.02505493164062, "logps/rejected": -246.24557495117188, "loss": 0.5094, "rewards/accuracies": 0.6875, "rewards/chosen": -1.2714037895202637, "rewards/margins": 2.8076908588409424, "rewards/rejected": -4.079094886779785, "step": 621 }, { "epoch": 0.13, "learning_rate": 1.747058823529412e-05, "logits/chosen": -2.2557621002197266, "logits/rejected": -1.904726266860962, "logps/chosen": -379.7161865234375, "logps/rejected": -346.000732421875, "loss": 0.0642, "rewards/accuracies": 1.0, "rewards/chosen": -1.050634741783142, "rewards/margins": 4.45014762878418, "rewards/rejected": -5.500782012939453, "step": 622 }, { "epoch": 0.13, "learning_rate": 1.746638655462185e-05, "logits/chosen": -1.9042936563491821, "logits/rejected": -1.8329639434814453, "logps/chosen": -229.14935302734375, "logps/rejected": -316.176025390625, "loss": 0.4426, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9361891746520996, "rewards/margins": 2.475944757461548, "rewards/rejected": -3.4121341705322266, "step": 623 }, { "epoch": 0.13, "learning_rate": 1.746218487394958e-05, "logits/chosen": -1.8404033184051514, "logits/rejected": -2.1285698413848877, "logps/chosen": -326.9837341308594, "logps/rejected": -401.146484375, "loss": 0.2532, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0858327150344849, "rewards/margins": 3.590076446533203, "rewards/rejected": -4.675909042358398, "step": 624 }, { "epoch": 0.13, "learning_rate": 1.745798319327731e-05, "logits/chosen": -2.1854569911956787, "logits/rejected": -2.437499523162842, "logps/chosen": -320.7906494140625, "logps/rejected": -383.99737548828125, "loss": 0.2313, "rewards/accuracies": 0.875, "rewards/chosen": -0.4466704726219177, "rewards/margins": 3.720686435699463, "rewards/rejected": -4.167356491088867, "step": 625 }, { "epoch": 0.13, "learning_rate": 1.7453781512605044e-05, "logits/chosen": -2.2070181369781494, "logits/rejected": -1.920109748840332, "logps/chosen": -276.9339904785156, "logps/rejected": -240.9857177734375, "loss": 0.2328, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3672330379486084, "rewards/margins": 4.07345724105835, "rewards/rejected": -6.440690517425537, "step": 626 }, { "epoch": 0.13, "learning_rate": 1.7449579831932775e-05, "logits/chosen": -2.192082166671753, "logits/rejected": -2.0379700660705566, "logps/chosen": -244.08450317382812, "logps/rejected": -302.1907043457031, "loss": 0.3003, "rewards/accuracies": 0.875, "rewards/chosen": -1.371840238571167, "rewards/margins": 2.1844491958618164, "rewards/rejected": -3.5562894344329834, "step": 627 }, { "epoch": 0.13, "learning_rate": 1.7445378151260505e-05, "logits/chosen": -2.074871778488159, "logits/rejected": -2.1112983226776123, "logps/chosen": -191.67086791992188, "logps/rejected": -337.5142822265625, "loss": 0.2809, "rewards/accuracies": 0.875, "rewards/chosen": -1.1002492904663086, "rewards/margins": 3.577155113220215, "rewards/rejected": -4.677404403686523, "step": 628 }, { "epoch": 0.13, "learning_rate": 1.744117647058824e-05, "logits/chosen": -1.948610544204712, "logits/rejected": -2.1481761932373047, "logps/chosen": -320.94384765625, "logps/rejected": -350.3824768066406, "loss": 0.4805, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7741295695304871, "rewards/margins": 1.5979022979736328, "rewards/rejected": -2.3720319271087646, "step": 629 }, { "epoch": 0.13, "learning_rate": 1.743697478991597e-05, "logits/chosen": -2.1214332580566406, "logits/rejected": -2.0843162536621094, "logps/chosen": -267.44232177734375, "logps/rejected": -327.6312255859375, "loss": 0.2766, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9848929047584534, "rewards/margins": 3.2845354080200195, "rewards/rejected": -4.269428253173828, "step": 630 }, { "epoch": 0.13, "learning_rate": 1.74327731092437e-05, "logits/chosen": -2.274620771408081, "logits/rejected": -2.1911637783050537, "logps/chosen": -288.55218505859375, "logps/rejected": -324.39996337890625, "loss": 0.7385, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6419466733932495, "rewards/margins": 1.8183467388153076, "rewards/rejected": -3.4602932929992676, "step": 631 }, { "epoch": 0.13, "learning_rate": 1.742857142857143e-05, "logits/chosen": -1.9665277004241943, "logits/rejected": -1.8588104248046875, "logps/chosen": -386.43743896484375, "logps/rejected": -345.87762451171875, "loss": 0.3855, "rewards/accuracies": 0.75, "rewards/chosen": -0.8597573041915894, "rewards/margins": 2.6692023277282715, "rewards/rejected": -3.5289597511291504, "step": 632 }, { "epoch": 0.13, "learning_rate": 1.7424369747899163e-05, "logits/chosen": -2.173203229904175, "logits/rejected": -1.7052295207977295, "logps/chosen": -243.45892333984375, "logps/rejected": -221.82313537597656, "loss": 0.1552, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9518831968307495, "rewards/margins": 4.023397922515869, "rewards/rejected": -4.975281238555908, "step": 633 }, { "epoch": 0.13, "learning_rate": 1.7420168067226893e-05, "logits/chosen": -2.4001569747924805, "logits/rejected": -2.0408408641815186, "logps/chosen": -389.422119140625, "logps/rejected": -320.9310302734375, "loss": 0.4071, "rewards/accuracies": 0.8125, "rewards/chosen": -1.07696533203125, "rewards/margins": 3.422020435333252, "rewards/rejected": -4.498985767364502, "step": 634 }, { "epoch": 0.13, "learning_rate": 1.7415966386554623e-05, "logits/chosen": -1.846866250038147, "logits/rejected": -2.1061363220214844, "logps/chosen": -261.47821044921875, "logps/rejected": -300.9818420410156, "loss": 0.5803, "rewards/accuracies": 0.75, "rewards/chosen": -1.8103018999099731, "rewards/margins": 2.213616132736206, "rewards/rejected": -4.023918151855469, "step": 635 }, { "epoch": 0.13, "learning_rate": 1.7411764705882353e-05, "logits/chosen": -2.018479824066162, "logits/rejected": -1.6680288314819336, "logps/chosen": -267.2178039550781, "logps/rejected": -275.43145751953125, "loss": 0.2268, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9537438154220581, "rewards/margins": 3.1539361476898193, "rewards/rejected": -4.107679843902588, "step": 636 }, { "epoch": 0.13, "learning_rate": 1.7407563025210087e-05, "logits/chosen": -1.9982413053512573, "logits/rejected": -2.01298189163208, "logps/chosen": -197.8479766845703, "logps/rejected": -280.20562744140625, "loss": 0.2362, "rewards/accuracies": 0.875, "rewards/chosen": -1.2701283693313599, "rewards/margins": 3.0283617973327637, "rewards/rejected": -4.298490047454834, "step": 637 }, { "epoch": 0.13, "learning_rate": 1.7403361344537817e-05, "logits/chosen": -2.0463712215423584, "logits/rejected": -1.6519734859466553, "logps/chosen": -375.9577331542969, "logps/rejected": -357.0042724609375, "loss": 0.4341, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6750936508178711, "rewards/margins": 3.336115837097168, "rewards/rejected": -4.011209487915039, "step": 638 }, { "epoch": 0.13, "learning_rate": 1.7399159663865548e-05, "logits/chosen": -2.1566858291625977, "logits/rejected": -1.8198866844177246, "logps/chosen": -523.0704956054688, "logps/rejected": -479.570556640625, "loss": 0.1695, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3096842765808105, "rewards/margins": 4.248785972595215, "rewards/rejected": -5.558470249176025, "step": 639 }, { "epoch": 0.13, "learning_rate": 1.7394957983193278e-05, "logits/chosen": -2.3514232635498047, "logits/rejected": -1.799801230430603, "logps/chosen": -402.88671875, "logps/rejected": -378.341064453125, "loss": 0.1681, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9623078107833862, "rewards/margins": 3.933157444000244, "rewards/rejected": -4.89546537399292, "step": 640 }, { "epoch": 0.13, "learning_rate": 1.739075630252101e-05, "logits/chosen": -2.018665075302124, "logits/rejected": -1.97682523727417, "logps/chosen": -335.3333435058594, "logps/rejected": -375.4092102050781, "loss": 0.2325, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0102876424789429, "rewards/margins": 2.6628689765930176, "rewards/rejected": -3.673156499862671, "step": 641 }, { "epoch": 0.13, "learning_rate": 1.738655462184874e-05, "logits/chosen": -2.100670576095581, "logits/rejected": -1.8882553577423096, "logps/chosen": -307.13751220703125, "logps/rejected": -260.1299743652344, "loss": 0.0737, "rewards/accuracies": 1.0, "rewards/chosen": -1.2428934574127197, "rewards/margins": 3.9339916706085205, "rewards/rejected": -5.17688512802124, "step": 642 }, { "epoch": 0.13, "learning_rate": 1.7382352941176472e-05, "logits/chosen": -2.0367884635925293, "logits/rejected": -1.9806405305862427, "logps/chosen": -326.42205810546875, "logps/rejected": -299.490478515625, "loss": 0.3915, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1731911897659302, "rewards/margins": 3.3622539043426514, "rewards/rejected": -4.535445213317871, "step": 643 }, { "epoch": 0.13, "learning_rate": 1.7378151260504202e-05, "logits/chosen": -2.2748634815216064, "logits/rejected": -1.988891363143921, "logps/chosen": -289.9861145019531, "logps/rejected": -246.89251708984375, "loss": 0.4475, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3171441555023193, "rewards/margins": 2.587984085083008, "rewards/rejected": -3.905128240585327, "step": 644 }, { "epoch": 0.13, "learning_rate": 1.7373949579831936e-05, "logits/chosen": -1.8365641832351685, "logits/rejected": -1.6036657094955444, "logps/chosen": -248.7274932861328, "logps/rejected": -237.21253967285156, "loss": 0.4101, "rewards/accuracies": 0.75, "rewards/chosen": -1.5616984367370605, "rewards/margins": 2.34481143951416, "rewards/rejected": -3.9065098762512207, "step": 645 }, { "epoch": 0.14, "learning_rate": 1.7369747899159666e-05, "logits/chosen": -2.258124828338623, "logits/rejected": -1.7239854335784912, "logps/chosen": -402.8916015625, "logps/rejected": -326.09576416015625, "loss": 0.1693, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5601903200149536, "rewards/margins": 3.3523125648498535, "rewards/rejected": -3.9125027656555176, "step": 646 }, { "epoch": 0.14, "learning_rate": 1.7365546218487396e-05, "logits/chosen": -2.1163954734802246, "logits/rejected": -1.822643756866455, "logps/chosen": -258.8406677246094, "logps/rejected": -279.0697326660156, "loss": 0.1607, "rewards/accuracies": 0.9375, "rewards/chosen": -1.72913658618927, "rewards/margins": 4.059009075164795, "rewards/rejected": -5.788146018981934, "step": 647 }, { "epoch": 0.14, "learning_rate": 1.7361344537815126e-05, "logits/chosen": -1.8672274351119995, "logits/rejected": -1.8529818058013916, "logps/chosen": -333.4593505859375, "logps/rejected": -325.0284423828125, "loss": 0.2338, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6187670230865479, "rewards/margins": 2.986903190612793, "rewards/rejected": -4.605669975280762, "step": 648 }, { "epoch": 0.14, "learning_rate": 1.735714285714286e-05, "logits/chosen": -2.069216251373291, "logits/rejected": -1.6111959218978882, "logps/chosen": -424.6082458496094, "logps/rejected": -294.0614318847656, "loss": 0.5072, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6537176370620728, "rewards/margins": 2.450573205947876, "rewards/rejected": -4.104290962219238, "step": 649 }, { "epoch": 0.14, "learning_rate": 1.735294117647059e-05, "logits/chosen": -2.2140450477600098, "logits/rejected": -2.270001173019409, "logps/chosen": -415.84710693359375, "logps/rejected": -348.36083984375, "loss": 0.5278, "rewards/accuracies": 0.8125, "rewards/chosen": -1.184394121170044, "rewards/margins": 3.0564780235290527, "rewards/rejected": -4.240871906280518, "step": 650 }, { "epoch": 0.14, "learning_rate": 1.734873949579832e-05, "logits/chosen": -2.2667994499206543, "logits/rejected": -1.8827555179595947, "logps/chosen": -440.50213623046875, "logps/rejected": -470.83642578125, "loss": 0.1936, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8355374932289124, "rewards/margins": 3.2577900886535645, "rewards/rejected": -4.093327522277832, "step": 651 }, { "epoch": 0.14, "learning_rate": 1.7344537815126054e-05, "logits/chosen": -2.031855344772339, "logits/rejected": -1.4163709878921509, "logps/chosen": -429.764892578125, "logps/rejected": -299.6787414550781, "loss": 0.1584, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1578457355499268, "rewards/margins": 3.0670769214630127, "rewards/rejected": -4.224922180175781, "step": 652 }, { "epoch": 0.14, "learning_rate": 1.7340336134453784e-05, "logits/chosen": -2.3417391777038574, "logits/rejected": -2.0120458602905273, "logps/chosen": -378.86724853515625, "logps/rejected": -320.24847412109375, "loss": 0.4301, "rewards/accuracies": 0.875, "rewards/chosen": -0.8696843981742859, "rewards/margins": 3.079339027404785, "rewards/rejected": -3.9490230083465576, "step": 653 }, { "epoch": 0.14, "learning_rate": 1.7336134453781514e-05, "logits/chosen": -1.899755835533142, "logits/rejected": -2.2152485847473145, "logps/chosen": -289.23052978515625, "logps/rejected": -277.4675598144531, "loss": 0.2919, "rewards/accuracies": 0.875, "rewards/chosen": -0.9454280138015747, "rewards/margins": 2.594362258911133, "rewards/rejected": -3.539790391921997, "step": 654 }, { "epoch": 0.14, "learning_rate": 1.7331932773109245e-05, "logits/chosen": -2.056635618209839, "logits/rejected": -2.1157376766204834, "logps/chosen": -267.3724670410156, "logps/rejected": -340.13580322265625, "loss": 0.4387, "rewards/accuracies": 0.8125, "rewards/chosen": -2.224498987197876, "rewards/margins": 2.2869489192962646, "rewards/rejected": -4.511447906494141, "step": 655 }, { "epoch": 0.14, "learning_rate": 1.7327731092436978e-05, "logits/chosen": -1.9028781652450562, "logits/rejected": -2.004976511001587, "logps/chosen": -274.9028015136719, "logps/rejected": -434.4044189453125, "loss": 0.2837, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2780661582946777, "rewards/margins": 3.7909231185913086, "rewards/rejected": -5.068988800048828, "step": 656 }, { "epoch": 0.14, "learning_rate": 1.732352941176471e-05, "logits/chosen": -1.7320234775543213, "logits/rejected": -2.093090534210205, "logps/chosen": -261.589599609375, "logps/rejected": -349.3472900390625, "loss": 0.2373, "rewards/accuracies": 0.875, "rewards/chosen": -1.0084866285324097, "rewards/margins": 4.010622024536133, "rewards/rejected": -5.019108772277832, "step": 657 }, { "epoch": 0.14, "learning_rate": 1.731932773109244e-05, "logits/chosen": -1.9890551567077637, "logits/rejected": -1.9334732294082642, "logps/chosen": -264.3675231933594, "logps/rejected": -375.4937438964844, "loss": 0.4021, "rewards/accuracies": 0.875, "rewards/chosen": -1.6627682447433472, "rewards/margins": 3.1620559692382812, "rewards/rejected": -4.824824333190918, "step": 658 }, { "epoch": 0.14, "learning_rate": 1.731512605042017e-05, "logits/chosen": -2.1906607151031494, "logits/rejected": -2.0650694370269775, "logps/chosen": -410.758544921875, "logps/rejected": -411.5650939941406, "loss": 0.5833, "rewards/accuracies": 0.75, "rewards/chosen": -1.2554771900177002, "rewards/margins": 2.2665514945983887, "rewards/rejected": -3.5220284461975098, "step": 659 }, { "epoch": 0.14, "learning_rate": 1.7310924369747902e-05, "logits/chosen": -1.9395146369934082, "logits/rejected": -1.8063230514526367, "logps/chosen": -442.1985778808594, "logps/rejected": -372.2989196777344, "loss": 0.2103, "rewards/accuracies": 0.875, "rewards/chosen": -1.5384433269500732, "rewards/margins": 2.69295072555542, "rewards/rejected": -4.231394290924072, "step": 660 }, { "epoch": 0.14, "learning_rate": 1.7306722689075633e-05, "logits/chosen": -1.9820976257324219, "logits/rejected": -2.0732693672180176, "logps/chosen": -185.29586791992188, "logps/rejected": -235.94895935058594, "loss": 0.6932, "rewards/accuracies": 0.75, "rewards/chosen": -1.7201175689697266, "rewards/margins": 3.0689449310302734, "rewards/rejected": -4.7890625, "step": 661 }, { "epoch": 0.14, "learning_rate": 1.7302521008403363e-05, "logits/chosen": -1.9752837419509888, "logits/rejected": -1.9079817533493042, "logps/chosen": -369.05499267578125, "logps/rejected": -361.75048828125, "loss": 0.1479, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1471731662750244, "rewards/margins": 4.216258525848389, "rewards/rejected": -5.363431930541992, "step": 662 }, { "epoch": 0.14, "learning_rate": 1.7298319327731093e-05, "logits/chosen": -1.8032164573669434, "logits/rejected": -1.7057702541351318, "logps/chosen": -329.5226745605469, "logps/rejected": -334.4352111816406, "loss": 0.3653, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8003126382827759, "rewards/margins": 3.5742008686065674, "rewards/rejected": -5.374513149261475, "step": 663 }, { "epoch": 0.14, "learning_rate": 1.7294117647058827e-05, "logits/chosen": -2.2539303302764893, "logits/rejected": -1.832952618598938, "logps/chosen": -356.5443115234375, "logps/rejected": -388.2044372558594, "loss": 0.1476, "rewards/accuracies": 0.875, "rewards/chosen": -0.22541813552379608, "rewards/margins": 4.171304225921631, "rewards/rejected": -4.396722316741943, "step": 664 }, { "epoch": 0.14, "learning_rate": 1.7289915966386557e-05, "logits/chosen": -2.0836853981018066, "logits/rejected": -1.8866572380065918, "logps/chosen": -367.36224365234375, "logps/rejected": -300.6065673828125, "loss": 0.4465, "rewards/accuracies": 0.875, "rewards/chosen": -1.2593399286270142, "rewards/margins": 2.918307304382324, "rewards/rejected": -4.177647113800049, "step": 665 }, { "epoch": 0.14, "learning_rate": 1.7285714285714287e-05, "logits/chosen": -2.4005470275878906, "logits/rejected": -1.7531957626342773, "logps/chosen": -428.22235107421875, "logps/rejected": -350.5605773925781, "loss": 0.1366, "rewards/accuracies": 1.0, "rewards/chosen": -0.9964143633842468, "rewards/margins": 3.841459274291992, "rewards/rejected": -4.837873935699463, "step": 666 }, { "epoch": 0.14, "learning_rate": 1.7281512605042017e-05, "logits/chosen": -2.4517130851745605, "logits/rejected": -2.0735883712768555, "logps/chosen": -390.8885803222656, "logps/rejected": -300.8526916503906, "loss": 0.2846, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7490054368972778, "rewards/margins": 2.4245247840881348, "rewards/rejected": -4.173530101776123, "step": 667 }, { "epoch": 0.14, "learning_rate": 1.727731092436975e-05, "logits/chosen": -2.218165636062622, "logits/rejected": -1.9664578437805176, "logps/chosen": -390.86871337890625, "logps/rejected": -365.5804443359375, "loss": 0.3374, "rewards/accuracies": 0.75, "rewards/chosen": -0.48010826110839844, "rewards/margins": 3.423478603363037, "rewards/rejected": -3.9035871028900146, "step": 668 }, { "epoch": 0.14, "learning_rate": 1.727310924369748e-05, "logits/chosen": -2.1214077472686768, "logits/rejected": -1.6956534385681152, "logps/chosen": -267.4032897949219, "logps/rejected": -285.0607604980469, "loss": 0.2044, "rewards/accuracies": 0.875, "rewards/chosen": -0.6615678668022156, "rewards/margins": 3.4231882095336914, "rewards/rejected": -4.084755897521973, "step": 669 }, { "epoch": 0.14, "learning_rate": 1.726890756302521e-05, "logits/chosen": -1.928623914718628, "logits/rejected": -1.538848638534546, "logps/chosen": -241.68421936035156, "logps/rejected": -205.2398223876953, "loss": 0.744, "rewards/accuracies": 0.8125, "rewards/chosen": -1.82919442653656, "rewards/margins": 1.6892874240875244, "rewards/rejected": -3.518481731414795, "step": 670 }, { "epoch": 0.14, "learning_rate": 1.7264705882352945e-05, "logits/chosen": -2.073869466781616, "logits/rejected": -1.9732764959335327, "logps/chosen": -356.5965576171875, "logps/rejected": -307.3775634765625, "loss": 0.2087, "rewards/accuracies": 0.875, "rewards/chosen": -0.8155758380889893, "rewards/margins": 2.8724477291107178, "rewards/rejected": -3.688023567199707, "step": 671 }, { "epoch": 0.14, "learning_rate": 1.7260504201680675e-05, "logits/chosen": -1.957749605178833, "logits/rejected": -1.5791001319885254, "logps/chosen": -336.7607116699219, "logps/rejected": -325.90692138671875, "loss": 0.142, "rewards/accuracies": 1.0, "rewards/chosen": -0.8536182641983032, "rewards/margins": 3.7742176055908203, "rewards/rejected": -4.627835273742676, "step": 672 }, { "epoch": 0.14, "learning_rate": 1.7256302521008406e-05, "logits/chosen": -1.7034693956375122, "logits/rejected": -1.8518991470336914, "logps/chosen": -230.3818359375, "logps/rejected": -261.7847900390625, "loss": 0.508, "rewards/accuracies": 0.875, "rewards/chosen": -1.9512046575546265, "rewards/margins": 1.807443380355835, "rewards/rejected": -3.758648157119751, "step": 673 }, { "epoch": 0.14, "learning_rate": 1.7252100840336136e-05, "logits/chosen": -2.013270378112793, "logits/rejected": -1.9225053787231445, "logps/chosen": -399.438720703125, "logps/rejected": -281.4459228515625, "loss": 0.4501, "rewards/accuracies": 0.875, "rewards/chosen": -1.171720027923584, "rewards/margins": 1.7998218536376953, "rewards/rejected": -2.9715418815612793, "step": 674 }, { "epoch": 0.14, "learning_rate": 1.724789915966387e-05, "logits/chosen": -2.1820831298828125, "logits/rejected": -2.063739776611328, "logps/chosen": -281.82366943359375, "logps/rejected": -291.52923583984375, "loss": 0.6628, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3655743598937988, "rewards/margins": 3.2180187702178955, "rewards/rejected": -4.583592891693115, "step": 675 }, { "epoch": 0.14, "learning_rate": 1.72436974789916e-05, "logits/chosen": -2.045133113861084, "logits/rejected": -1.7008068561553955, "logps/chosen": -278.39117431640625, "logps/rejected": -265.5382080078125, "loss": 0.3397, "rewards/accuracies": 0.75, "rewards/chosen": -0.613903284072876, "rewards/margins": 2.2477810382843018, "rewards/rejected": -2.861684560775757, "step": 676 }, { "epoch": 0.14, "learning_rate": 1.723949579831933e-05, "logits/chosen": -1.9759258031845093, "logits/rejected": -1.7019156217575073, "logps/chosen": -283.2732238769531, "logps/rejected": -333.37255859375, "loss": 0.3952, "rewards/accuracies": 0.8125, "rewards/chosen": -1.075766921043396, "rewards/margins": 2.7298686504364014, "rewards/rejected": -3.805635690689087, "step": 677 }, { "epoch": 0.14, "learning_rate": 1.723529411764706e-05, "logits/chosen": -2.427515983581543, "logits/rejected": -2.209681749343872, "logps/chosen": -410.6153564453125, "logps/rejected": -492.728759765625, "loss": 0.1751, "rewards/accuracies": 0.9375, "rewards/chosen": 0.1429438591003418, "rewards/margins": 4.598230361938477, "rewards/rejected": -4.455286026000977, "step": 678 }, { "epoch": 0.14, "learning_rate": 1.7231092436974794e-05, "logits/chosen": -2.0737991333007812, "logits/rejected": -2.0608856678009033, "logps/chosen": -266.1748046875, "logps/rejected": -291.5542907714844, "loss": 0.2251, "rewards/accuracies": 0.875, "rewards/chosen": -0.7223702669143677, "rewards/margins": 3.3233089447021484, "rewards/rejected": -4.045679092407227, "step": 679 }, { "epoch": 0.14, "learning_rate": 1.7226890756302524e-05, "logits/chosen": -2.1106553077697754, "logits/rejected": -1.6831328868865967, "logps/chosen": -427.328857421875, "logps/rejected": -325.7439270019531, "loss": 0.5774, "rewards/accuracies": 0.6875, "rewards/chosen": -1.104979395866394, "rewards/margins": 3.377889633178711, "rewards/rejected": -4.482868671417236, "step": 680 }, { "epoch": 0.14, "learning_rate": 1.7222689075630254e-05, "logits/chosen": -1.9135695695877075, "logits/rejected": -2.0843729972839355, "logps/chosen": -343.29833984375, "logps/rejected": -408.4560546875, "loss": 0.335, "rewards/accuracies": 0.75, "rewards/chosen": -0.5545223951339722, "rewards/margins": 2.8233156204223633, "rewards/rejected": -3.377838134765625, "step": 681 }, { "epoch": 0.14, "learning_rate": 1.7218487394957984e-05, "logits/chosen": -1.7749607563018799, "logits/rejected": -1.782073974609375, "logps/chosen": -310.2999572753906, "logps/rejected": -361.42608642578125, "loss": 0.1943, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8468155860900879, "rewards/margins": 3.1006357669830322, "rewards/rejected": -3.947451114654541, "step": 682 }, { "epoch": 0.14, "learning_rate": 1.7214285714285718e-05, "logits/chosen": -1.9808976650238037, "logits/rejected": -2.119267463684082, "logps/chosen": -258.61102294921875, "logps/rejected": -317.12506103515625, "loss": 0.1242, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6111612915992737, "rewards/margins": 3.6671926975250244, "rewards/rejected": -4.278353691101074, "step": 683 }, { "epoch": 0.14, "learning_rate": 1.7210084033613448e-05, "logits/chosen": -2.331777572631836, "logits/rejected": -2.1018166542053223, "logps/chosen": -336.4613037109375, "logps/rejected": -345.8485412597656, "loss": 0.3093, "rewards/accuracies": 0.875, "rewards/chosen": -0.6613254547119141, "rewards/margins": 3.3645129203796387, "rewards/rejected": -4.025838375091553, "step": 684 }, { "epoch": 0.14, "learning_rate": 1.720588235294118e-05, "logits/chosen": -2.323230266571045, "logits/rejected": -1.4636880159378052, "logps/chosen": -343.62652587890625, "logps/rejected": -265.6925964355469, "loss": 0.1822, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8246605396270752, "rewards/margins": 3.6590194702148438, "rewards/rejected": -4.48367977142334, "step": 685 }, { "epoch": 0.14, "learning_rate": 1.720168067226891e-05, "logits/chosen": -1.9645280838012695, "logits/rejected": -2.000112533569336, "logps/chosen": -412.9718933105469, "logps/rejected": -385.92730712890625, "loss": 0.2232, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06948405504226685, "rewards/margins": 3.859463691711426, "rewards/rejected": -3.928947687149048, "step": 686 }, { "epoch": 0.14, "learning_rate": 1.7197478991596642e-05, "logits/chosen": -2.3017377853393555, "logits/rejected": -2.226318359375, "logps/chosen": -371.52813720703125, "logps/rejected": -386.92352294921875, "loss": 0.569, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8945224285125732, "rewards/margins": 2.5431067943573, "rewards/rejected": -3.437629222869873, "step": 687 }, { "epoch": 0.14, "learning_rate": 1.7193277310924372e-05, "logits/chosen": -2.076875686645508, "logits/rejected": -2.1938681602478027, "logps/chosen": -236.74893188476562, "logps/rejected": -295.1387939453125, "loss": 0.4666, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1517714262008667, "rewards/margins": 3.644257068634033, "rewards/rejected": -4.7960286140441895, "step": 688 }, { "epoch": 0.14, "learning_rate": 1.7189075630252103e-05, "logits/chosen": -1.9153176546096802, "logits/rejected": -1.8392949104309082, "logps/chosen": -319.8885498046875, "logps/rejected": -331.46771240234375, "loss": 0.5233, "rewards/accuracies": 0.75, "rewards/chosen": -0.9958435297012329, "rewards/margins": 2.5478975772857666, "rewards/rejected": -3.543741226196289, "step": 689 }, { "epoch": 0.14, "learning_rate": 1.7184873949579833e-05, "logits/chosen": -2.0593974590301514, "logits/rejected": -1.7980951070785522, "logps/chosen": -396.2750244140625, "logps/rejected": -330.88067626953125, "loss": 0.1758, "rewards/accuracies": 1.0, "rewards/chosen": -0.18590497970581055, "rewards/margins": 2.9292798042297363, "rewards/rejected": -3.115185022354126, "step": 690 }, { "epoch": 0.14, "learning_rate": 1.7180672268907563e-05, "logits/chosen": -2.0920934677124023, "logits/rejected": -1.928424596786499, "logps/chosen": -305.40850830078125, "logps/rejected": -352.9814147949219, "loss": 0.4255, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8967874050140381, "rewards/margins": 3.9730615615844727, "rewards/rejected": -4.86984920501709, "step": 691 }, { "epoch": 0.14, "learning_rate": 1.7176470588235293e-05, "logits/chosen": -1.8887488842010498, "logits/rejected": -1.8493366241455078, "logps/chosen": -380.24029541015625, "logps/rejected": -452.76116943359375, "loss": 0.1294, "rewards/accuracies": 1.0, "rewards/chosen": -0.5439873337745667, "rewards/margins": 3.9800472259521484, "rewards/rejected": -4.5240349769592285, "step": 692 }, { "epoch": 0.14, "learning_rate": 1.7172268907563027e-05, "logits/chosen": -1.9121209383010864, "logits/rejected": -1.6266415119171143, "logps/chosen": -254.69651794433594, "logps/rejected": -304.2913513183594, "loss": 0.2047, "rewards/accuracies": 0.875, "rewards/chosen": -0.43852972984313965, "rewards/margins": 3.38332462310791, "rewards/rejected": -3.82185435295105, "step": 693 }, { "epoch": 0.15, "learning_rate": 1.7168067226890757e-05, "logits/chosen": -1.6789984703063965, "logits/rejected": -1.8904715776443481, "logps/chosen": -421.5914001464844, "logps/rejected": -411.797119140625, "loss": 0.3554, "rewards/accuracies": 0.875, "rewards/chosen": -0.552094578742981, "rewards/margins": 3.0789880752563477, "rewards/rejected": -3.6310830116271973, "step": 694 }, { "epoch": 0.15, "learning_rate": 1.7163865546218487e-05, "logits/chosen": -2.0443294048309326, "logits/rejected": -1.7869457006454468, "logps/chosen": -344.9375, "logps/rejected": -263.63287353515625, "loss": 0.2826, "rewards/accuracies": 0.875, "rewards/chosen": -0.7583024501800537, "rewards/margins": 3.060530662536621, "rewards/rejected": -3.818833112716675, "step": 695 }, { "epoch": 0.15, "learning_rate": 1.7159663865546218e-05, "logits/chosen": -2.0378646850585938, "logits/rejected": -2.0197155475616455, "logps/chosen": -322.33917236328125, "logps/rejected": -353.748291015625, "loss": 0.7203, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6857253313064575, "rewards/margins": 1.2424486875534058, "rewards/rejected": -2.9281740188598633, "step": 696 }, { "epoch": 0.15, "learning_rate": 1.715546218487395e-05, "logits/chosen": -2.2373576164245605, "logits/rejected": -2.3120899200439453, "logps/chosen": -371.76483154296875, "logps/rejected": -389.9080810546875, "loss": 0.3095, "rewards/accuracies": 0.8125, "rewards/chosen": -0.5310362577438354, "rewards/margins": 3.078679084777832, "rewards/rejected": -3.609715700149536, "step": 697 }, { "epoch": 0.15, "learning_rate": 1.715126050420168e-05, "logits/chosen": -2.0614230632781982, "logits/rejected": -2.0350730419158936, "logps/chosen": -329.12042236328125, "logps/rejected": -334.38427734375, "loss": 0.1935, "rewards/accuracies": 0.9375, "rewards/chosen": -0.19396349787712097, "rewards/margins": 3.8307032585144043, "rewards/rejected": -4.024666786193848, "step": 698 }, { "epoch": 0.15, "learning_rate": 1.714705882352941e-05, "logits/chosen": -2.2367663383483887, "logits/rejected": -1.8684735298156738, "logps/chosen": -333.8081359863281, "logps/rejected": -328.0357666015625, "loss": 0.2564, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1592206954956055, "rewards/margins": 3.8257079124450684, "rewards/rejected": -4.984929084777832, "step": 699 }, { "epoch": 0.15, "learning_rate": 1.7142857142857142e-05, "logits/chosen": -2.156998634338379, "logits/rejected": -1.6880600452423096, "logps/chosen": -379.6072082519531, "logps/rejected": -370.2983093261719, "loss": 0.2015, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7204073667526245, "rewards/margins": 4.4051666259765625, "rewards/rejected": -5.125573635101318, "step": 700 }, { "epoch": 0.15, "learning_rate": 1.7138655462184875e-05, "logits/chosen": -2.0436623096466064, "logits/rejected": -1.5975549221038818, "logps/chosen": -311.1147155761719, "logps/rejected": -296.11279296875, "loss": 0.2477, "rewards/accuracies": 0.875, "rewards/chosen": -1.372866153717041, "rewards/margins": 3.583177089691162, "rewards/rejected": -4.956043243408203, "step": 701 }, { "epoch": 0.15, "learning_rate": 1.7134453781512606e-05, "logits/chosen": -2.10982346534729, "logits/rejected": -1.935610294342041, "logps/chosen": -346.2196960449219, "logps/rejected": -352.525146484375, "loss": 0.3001, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9308915734291077, "rewards/margins": 3.532325029373169, "rewards/rejected": -4.463216781616211, "step": 702 }, { "epoch": 0.15, "learning_rate": 1.7130252100840336e-05, "logits/chosen": -2.3135061264038086, "logits/rejected": -2.128934621810913, "logps/chosen": -383.6361083984375, "logps/rejected": -321.07574462890625, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": -0.6225247979164124, "rewards/margins": 2.862924337387085, "rewards/rejected": -3.4854490756988525, "step": 703 }, { "epoch": 0.15, "learning_rate": 1.7126050420168066e-05, "logits/chosen": -2.0983588695526123, "logits/rejected": -2.1279656887054443, "logps/chosen": -465.5097351074219, "logps/rejected": -394.8746032714844, "loss": 0.0862, "rewards/accuracies": 1.0, "rewards/chosen": -0.588475227355957, "rewards/margins": 3.6594579219818115, "rewards/rejected": -4.2479329109191895, "step": 704 }, { "epoch": 0.15, "learning_rate": 1.71218487394958e-05, "logits/chosen": -2.2282066345214844, "logits/rejected": -1.9480252265930176, "logps/chosen": -341.08441162109375, "logps/rejected": -319.0345153808594, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/chosen": -0.9014024138450623, "rewards/margins": 4.911703109741211, "rewards/rejected": -5.813106060028076, "step": 705 }, { "epoch": 0.15, "learning_rate": 1.711764705882353e-05, "logits/chosen": -2.0968925952911377, "logits/rejected": -1.6364997625350952, "logps/chosen": -340.4033203125, "logps/rejected": -342.8914794921875, "loss": 0.2481, "rewards/accuracies": 0.9375, "rewards/chosen": -0.586866557598114, "rewards/margins": 3.365581512451172, "rewards/rejected": -3.9524483680725098, "step": 706 }, { "epoch": 0.15, "learning_rate": 1.711344537815126e-05, "logits/chosen": -2.2105777263641357, "logits/rejected": -2.0825812816619873, "logps/chosen": -353.2305908203125, "logps/rejected": -412.3689880371094, "loss": 0.3513, "rewards/accuracies": 0.8125, "rewards/chosen": -0.39845770597457886, "rewards/margins": 3.2391905784606934, "rewards/rejected": -3.637648582458496, "step": 707 }, { "epoch": 0.15, "learning_rate": 1.7109243697478994e-05, "logits/chosen": -2.11026668548584, "logits/rejected": -1.5614155530929565, "logps/chosen": -384.54168701171875, "logps/rejected": -286.07867431640625, "loss": 0.1942, "rewards/accuracies": 0.875, "rewards/chosen": -0.9091488122940063, "rewards/margins": 3.0858840942382812, "rewards/rejected": -3.995033025741577, "step": 708 }, { "epoch": 0.15, "learning_rate": 1.7105042016806724e-05, "logits/chosen": -2.072533130645752, "logits/rejected": -1.6032205820083618, "logps/chosen": -278.2815856933594, "logps/rejected": -250.24232482910156, "loss": 0.1631, "rewards/accuracies": 1.0, "rewards/chosen": -0.03345600143074989, "rewards/margins": 3.1667966842651367, "rewards/rejected": -3.2002525329589844, "step": 709 }, { "epoch": 0.15, "learning_rate": 1.7100840336134454e-05, "logits/chosen": -2.1043851375579834, "logits/rejected": -1.4964663982391357, "logps/chosen": -335.7789306640625, "logps/rejected": -273.5032653808594, "loss": 0.2496, "rewards/accuracies": 0.9375, "rewards/chosen": -0.2616996765136719, "rewards/margins": 2.377262592315674, "rewards/rejected": -2.6389622688293457, "step": 710 }, { "epoch": 0.15, "learning_rate": 1.7096638655462184e-05, "logits/chosen": -1.977784276008606, "logits/rejected": -2.0842790603637695, "logps/chosen": -374.2403259277344, "logps/rejected": -359.81427001953125, "loss": 0.2648, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0386358499526978, "rewards/margins": 3.4374914169311523, "rewards/rejected": -4.476127624511719, "step": 711 }, { "epoch": 0.15, "learning_rate": 1.7092436974789918e-05, "logits/chosen": -2.151043176651001, "logits/rejected": -1.6124058961868286, "logps/chosen": -352.8233642578125, "logps/rejected": -310.1890869140625, "loss": 0.107, "rewards/accuracies": 0.9375, "rewards/chosen": -1.38482666015625, "rewards/margins": 4.073087215423584, "rewards/rejected": -5.457913398742676, "step": 712 }, { "epoch": 0.15, "learning_rate": 1.7088235294117648e-05, "logits/chosen": -1.9535274505615234, "logits/rejected": -1.2877719402313232, "logps/chosen": -284.9102783203125, "logps/rejected": -276.1152648925781, "loss": 0.382, "rewards/accuracies": 0.75, "rewards/chosen": -1.4412153959274292, "rewards/margins": 2.972545862197876, "rewards/rejected": -4.413761138916016, "step": 713 }, { "epoch": 0.15, "learning_rate": 1.708403361344538e-05, "logits/chosen": -2.0154826641082764, "logits/rejected": -1.9935276508331299, "logps/chosen": -400.90594482421875, "logps/rejected": -342.13616943359375, "loss": 0.2915, "rewards/accuracies": 0.875, "rewards/chosen": -1.4212172031402588, "rewards/margins": 3.207465171813965, "rewards/rejected": -4.6286821365356445, "step": 714 }, { "epoch": 0.15, "learning_rate": 1.707983193277311e-05, "logits/chosen": -1.8846263885498047, "logits/rejected": -1.741307020187378, "logps/chosen": -346.0419921875, "logps/rejected": -457.7156066894531, "loss": 0.6746, "rewards/accuracies": 0.5625, "rewards/chosen": -2.2680182456970215, "rewards/margins": 2.1655664443969727, "rewards/rejected": -4.433585166931152, "step": 715 }, { "epoch": 0.15, "learning_rate": 1.7075630252100842e-05, "logits/chosen": -1.9114598035812378, "logits/rejected": -2.1001482009887695, "logps/chosen": -385.4615478515625, "logps/rejected": -457.49261474609375, "loss": 0.2002, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6733380556106567, "rewards/margins": 4.430871963500977, "rewards/rejected": -6.104209899902344, "step": 716 }, { "epoch": 0.15, "learning_rate": 1.7071428571428573e-05, "logits/chosen": -2.184648036956787, "logits/rejected": -1.9081792831420898, "logps/chosen": -359.1806640625, "logps/rejected": -432.8729248046875, "loss": 0.1026, "rewards/accuracies": 1.0, "rewards/chosen": -1.5722174644470215, "rewards/margins": 4.043186187744141, "rewards/rejected": -5.615403175354004, "step": 717 }, { "epoch": 0.15, "learning_rate": 1.7067226890756303e-05, "logits/chosen": -1.9436229467391968, "logits/rejected": -1.7883977890014648, "logps/chosen": -342.4736022949219, "logps/rejected": -300.35491943359375, "loss": 0.4697, "rewards/accuracies": 0.75, "rewards/chosen": -1.4920488595962524, "rewards/margins": 2.424128532409668, "rewards/rejected": -3.916177272796631, "step": 718 }, { "epoch": 0.15, "learning_rate": 1.7063025210084033e-05, "logits/chosen": -1.891648530960083, "logits/rejected": -1.6593637466430664, "logps/chosen": -192.44589233398438, "logps/rejected": -186.4752197265625, "loss": 0.2824, "rewards/accuracies": 0.875, "rewards/chosen": -2.433328628540039, "rewards/margins": 2.645397663116455, "rewards/rejected": -5.078725814819336, "step": 719 }, { "epoch": 0.15, "learning_rate": 1.7058823529411767e-05, "logits/chosen": -1.926894187927246, "logits/rejected": -1.8717057704925537, "logps/chosen": -340.2047119140625, "logps/rejected": -311.362060546875, "loss": 0.6939, "rewards/accuracies": 0.625, "rewards/chosen": -1.599405288696289, "rewards/margins": 2.407620429992676, "rewards/rejected": -4.007025718688965, "step": 720 }, { "epoch": 0.15, "learning_rate": 1.7054621848739497e-05, "logits/chosen": -2.0984959602355957, "logits/rejected": -1.996431827545166, "logps/chosen": -403.31640625, "logps/rejected": -362.71942138671875, "loss": 0.2613, "rewards/accuracies": 0.875, "rewards/chosen": -1.9902312755584717, "rewards/margins": 3.4481632709503174, "rewards/rejected": -5.438394546508789, "step": 721 }, { "epoch": 0.15, "learning_rate": 1.7050420168067227e-05, "logits/chosen": -1.9579122066497803, "logits/rejected": -2.276113748550415, "logps/chosen": -257.9245300292969, "logps/rejected": -358.4361877441406, "loss": 0.4497, "rewards/accuracies": 0.6875, "rewards/chosen": -1.913058876991272, "rewards/margins": 3.0072388648986816, "rewards/rejected": -4.920297622680664, "step": 722 }, { "epoch": 0.15, "learning_rate": 1.7046218487394957e-05, "logits/chosen": -2.154367208480835, "logits/rejected": -1.9137614965438843, "logps/chosen": -345.5387268066406, "logps/rejected": -380.2763671875, "loss": 0.6701, "rewards/accuracies": 0.875, "rewards/chosen": -2.485901355743408, "rewards/margins": 3.1181859970092773, "rewards/rejected": -5.604086875915527, "step": 723 }, { "epoch": 0.15, "learning_rate": 1.704201680672269e-05, "logits/chosen": -2.0980918407440186, "logits/rejected": -1.8121957778930664, "logps/chosen": -268.64654541015625, "logps/rejected": -273.2153015136719, "loss": 0.4118, "rewards/accuracies": 0.8125, "rewards/chosen": -2.173196792602539, "rewards/margins": 2.406545877456665, "rewards/rejected": -4.579742431640625, "step": 724 }, { "epoch": 0.15, "learning_rate": 1.703781512605042e-05, "logits/chosen": -1.8285319805145264, "logits/rejected": -2.174788475036621, "logps/chosen": -206.33349609375, "logps/rejected": -345.8277282714844, "loss": 0.5907, "rewards/accuracies": 0.625, "rewards/chosen": -1.9394997358322144, "rewards/margins": 2.211231231689453, "rewards/rejected": -4.150731086730957, "step": 725 }, { "epoch": 0.15, "learning_rate": 1.703361344537815e-05, "logits/chosen": -2.2193386554718018, "logits/rejected": -2.2776079177856445, "logps/chosen": -248.9822235107422, "logps/rejected": -306.0496826171875, "loss": 0.2325, "rewards/accuracies": 0.875, "rewards/chosen": -2.1564745903015137, "rewards/margins": 4.135500907897949, "rewards/rejected": -6.291975021362305, "step": 726 }, { "epoch": 0.15, "learning_rate": 1.702941176470588e-05, "logits/chosen": -2.177299737930298, "logits/rejected": -2.1963884830474854, "logps/chosen": -312.93438720703125, "logps/rejected": -401.07379150390625, "loss": 0.1808, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3164889812469482, "rewards/margins": 5.062575340270996, "rewards/rejected": -6.379064559936523, "step": 727 }, { "epoch": 0.15, "learning_rate": 1.7025210084033615e-05, "logits/chosen": -1.960242509841919, "logits/rejected": -1.5620043277740479, "logps/chosen": -314.60357666015625, "logps/rejected": -320.797119140625, "loss": 0.4408, "rewards/accuracies": 0.8125, "rewards/chosen": -2.5881104469299316, "rewards/margins": 3.4220874309539795, "rewards/rejected": -6.010197639465332, "step": 728 }, { "epoch": 0.15, "learning_rate": 1.7021008403361345e-05, "logits/chosen": -2.244079113006592, "logits/rejected": -1.9179579019546509, "logps/chosen": -334.6098327636719, "logps/rejected": -318.0796203613281, "loss": 0.2865, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4356093406677246, "rewards/margins": 5.762797832489014, "rewards/rejected": -8.198407173156738, "step": 729 }, { "epoch": 0.15, "learning_rate": 1.7016806722689076e-05, "logits/chosen": -2.2572948932647705, "logits/rejected": -2.0175554752349854, "logps/chosen": -336.3603515625, "logps/rejected": -308.03411865234375, "loss": 0.6594, "rewards/accuracies": 0.75, "rewards/chosen": -2.2969253063201904, "rewards/margins": 2.8958585262298584, "rewards/rejected": -5.192783832550049, "step": 730 }, { "epoch": 0.15, "learning_rate": 1.701260504201681e-05, "logits/chosen": -1.9566154479980469, "logits/rejected": -2.1031384468078613, "logps/chosen": -163.4652557373047, "logps/rejected": -218.31568908691406, "loss": 0.4784, "rewards/accuracies": 0.8125, "rewards/chosen": -2.990065574645996, "rewards/margins": 2.536547899246216, "rewards/rejected": -5.526613235473633, "step": 731 }, { "epoch": 0.15, "learning_rate": 1.700840336134454e-05, "logits/chosen": -1.7026079893112183, "logits/rejected": -2.092538595199585, "logps/chosen": -382.9058837890625, "logps/rejected": -344.6308898925781, "loss": 0.3592, "rewards/accuracies": 0.875, "rewards/chosen": -2.7175614833831787, "rewards/margins": 3.662135124206543, "rewards/rejected": -6.379697322845459, "step": 732 }, { "epoch": 0.15, "learning_rate": 1.700420168067227e-05, "logits/chosen": -2.3577661514282227, "logits/rejected": -1.7167326211929321, "logps/chosen": -291.07647705078125, "logps/rejected": -293.70135498046875, "loss": 0.3766, "rewards/accuracies": 0.875, "rewards/chosen": -2.145514726638794, "rewards/margins": 3.0079023838043213, "rewards/rejected": -5.153417110443115, "step": 733 }, { "epoch": 0.15, "learning_rate": 1.7e-05, "logits/chosen": -1.9224162101745605, "logits/rejected": -2.0145130157470703, "logps/chosen": -301.79229736328125, "logps/rejected": -379.64312744140625, "loss": 0.3541, "rewards/accuracies": 0.8125, "rewards/chosen": -2.432405948638916, "rewards/margins": 3.0382261276245117, "rewards/rejected": -5.4706315994262695, "step": 734 }, { "epoch": 0.15, "learning_rate": 1.6995798319327733e-05, "logits/chosen": -1.698994517326355, "logits/rejected": -1.6362982988357544, "logps/chosen": -369.7939758300781, "logps/rejected": -318.4671936035156, "loss": 0.5482, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7628350257873535, "rewards/margins": 2.9005603790283203, "rewards/rejected": -5.663394927978516, "step": 735 }, { "epoch": 0.15, "learning_rate": 1.6991596638655464e-05, "logits/chosen": -1.9325566291809082, "logits/rejected": -1.7474031448364258, "logps/chosen": -197.885498046875, "logps/rejected": -297.7406005859375, "loss": 0.3897, "rewards/accuracies": 0.875, "rewards/chosen": -2.1120567321777344, "rewards/margins": 2.5959575176239014, "rewards/rejected": -4.708014011383057, "step": 736 }, { "epoch": 0.15, "learning_rate": 1.6987394957983194e-05, "logits/chosen": -2.037433624267578, "logits/rejected": -1.5862784385681152, "logps/chosen": -353.72991943359375, "logps/rejected": -349.3614501953125, "loss": 0.3791, "rewards/accuracies": 0.875, "rewards/chosen": -1.8576717376708984, "rewards/margins": 2.9837183952331543, "rewards/rejected": -4.841390132904053, "step": 737 }, { "epoch": 0.15, "learning_rate": 1.6983193277310924e-05, "logits/chosen": -2.2920360565185547, "logits/rejected": -2.04499888420105, "logps/chosen": -351.6866455078125, "logps/rejected": -353.43218994140625, "loss": 0.4162, "rewards/accuracies": 0.75, "rewards/chosen": -1.3684329986572266, "rewards/margins": 1.8947117328643799, "rewards/rejected": -3.2631447315216064, "step": 738 }, { "epoch": 0.15, "learning_rate": 1.6978991596638658e-05, "logits/chosen": -2.11529541015625, "logits/rejected": -1.5084693431854248, "logps/chosen": -350.6539611816406, "logps/rejected": -284.243408203125, "loss": 0.4971, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7144124507904053, "rewards/margins": 2.403724431991577, "rewards/rejected": -4.118136405944824, "step": 739 }, { "epoch": 0.15, "learning_rate": 1.6974789915966388e-05, "logits/chosen": -2.2437610626220703, "logits/rejected": -2.058847665786743, "logps/chosen": -365.35491943359375, "logps/rejected": -385.7453918457031, "loss": 0.5045, "rewards/accuracies": 0.75, "rewards/chosen": -1.2335970401763916, "rewards/margins": 2.8418381214141846, "rewards/rejected": -4.075435161590576, "step": 740 }, { "epoch": 0.16, "learning_rate": 1.6970588235294118e-05, "logits/chosen": -2.191389799118042, "logits/rejected": -1.9575977325439453, "logps/chosen": -381.34228515625, "logps/rejected": -313.0478210449219, "loss": 0.6515, "rewards/accuracies": 0.75, "rewards/chosen": -1.6277685165405273, "rewards/margins": 1.6641924381256104, "rewards/rejected": -3.291961193084717, "step": 741 }, { "epoch": 0.16, "learning_rate": 1.696638655462185e-05, "logits/chosen": -2.0234718322753906, "logits/rejected": -1.8011419773101807, "logps/chosen": -251.2014923095703, "logps/rejected": -293.146728515625, "loss": 0.2082, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7777428030967712, "rewards/margins": 4.249463081359863, "rewards/rejected": -5.027205944061279, "step": 742 }, { "epoch": 0.16, "learning_rate": 1.6962184873949582e-05, "logits/chosen": -1.864234447479248, "logits/rejected": -2.0068678855895996, "logps/chosen": -274.41192626953125, "logps/rejected": -315.5820617675781, "loss": 0.542, "rewards/accuracies": 0.8125, "rewards/chosen": -1.331112265586853, "rewards/margins": 4.016895294189453, "rewards/rejected": -5.3480072021484375, "step": 743 }, { "epoch": 0.16, "learning_rate": 1.6957983193277312e-05, "logits/chosen": -2.2664895057678223, "logits/rejected": -2.3592734336853027, "logps/chosen": -264.84539794921875, "logps/rejected": -315.04931640625, "loss": 0.1673, "rewards/accuracies": 0.875, "rewards/chosen": -1.0168516635894775, "rewards/margins": 4.741488456726074, "rewards/rejected": -5.758340358734131, "step": 744 }, { "epoch": 0.16, "learning_rate": 1.6953781512605042e-05, "logits/chosen": -2.037034034729004, "logits/rejected": -1.3760628700256348, "logps/chosen": -334.2329406738281, "logps/rejected": -353.3927307128906, "loss": 0.4428, "rewards/accuracies": 0.875, "rewards/chosen": -1.82315993309021, "rewards/margins": 2.4147167205810547, "rewards/rejected": -4.237876892089844, "step": 745 }, { "epoch": 0.16, "learning_rate": 1.6949579831932773e-05, "logits/chosen": -1.9302103519439697, "logits/rejected": -2.135316848754883, "logps/chosen": -240.90794372558594, "logps/rejected": -403.2741394042969, "loss": 0.3176, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8007187247276306, "rewards/margins": 3.941528558731079, "rewards/rejected": -4.742247104644775, "step": 746 }, { "epoch": 0.16, "learning_rate": 1.6945378151260506e-05, "logits/chosen": -1.7697516679763794, "logits/rejected": -1.861335039138794, "logps/chosen": -274.8431701660156, "logps/rejected": -358.81903076171875, "loss": 0.5667, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0028183460235596, "rewards/margins": 3.1203484535217285, "rewards/rejected": -5.123167037963867, "step": 747 }, { "epoch": 0.16, "learning_rate": 1.6941176470588237e-05, "logits/chosen": -2.134904384613037, "logits/rejected": -1.4478917121887207, "logps/chosen": -364.1796569824219, "logps/rejected": -301.8296813964844, "loss": 0.282, "rewards/accuracies": 0.875, "rewards/chosen": -0.9386551380157471, "rewards/margins": 3.4062767028808594, "rewards/rejected": -4.3449320793151855, "step": 748 }, { "epoch": 0.16, "learning_rate": 1.6936974789915967e-05, "logits/chosen": -2.1162972450256348, "logits/rejected": -1.95829176902771, "logps/chosen": -316.54095458984375, "logps/rejected": -275.2511901855469, "loss": 0.9433, "rewards/accuracies": 0.5, "rewards/chosen": -1.344026803970337, "rewards/margins": 1.6161044836044312, "rewards/rejected": -2.9601311683654785, "step": 749 }, { "epoch": 0.16, "learning_rate": 1.69327731092437e-05, "logits/chosen": -2.0439603328704834, "logits/rejected": -2.044919013977051, "logps/chosen": -273.7200927734375, "logps/rejected": -262.03143310546875, "loss": 0.32, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1411241292953491, "rewards/margins": 2.6149826049804688, "rewards/rejected": -3.756106376647949, "step": 750 }, { "epoch": 0.16, "learning_rate": 1.692857142857143e-05, "logits/chosen": -2.0982487201690674, "logits/rejected": -2.0909159183502197, "logps/chosen": -224.33486938476562, "logps/rejected": -326.01397705078125, "loss": 0.1559, "rewards/accuracies": 0.9375, "rewards/chosen": -1.608189344406128, "rewards/margins": 4.219444274902344, "rewards/rejected": -5.827633857727051, "step": 751 }, { "epoch": 0.16, "learning_rate": 1.692436974789916e-05, "logits/chosen": -2.2449052333831787, "logits/rejected": -1.840789794921875, "logps/chosen": -396.5846862792969, "logps/rejected": -487.5141906738281, "loss": 0.6014, "rewards/accuracies": 0.6875, "rewards/chosen": -2.068540573120117, "rewards/margins": 2.9587674140930176, "rewards/rejected": -5.027308464050293, "step": 752 }, { "epoch": 0.16, "learning_rate": 1.692016806722689e-05, "logits/chosen": -2.167710781097412, "logits/rejected": -2.1640191078186035, "logps/chosen": -353.65362548828125, "logps/rejected": -397.40386962890625, "loss": 0.8355, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2302634716033936, "rewards/margins": 2.131781578063965, "rewards/rejected": -3.3620448112487793, "step": 753 }, { "epoch": 0.16, "learning_rate": 1.6915966386554625e-05, "logits/chosen": -1.7070573568344116, "logits/rejected": -1.8579171895980835, "logps/chosen": -252.21473693847656, "logps/rejected": -327.1890869140625, "loss": 0.3548, "rewards/accuracies": 0.75, "rewards/chosen": -1.6616603136062622, "rewards/margins": 3.498135566711426, "rewards/rejected": -5.159795761108398, "step": 754 }, { "epoch": 0.16, "learning_rate": 1.6911764705882355e-05, "logits/chosen": -2.249856472015381, "logits/rejected": -1.8819094896316528, "logps/chosen": -373.22430419921875, "logps/rejected": -338.3874206542969, "loss": 0.4051, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7668718099594116, "rewards/margins": 2.1771368980407715, "rewards/rejected": -2.9440085887908936, "step": 755 }, { "epoch": 0.16, "learning_rate": 1.6907563025210085e-05, "logits/chosen": -2.296297550201416, "logits/rejected": -1.9027172327041626, "logps/chosen": -415.1441650390625, "logps/rejected": -323.9563903808594, "loss": 0.2058, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8324196934700012, "rewards/margins": 4.69835090637207, "rewards/rejected": -5.530771255493164, "step": 756 }, { "epoch": 0.16, "learning_rate": 1.6903361344537815e-05, "logits/chosen": -2.232555866241455, "logits/rejected": -1.6718475818634033, "logps/chosen": -339.8697204589844, "logps/rejected": -313.59759521484375, "loss": 0.2137, "rewards/accuracies": 0.9375, "rewards/chosen": -0.11247356981039047, "rewards/margins": 3.270537853240967, "rewards/rejected": -3.38301157951355, "step": 757 }, { "epoch": 0.16, "learning_rate": 1.689915966386555e-05, "logits/chosen": -2.215729236602783, "logits/rejected": -1.5736956596374512, "logps/chosen": -286.5233459472656, "logps/rejected": -239.6471405029297, "loss": 0.3545, "rewards/accuracies": 0.875, "rewards/chosen": -1.2287719249725342, "rewards/margins": 2.747135877609253, "rewards/rejected": -3.975907802581787, "step": 758 }, { "epoch": 0.16, "learning_rate": 1.689495798319328e-05, "logits/chosen": -1.9748581647872925, "logits/rejected": -1.919254183769226, "logps/chosen": -288.23077392578125, "logps/rejected": -351.3341369628906, "loss": 0.2988, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7039434909820557, "rewards/margins": 3.1637625694274902, "rewards/rejected": -4.867705821990967, "step": 759 }, { "epoch": 0.16, "learning_rate": 1.689075630252101e-05, "logits/chosen": -1.8948873281478882, "logits/rejected": -1.9115080833435059, "logps/chosen": -259.0742492675781, "logps/rejected": -404.9477233886719, "loss": 0.2827, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1262116432189941, "rewards/margins": 3.5397770404815674, "rewards/rejected": -4.665988922119141, "step": 760 }, { "epoch": 0.16, "learning_rate": 1.688655462184874e-05, "logits/chosen": -2.2461085319519043, "logits/rejected": -1.8397977352142334, "logps/chosen": -323.16986083984375, "logps/rejected": -281.90606689453125, "loss": 0.2954, "rewards/accuracies": 0.875, "rewards/chosen": -1.00258469581604, "rewards/margins": 2.2382278442382812, "rewards/rejected": -3.2408125400543213, "step": 761 }, { "epoch": 0.16, "learning_rate": 1.6882352941176473e-05, "logits/chosen": -2.2737436294555664, "logits/rejected": -2.0214569568634033, "logps/chosen": -253.75103759765625, "logps/rejected": -264.873779296875, "loss": 0.6854, "rewards/accuracies": 0.5625, "rewards/chosen": -2.4338724613189697, "rewards/margins": 2.171159505844116, "rewards/rejected": -4.605031967163086, "step": 762 }, { "epoch": 0.16, "learning_rate": 1.6878151260504203e-05, "logits/chosen": -2.2242767810821533, "logits/rejected": -2.18857479095459, "logps/chosen": -441.96405029296875, "logps/rejected": -390.2977294921875, "loss": 0.5068, "rewards/accuracies": 0.75, "rewards/chosen": -1.5113966464996338, "rewards/margins": 2.2426154613494873, "rewards/rejected": -3.754012107849121, "step": 763 }, { "epoch": 0.16, "learning_rate": 1.6873949579831934e-05, "logits/chosen": -2.274514675140381, "logits/rejected": -1.9777116775512695, "logps/chosen": -320.8673400878906, "logps/rejected": -258.4037170410156, "loss": 0.5442, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5064418315887451, "rewards/margins": 2.0420022010803223, "rewards/rejected": -3.5484437942504883, "step": 764 }, { "epoch": 0.16, "learning_rate": 1.6869747899159664e-05, "logits/chosen": -2.1914801597595215, "logits/rejected": -2.0470032691955566, "logps/chosen": -340.25311279296875, "logps/rejected": -362.69256591796875, "loss": 0.185, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1276934146881104, "rewards/margins": 4.396705627441406, "rewards/rejected": -5.524399280548096, "step": 765 }, { "epoch": 0.16, "learning_rate": 1.6865546218487397e-05, "logits/chosen": -2.424621343612671, "logits/rejected": -1.9833660125732422, "logps/chosen": -382.83404541015625, "logps/rejected": -272.07977294921875, "loss": 0.4559, "rewards/accuracies": 0.6875, "rewards/chosen": -1.056641697883606, "rewards/margins": 1.9395010471343994, "rewards/rejected": -2.996142864227295, "step": 766 }, { "epoch": 0.16, "learning_rate": 1.6861344537815128e-05, "logits/chosen": -2.056659698486328, "logits/rejected": -2.1205928325653076, "logps/chosen": -261.20574951171875, "logps/rejected": -331.51470947265625, "loss": 0.2219, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2542498111724854, "rewards/margins": 3.1566002368927, "rewards/rejected": -4.410849571228027, "step": 767 }, { "epoch": 0.16, "learning_rate": 1.6857142857142858e-05, "logits/chosen": -1.8308494091033936, "logits/rejected": -1.9774675369262695, "logps/chosen": -333.05780029296875, "logps/rejected": -360.0506591796875, "loss": 0.1289, "rewards/accuracies": 1.0, "rewards/chosen": -0.9276551604270935, "rewards/margins": 3.8657102584838867, "rewards/rejected": -4.793365478515625, "step": 768 }, { "epoch": 0.16, "learning_rate": 1.6852941176470588e-05, "logits/chosen": -2.085810422897339, "logits/rejected": -2.072783946990967, "logps/chosen": -315.7756042480469, "logps/rejected": -279.2412109375, "loss": 0.2876, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6299153566360474, "rewards/margins": 2.3403263092041016, "rewards/rejected": -3.9702417850494385, "step": 769 }, { "epoch": 0.16, "learning_rate": 1.6848739495798322e-05, "logits/chosen": -2.5391578674316406, "logits/rejected": -1.9431068897247314, "logps/chosen": -314.40704345703125, "logps/rejected": -261.99078369140625, "loss": 0.6811, "rewards/accuracies": 0.75, "rewards/chosen": -1.90847647190094, "rewards/margins": 1.9313626289367676, "rewards/rejected": -3.839838981628418, "step": 770 }, { "epoch": 0.16, "learning_rate": 1.6844537815126052e-05, "logits/chosen": -2.3814117908477783, "logits/rejected": -1.6420822143554688, "logps/chosen": -422.2216796875, "logps/rejected": -282.3962707519531, "loss": 0.1183, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9640604257583618, "rewards/margins": 3.6622517108917236, "rewards/rejected": -4.626311779022217, "step": 771 }, { "epoch": 0.16, "learning_rate": 1.6840336134453782e-05, "logits/chosen": -1.9254976511001587, "logits/rejected": -2.0351967811584473, "logps/chosen": -272.7548828125, "logps/rejected": -327.5586853027344, "loss": 0.1996, "rewards/accuracies": 0.9375, "rewards/chosen": -1.559951663017273, "rewards/margins": 3.5174148082733154, "rewards/rejected": -5.077365875244141, "step": 772 }, { "epoch": 0.16, "learning_rate": 1.6836134453781516e-05, "logits/chosen": -2.0609374046325684, "logits/rejected": -1.9988148212432861, "logps/chosen": -364.296875, "logps/rejected": -342.71685791015625, "loss": 0.3271, "rewards/accuracies": 0.75, "rewards/chosen": -1.0422489643096924, "rewards/margins": 2.7478864192962646, "rewards/rejected": -3.790135383605957, "step": 773 }, { "epoch": 0.16, "learning_rate": 1.6831932773109246e-05, "logits/chosen": -2.2556371688842773, "logits/rejected": -1.8432490825653076, "logps/chosen": -359.98486328125, "logps/rejected": -363.55517578125, "loss": 0.4182, "rewards/accuracies": 0.875, "rewards/chosen": -1.2572693824768066, "rewards/margins": 2.8683996200561523, "rewards/rejected": -4.125669002532959, "step": 774 }, { "epoch": 0.16, "learning_rate": 1.6827731092436976e-05, "logits/chosen": -2.075491428375244, "logits/rejected": -1.6872649192810059, "logps/chosen": -248.0001220703125, "logps/rejected": -273.78009033203125, "loss": 0.4588, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9449328184127808, "rewards/margins": 2.391058921813965, "rewards/rejected": -4.335991859436035, "step": 775 }, { "epoch": 0.16, "learning_rate": 1.6823529411764706e-05, "logits/chosen": -2.1104815006256104, "logits/rejected": -1.71555495262146, "logps/chosen": -255.82373046875, "logps/rejected": -241.81765747070312, "loss": 0.2554, "rewards/accuracies": 0.875, "rewards/chosen": -1.7972748279571533, "rewards/margins": 3.034355640411377, "rewards/rejected": -4.831630706787109, "step": 776 }, { "epoch": 0.16, "learning_rate": 1.681932773109244e-05, "logits/chosen": -2.084442615509033, "logits/rejected": -1.8904378414154053, "logps/chosen": -375.68731689453125, "logps/rejected": -372.9009704589844, "loss": 0.362, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0301165580749512, "rewards/margins": 3.63201904296875, "rewards/rejected": -4.662135601043701, "step": 777 }, { "epoch": 0.16, "learning_rate": 1.681512605042017e-05, "logits/chosen": -2.0034523010253906, "logits/rejected": -2.2124459743499756, "logps/chosen": -207.46792602539062, "logps/rejected": -279.690673828125, "loss": 0.3807, "rewards/accuracies": 0.875, "rewards/chosen": -1.4096014499664307, "rewards/margins": 3.102025032043457, "rewards/rejected": -4.511626243591309, "step": 778 }, { "epoch": 0.16, "learning_rate": 1.68109243697479e-05, "logits/chosen": -2.0103211402893066, "logits/rejected": -1.9724972248077393, "logps/chosen": -331.57659912109375, "logps/rejected": -597.255126953125, "loss": 0.2127, "rewards/accuracies": 0.875, "rewards/chosen": -1.7209285497665405, "rewards/margins": 3.5068488121032715, "rewards/rejected": -5.227777481079102, "step": 779 }, { "epoch": 0.16, "learning_rate": 1.680672268907563e-05, "logits/chosen": -2.1506237983703613, "logits/rejected": -2.103100538253784, "logps/chosen": -268.5717468261719, "logps/rejected": -291.78289794921875, "loss": 0.4498, "rewards/accuracies": 0.75, "rewards/chosen": -2.254150390625, "rewards/margins": 2.6157407760620117, "rewards/rejected": -4.86989164352417, "step": 780 }, { "epoch": 0.16, "learning_rate": 1.6802521008403364e-05, "logits/chosen": -2.157309055328369, "logits/rejected": -1.8166359663009644, "logps/chosen": -476.0903015136719, "logps/rejected": -417.87567138671875, "loss": 0.4418, "rewards/accuracies": 0.875, "rewards/chosen": -1.5498747825622559, "rewards/margins": 2.613753318786621, "rewards/rejected": -4.163628101348877, "step": 781 }, { "epoch": 0.16, "learning_rate": 1.6798319327731095e-05, "logits/chosen": -2.344633102416992, "logits/rejected": -2.0754342079162598, "logps/chosen": -395.18255615234375, "logps/rejected": -377.35589599609375, "loss": 0.4607, "rewards/accuracies": 0.75, "rewards/chosen": -1.523148536682129, "rewards/margins": 2.0642309188842773, "rewards/rejected": -3.5873796939849854, "step": 782 }, { "epoch": 0.16, "learning_rate": 1.6794117647058825e-05, "logits/chosen": -2.101289987564087, "logits/rejected": -1.602689504623413, "logps/chosen": -312.36981201171875, "logps/rejected": -350.0341796875, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -1.7673592567443848, "rewards/margins": 3.626525640487671, "rewards/rejected": -5.393884658813477, "step": 783 }, { "epoch": 0.16, "learning_rate": 1.6789915966386555e-05, "logits/chosen": -2.4433979988098145, "logits/rejected": -2.4493441581726074, "logps/chosen": -371.55267333984375, "logps/rejected": -392.18963623046875, "loss": 0.3999, "rewards/accuracies": 0.875, "rewards/chosen": -1.6184519529342651, "rewards/margins": 1.6528499126434326, "rewards/rejected": -3.2713019847869873, "step": 784 }, { "epoch": 0.16, "learning_rate": 1.678571428571429e-05, "logits/chosen": -2.0333924293518066, "logits/rejected": -2.0256452560424805, "logps/chosen": -319.7632141113281, "logps/rejected": -403.7701110839844, "loss": 0.0919, "rewards/accuracies": 1.0, "rewards/chosen": -0.9684919118881226, "rewards/margins": 4.64276123046875, "rewards/rejected": -5.611252784729004, "step": 785 }, { "epoch": 0.16, "learning_rate": 1.678151260504202e-05, "logits/chosen": -1.9279687404632568, "logits/rejected": -1.769026279449463, "logps/chosen": -356.0840148925781, "logps/rejected": -363.72796630859375, "loss": 0.349, "rewards/accuracies": 0.875, "rewards/chosen": -1.9372475147247314, "rewards/margins": 3.156273365020752, "rewards/rejected": -5.093520641326904, "step": 786 }, { "epoch": 0.16, "learning_rate": 1.677731092436975e-05, "logits/chosen": -1.9117867946624756, "logits/rejected": -1.5678147077560425, "logps/chosen": -448.7518310546875, "logps/rejected": -339.37255859375, "loss": 0.5029, "rewards/accuracies": 0.75, "rewards/chosen": -2.287269115447998, "rewards/margins": 1.6798906326293945, "rewards/rejected": -3.9671595096588135, "step": 787 }, { "epoch": 0.16, "learning_rate": 1.677310924369748e-05, "logits/chosen": -2.0754055976867676, "logits/rejected": -1.9076216220855713, "logps/chosen": -281.4664611816406, "logps/rejected": -297.4395751953125, "loss": 0.4817, "rewards/accuracies": 0.6875, "rewards/chosen": -1.540342926979065, "rewards/margins": 3.485705614089966, "rewards/rejected": -5.02604866027832, "step": 788 }, { "epoch": 0.17, "learning_rate": 1.6768907563025213e-05, "logits/chosen": -2.2240195274353027, "logits/rejected": -1.9126520156860352, "logps/chosen": -228.79345703125, "logps/rejected": -254.69720458984375, "loss": 0.1187, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7896902561187744, "rewards/margins": 4.832139015197754, "rewards/rejected": -7.621829509735107, "step": 789 }, { "epoch": 0.17, "learning_rate": 1.6764705882352943e-05, "logits/chosen": -2.1068506240844727, "logits/rejected": -1.8672306537628174, "logps/chosen": -302.209716796875, "logps/rejected": -310.5289306640625, "loss": 0.2376, "rewards/accuracies": 0.875, "rewards/chosen": -2.1633200645446777, "rewards/margins": 3.0395374298095703, "rewards/rejected": -5.20285701751709, "step": 790 }, { "epoch": 0.17, "learning_rate": 1.6760504201680673e-05, "logits/chosen": -1.9448015689849854, "logits/rejected": -1.9556479454040527, "logps/chosen": -405.7635803222656, "logps/rejected": -362.47381591796875, "loss": 0.2758, "rewards/accuracies": 0.875, "rewards/chosen": -1.1993584632873535, "rewards/margins": 3.4291772842407227, "rewards/rejected": -4.628536224365234, "step": 791 }, { "epoch": 0.17, "learning_rate": 1.6756302521008404e-05, "logits/chosen": -1.7859869003295898, "logits/rejected": -1.378179669380188, "logps/chosen": -264.8746032714844, "logps/rejected": -270.6776428222656, "loss": 0.5102, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3737199306488037, "rewards/margins": 3.250246286392212, "rewards/rejected": -5.623966217041016, "step": 792 }, { "epoch": 0.17, "learning_rate": 1.6752100840336137e-05, "logits/chosen": -2.1382250785827637, "logits/rejected": -2.0095725059509277, "logps/chosen": -308.4726257324219, "logps/rejected": -288.9901428222656, "loss": 0.1775, "rewards/accuracies": 0.9375, "rewards/chosen": -1.295863151550293, "rewards/margins": 4.498048305511475, "rewards/rejected": -5.793911933898926, "step": 793 }, { "epoch": 0.17, "learning_rate": 1.6747899159663867e-05, "logits/chosen": -2.27700138092041, "logits/rejected": -1.8501858711242676, "logps/chosen": -448.6407470703125, "logps/rejected": -402.751220703125, "loss": 0.3421, "rewards/accuracies": 0.875, "rewards/chosen": -1.2299268245697021, "rewards/margins": 3.9400675296783447, "rewards/rejected": -5.169994354248047, "step": 794 }, { "epoch": 0.17, "learning_rate": 1.6743697478991598e-05, "logits/chosen": -1.8190721273422241, "logits/rejected": -1.5961840152740479, "logps/chosen": -237.38885498046875, "logps/rejected": -302.30084228515625, "loss": 0.1003, "rewards/accuracies": 1.0, "rewards/chosen": -2.062920570373535, "rewards/margins": 4.599336624145508, "rewards/rejected": -6.662257194519043, "step": 795 }, { "epoch": 0.17, "learning_rate": 1.673949579831933e-05, "logits/chosen": -2.145782947540283, "logits/rejected": -1.9014884233474731, "logps/chosen": -347.6669006347656, "logps/rejected": -322.4659118652344, "loss": 0.3878, "rewards/accuracies": 0.75, "rewards/chosen": -1.8265118598937988, "rewards/margins": 2.844614267349243, "rewards/rejected": -4.671126365661621, "step": 796 }, { "epoch": 0.17, "learning_rate": 1.673529411764706e-05, "logits/chosen": -2.083191394805908, "logits/rejected": -2.0543155670166016, "logps/chosen": -311.47314453125, "logps/rejected": -414.01861572265625, "loss": 0.2183, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5698487758636475, "rewards/margins": 3.643481731414795, "rewards/rejected": -5.213330268859863, "step": 797 }, { "epoch": 0.17, "learning_rate": 1.673109243697479e-05, "logits/chosen": -2.074673652648926, "logits/rejected": -1.9473825693130493, "logps/chosen": -284.04864501953125, "logps/rejected": -263.177734375, "loss": 0.5197, "rewards/accuracies": 0.875, "rewards/chosen": -2.5385236740112305, "rewards/margins": 1.558686375617981, "rewards/rejected": -4.097209930419922, "step": 798 }, { "epoch": 0.17, "learning_rate": 1.6726890756302522e-05, "logits/chosen": -1.9169516563415527, "logits/rejected": -2.1017541885375977, "logps/chosen": -209.96096801757812, "logps/rejected": -317.68707275390625, "loss": 0.3793, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8720102310180664, "rewards/margins": 3.108508348464966, "rewards/rejected": -4.980518341064453, "step": 799 }, { "epoch": 0.17, "learning_rate": 1.6722689075630255e-05, "logits/chosen": -1.9574940204620361, "logits/rejected": -1.3475319147109985, "logps/chosen": -296.0467834472656, "logps/rejected": -308.9198913574219, "loss": 0.4556, "rewards/accuracies": 0.75, "rewards/chosen": -1.5379273891448975, "rewards/margins": 3.087721347808838, "rewards/rejected": -4.625648498535156, "step": 800 }, { "epoch": 0.17, "learning_rate": 1.6718487394957986e-05, "logits/chosen": -2.219754457473755, "logits/rejected": -1.9547958374023438, "logps/chosen": -409.7639465332031, "logps/rejected": -411.67901611328125, "loss": 0.4537, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3477132320404053, "rewards/margins": 3.5002918243408203, "rewards/rejected": -4.848004341125488, "step": 801 }, { "epoch": 0.17, "learning_rate": 1.6714285714285716e-05, "logits/chosen": -2.143476724624634, "logits/rejected": -1.8891558647155762, "logps/chosen": -360.0707092285156, "logps/rejected": -391.6202087402344, "loss": 0.1866, "rewards/accuracies": 0.875, "rewards/chosen": -1.6756775379180908, "rewards/margins": 4.436486721038818, "rewards/rejected": -6.112164497375488, "step": 802 }, { "epoch": 0.17, "learning_rate": 1.6710084033613446e-05, "logits/chosen": -1.8714195489883423, "logits/rejected": -1.5319408178329468, "logps/chosen": -298.5542907714844, "logps/rejected": -298.84100341796875, "loss": 0.2397, "rewards/accuracies": 0.875, "rewards/chosen": -1.2462854385375977, "rewards/margins": 3.2517969608306885, "rewards/rejected": -4.498082160949707, "step": 803 }, { "epoch": 0.17, "learning_rate": 1.670588235294118e-05, "logits/chosen": -2.017505645751953, "logits/rejected": -1.9519598484039307, "logps/chosen": -360.55242919921875, "logps/rejected": -346.94952392578125, "loss": 0.3809, "rewards/accuracies": 0.75, "rewards/chosen": -1.0962398052215576, "rewards/margins": 3.0482583045959473, "rewards/rejected": -4.144497871398926, "step": 804 }, { "epoch": 0.17, "learning_rate": 1.670168067226891e-05, "logits/chosen": -2.340400218963623, "logits/rejected": -2.199429512023926, "logps/chosen": -311.14617919921875, "logps/rejected": -276.6678466796875, "loss": 0.455, "rewards/accuracies": 0.75, "rewards/chosen": -1.7301533222198486, "rewards/margins": 2.1489012241363525, "rewards/rejected": -3.8790547847747803, "step": 805 }, { "epoch": 0.17, "learning_rate": 1.669747899159664e-05, "logits/chosen": -1.7521651983261108, "logits/rejected": -2.2176084518432617, "logps/chosen": -144.79974365234375, "logps/rejected": -236.9078369140625, "loss": 0.3722, "rewards/accuracies": 0.75, "rewards/chosen": -1.937251329421997, "rewards/margins": 2.42926025390625, "rewards/rejected": -4.366511344909668, "step": 806 }, { "epoch": 0.17, "learning_rate": 1.669327731092437e-05, "logits/chosen": -2.192504644393921, "logits/rejected": -1.9060466289520264, "logps/chosen": -319.04949951171875, "logps/rejected": -304.0797424316406, "loss": 0.457, "rewards/accuracies": 0.75, "rewards/chosen": -1.3857057094573975, "rewards/margins": 2.898308277130127, "rewards/rejected": -4.284013748168945, "step": 807 }, { "epoch": 0.17, "learning_rate": 1.6689075630252104e-05, "logits/chosen": -2.053361177444458, "logits/rejected": -1.803290605545044, "logps/chosen": -276.19207763671875, "logps/rejected": -323.74530029296875, "loss": 0.6948, "rewards/accuracies": 0.8125, "rewards/chosen": -2.045436143875122, "rewards/margins": 1.7818390130996704, "rewards/rejected": -3.827275276184082, "step": 808 }, { "epoch": 0.17, "learning_rate": 1.6684873949579834e-05, "logits/chosen": -1.9378191232681274, "logits/rejected": -1.8617969751358032, "logps/chosen": -305.732666015625, "logps/rejected": -403.3728942871094, "loss": 0.1649, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0122315883636475, "rewards/margins": 3.1898858547210693, "rewards/rejected": -4.202117919921875, "step": 809 }, { "epoch": 0.17, "learning_rate": 1.6680672268907564e-05, "logits/chosen": -1.5843219757080078, "logits/rejected": -1.7740013599395752, "logps/chosen": -348.0705261230469, "logps/rejected": -352.1595458984375, "loss": 0.9016, "rewards/accuracies": 0.75, "rewards/chosen": -3.067608594894409, "rewards/margins": 1.5226538181304932, "rewards/rejected": -4.590262413024902, "step": 810 }, { "epoch": 0.17, "learning_rate": 1.6676470588235295e-05, "logits/chosen": -2.06152606010437, "logits/rejected": -1.969719409942627, "logps/chosen": -382.32061767578125, "logps/rejected": -324.88482666015625, "loss": 0.3366, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2894357442855835, "rewards/margins": 3.620511054992676, "rewards/rejected": -4.909946918487549, "step": 811 }, { "epoch": 0.17, "learning_rate": 1.6672268907563028e-05, "logits/chosen": -2.0924124717712402, "logits/rejected": -1.4267748594284058, "logps/chosen": -368.8747863769531, "logps/rejected": -271.54833984375, "loss": 0.7101, "rewards/accuracies": 0.6875, "rewards/chosen": -2.334439754486084, "rewards/margins": 2.4245223999023438, "rewards/rejected": -4.7589616775512695, "step": 812 }, { "epoch": 0.17, "learning_rate": 1.666806722689076e-05, "logits/chosen": -2.0999631881713867, "logits/rejected": -2.0761067867279053, "logps/chosen": -249.6148681640625, "logps/rejected": -257.709228515625, "loss": 0.2051, "rewards/accuracies": 0.875, "rewards/chosen": -1.5720138549804688, "rewards/margins": 3.9965007305145264, "rewards/rejected": -5.568514823913574, "step": 813 }, { "epoch": 0.17, "learning_rate": 1.666386554621849e-05, "logits/chosen": -1.9476782083511353, "logits/rejected": -1.9972881078720093, "logps/chosen": -261.1157531738281, "logps/rejected": -280.040283203125, "loss": 0.8267, "rewards/accuracies": 0.625, "rewards/chosen": -2.0977253913879395, "rewards/margins": 1.4153145551681519, "rewards/rejected": -3.5130398273468018, "step": 814 }, { "epoch": 0.17, "learning_rate": 1.665966386554622e-05, "logits/chosen": -1.8016812801361084, "logits/rejected": -2.0127463340759277, "logps/chosen": -167.29953002929688, "logps/rejected": -262.3382263183594, "loss": 0.347, "rewards/accuracies": 0.75, "rewards/chosen": -1.9207371473312378, "rewards/margins": 2.5700247287750244, "rewards/rejected": -4.490761756896973, "step": 815 }, { "epoch": 0.17, "learning_rate": 1.6655462184873953e-05, "logits/chosen": -2.1510488986968994, "logits/rejected": -1.751096487045288, "logps/chosen": -361.0743408203125, "logps/rejected": -333.98529052734375, "loss": 0.4481, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6933932304382324, "rewards/margins": 1.940618634223938, "rewards/rejected": -3.634012222290039, "step": 816 }, { "epoch": 0.17, "learning_rate": 1.6651260504201683e-05, "logits/chosen": -2.1181344985961914, "logits/rejected": -1.837376356124878, "logps/chosen": -268.8865966796875, "logps/rejected": -276.5599060058594, "loss": 0.1769, "rewards/accuracies": 1.0, "rewards/chosen": -1.2857040166854858, "rewards/margins": 2.93165922164917, "rewards/rejected": -4.217363357543945, "step": 817 }, { "epoch": 0.17, "learning_rate": 1.6647058823529413e-05, "logits/chosen": -2.012856960296631, "logits/rejected": -1.591303825378418, "logps/chosen": -417.49609375, "logps/rejected": -322.93426513671875, "loss": 0.3277, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9792369604110718, "rewards/margins": 3.5162994861602783, "rewards/rejected": -4.495536804199219, "step": 818 }, { "epoch": 0.17, "learning_rate": 1.6642857142857147e-05, "logits/chosen": -1.9322481155395508, "logits/rejected": -2.034684181213379, "logps/chosen": -213.40687561035156, "logps/rejected": -292.88848876953125, "loss": 0.3431, "rewards/accuracies": 0.8125, "rewards/chosen": -2.089569568634033, "rewards/margins": 2.6935977935791016, "rewards/rejected": -4.783167362213135, "step": 819 }, { "epoch": 0.17, "learning_rate": 1.6638655462184877e-05, "logits/chosen": -1.8864178657531738, "logits/rejected": -2.1955924034118652, "logps/chosen": -292.01617431640625, "logps/rejected": -304.77667236328125, "loss": 0.409, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6079450845718384, "rewards/margins": 2.152806043624878, "rewards/rejected": -3.760751247406006, "step": 820 }, { "epoch": 0.17, "learning_rate": 1.6634453781512607e-05, "logits/chosen": -1.7228481769561768, "logits/rejected": -2.0135490894317627, "logps/chosen": -197.5811309814453, "logps/rejected": -315.9188232421875, "loss": 0.1827, "rewards/accuracies": 0.9375, "rewards/chosen": -2.016920328140259, "rewards/margins": 3.6686692237854004, "rewards/rejected": -5.685589790344238, "step": 821 }, { "epoch": 0.17, "learning_rate": 1.6630252100840337e-05, "logits/chosen": -1.976762294769287, "logits/rejected": -1.8989331722259521, "logps/chosen": -391.1483154296875, "logps/rejected": -490.8774108886719, "loss": 0.1226, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8597344756126404, "rewards/margins": 4.330629348754883, "rewards/rejected": -5.190363883972168, "step": 822 }, { "epoch": 0.17, "learning_rate": 1.662605042016807e-05, "logits/chosen": -2.1878890991210938, "logits/rejected": -2.1649534702301025, "logps/chosen": -340.9357604980469, "logps/rejected": -296.375, "loss": 0.2849, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1962876319885254, "rewards/margins": 2.259341239929199, "rewards/rejected": -3.4556286334991455, "step": 823 }, { "epoch": 0.17, "learning_rate": 1.66218487394958e-05, "logits/chosen": -1.9601835012435913, "logits/rejected": -1.541343331336975, "logps/chosen": -321.4696350097656, "logps/rejected": -303.7713623046875, "loss": 0.2457, "rewards/accuracies": 0.875, "rewards/chosen": -0.7580763697624207, "rewards/margins": 4.0705742835998535, "rewards/rejected": -4.82865047454834, "step": 824 }, { "epoch": 0.17, "learning_rate": 1.661764705882353e-05, "logits/chosen": -2.2011148929595947, "logits/rejected": -1.8671495914459229, "logps/chosen": -333.15191650390625, "logps/rejected": -302.48480224609375, "loss": 0.5476, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1909797191619873, "rewards/margins": 2.1786837577819824, "rewards/rejected": -3.369663715362549, "step": 825 }, { "epoch": 0.17, "learning_rate": 1.661344537815126e-05, "logits/chosen": -1.973969578742981, "logits/rejected": -1.5397248268127441, "logps/chosen": -454.9148254394531, "logps/rejected": -419.6549377441406, "loss": 0.0814, "rewards/accuracies": 1.0, "rewards/chosen": -0.6346754431724548, "rewards/margins": 3.710078239440918, "rewards/rejected": -4.344753742218018, "step": 826 }, { "epoch": 0.17, "learning_rate": 1.6609243697478995e-05, "logits/chosen": -2.0503973960876465, "logits/rejected": -1.7811187505722046, "logps/chosen": -194.16696166992188, "logps/rejected": -224.06613159179688, "loss": 0.2646, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3351134061813354, "rewards/margins": 2.9045636653900146, "rewards/rejected": -4.2396769523620605, "step": 827 }, { "epoch": 0.17, "learning_rate": 1.6605042016806725e-05, "logits/chosen": -2.504589080810547, "logits/rejected": -1.7091355323791504, "logps/chosen": -417.6837158203125, "logps/rejected": -321.0303039550781, "loss": 0.2736, "rewards/accuracies": 0.8125, "rewards/chosen": -0.787346601486206, "rewards/margins": 3.2485744953155518, "rewards/rejected": -4.035921096801758, "step": 828 }, { "epoch": 0.17, "learning_rate": 1.6600840336134456e-05, "logits/chosen": -2.1851143836975098, "logits/rejected": -1.783128261566162, "logps/chosen": -325.93475341796875, "logps/rejected": -399.90081787109375, "loss": 0.2158, "rewards/accuracies": 0.875, "rewards/chosen": -1.199167251586914, "rewards/margins": 3.5577316284179688, "rewards/rejected": -4.756898880004883, "step": 829 }, { "epoch": 0.17, "learning_rate": 1.6596638655462186e-05, "logits/chosen": -1.9567270278930664, "logits/rejected": -1.649950385093689, "logps/chosen": -300.76953125, "logps/rejected": -272.15728759765625, "loss": 0.3177, "rewards/accuracies": 0.75, "rewards/chosen": -1.7872998714447021, "rewards/margins": 2.7889013290405273, "rewards/rejected": -4.576201438903809, "step": 830 }, { "epoch": 0.17, "learning_rate": 1.659243697478992e-05, "logits/chosen": -1.9760212898254395, "logits/rejected": -1.7709592580795288, "logps/chosen": -315.890380859375, "logps/rejected": -335.98797607421875, "loss": 0.5109, "rewards/accuracies": 0.875, "rewards/chosen": -1.7812278270721436, "rewards/margins": 3.208256721496582, "rewards/rejected": -4.989484786987305, "step": 831 }, { "epoch": 0.17, "learning_rate": 1.658823529411765e-05, "logits/chosen": -1.936383605003357, "logits/rejected": -1.6465673446655273, "logps/chosen": -330.939453125, "logps/rejected": -316.66668701171875, "loss": 0.165, "rewards/accuracies": 0.875, "rewards/chosen": -1.0802595615386963, "rewards/margins": 3.58834171295166, "rewards/rejected": -4.6686015129089355, "step": 832 }, { "epoch": 0.17, "learning_rate": 1.658403361344538e-05, "logits/chosen": -2.3320469856262207, "logits/rejected": -2.003618001937866, "logps/chosen": -337.9537048339844, "logps/rejected": -284.35986328125, "loss": 0.4036, "rewards/accuracies": 0.875, "rewards/chosen": -0.8827441930770874, "rewards/margins": 2.307610511779785, "rewards/rejected": -3.190354824066162, "step": 833 }, { "epoch": 0.17, "learning_rate": 1.657983193277311e-05, "logits/chosen": -2.048344612121582, "logits/rejected": -1.9014158248901367, "logps/chosen": -216.5763397216797, "logps/rejected": -269.81683349609375, "loss": 0.289, "rewards/accuracies": 0.875, "rewards/chosen": -1.364983081817627, "rewards/margins": 2.904245376586914, "rewards/rejected": -4.269228458404541, "step": 834 }, { "epoch": 0.17, "learning_rate": 1.6575630252100844e-05, "logits/chosen": -1.6831631660461426, "logits/rejected": -1.8635473251342773, "logps/chosen": -227.89779663085938, "logps/rejected": -315.51690673828125, "loss": 0.4791, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2472732067108154, "rewards/margins": 2.108879327774048, "rewards/rejected": -4.356152534484863, "step": 835 }, { "epoch": 0.17, "learning_rate": 1.6571428571428574e-05, "logits/chosen": -1.9859883785247803, "logits/rejected": -1.6807833909988403, "logps/chosen": -338.3689880371094, "logps/rejected": -323.92547607421875, "loss": 0.4065, "rewards/accuracies": 0.875, "rewards/chosen": -2.162870407104492, "rewards/margins": 3.001528263092041, "rewards/rejected": -5.164398670196533, "step": 836 }, { "epoch": 0.18, "learning_rate": 1.6567226890756304e-05, "logits/chosen": -1.9614168405532837, "logits/rejected": -1.7866511344909668, "logps/chosen": -331.42034912109375, "logps/rejected": -349.8808288574219, "loss": 0.5605, "rewards/accuracies": 0.75, "rewards/chosen": -2.6788291931152344, "rewards/margins": 2.541520595550537, "rewards/rejected": -5.220349311828613, "step": 837 }, { "epoch": 0.18, "learning_rate": 1.6563025210084034e-05, "logits/chosen": -2.057004928588867, "logits/rejected": -1.7994532585144043, "logps/chosen": -362.586669921875, "logps/rejected": -332.658447265625, "loss": 0.4787, "rewards/accuracies": 0.75, "rewards/chosen": -1.59127938747406, "rewards/margins": 2.264190196990967, "rewards/rejected": -3.8554694652557373, "step": 838 }, { "epoch": 0.18, "learning_rate": 1.6558823529411765e-05, "logits/chosen": -1.9931203126907349, "logits/rejected": -1.9703037738800049, "logps/chosen": -357.2671203613281, "logps/rejected": -421.91998291015625, "loss": 0.5829, "rewards/accuracies": 0.875, "rewards/chosen": -1.6607378721237183, "rewards/margins": 2.8475656509399414, "rewards/rejected": -4.508303642272949, "step": 839 }, { "epoch": 0.18, "learning_rate": 1.6554621848739495e-05, "logits/chosen": -1.7824808359146118, "logits/rejected": -2.113828659057617, "logps/chosen": -304.1899108886719, "logps/rejected": -386.22314453125, "loss": 0.2567, "rewards/accuracies": 0.875, "rewards/chosen": -1.4706379175186157, "rewards/margins": 3.265458583831787, "rewards/rejected": -4.736096382141113, "step": 840 }, { "epoch": 0.18, "learning_rate": 1.655042016806723e-05, "logits/chosen": -1.8116238117218018, "logits/rejected": -1.7838225364685059, "logps/chosen": -268.669921875, "logps/rejected": -266.62542724609375, "loss": 0.2902, "rewards/accuracies": 0.875, "rewards/chosen": -1.4213013648986816, "rewards/margins": 2.80891489982605, "rewards/rejected": -4.2302165031433105, "step": 841 }, { "epoch": 0.18, "learning_rate": 1.654621848739496e-05, "logits/chosen": -2.097501277923584, "logits/rejected": -1.7903889417648315, "logps/chosen": -397.4696960449219, "logps/rejected": -374.2898254394531, "loss": 0.4986, "rewards/accuracies": 0.6875, "rewards/chosen": -2.5505564212799072, "rewards/margins": 2.56294322013855, "rewards/rejected": -5.113499641418457, "step": 842 }, { "epoch": 0.18, "learning_rate": 1.654201680672269e-05, "logits/chosen": -2.309311866760254, "logits/rejected": -1.962480902671814, "logps/chosen": -397.01068115234375, "logps/rejected": -334.1055908203125, "loss": 0.6108, "rewards/accuracies": 0.75, "rewards/chosen": -1.3525598049163818, "rewards/margins": 2.0506162643432617, "rewards/rejected": -3.4031758308410645, "step": 843 }, { "epoch": 0.18, "learning_rate": 1.653781512605042e-05, "logits/chosen": -1.9976651668548584, "logits/rejected": -1.823923110961914, "logps/chosen": -345.355224609375, "logps/rejected": -402.2070007324219, "loss": 0.1688, "rewards/accuracies": 0.875, "rewards/chosen": -1.3158338069915771, "rewards/margins": 3.419156551361084, "rewards/rejected": -4.734990119934082, "step": 844 }, { "epoch": 0.18, "learning_rate": 1.6533613445378153e-05, "logits/chosen": -1.9651539325714111, "logits/rejected": -2.055600166320801, "logps/chosen": -268.68121337890625, "logps/rejected": -320.0924377441406, "loss": 0.5162, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2614715099334717, "rewards/margins": 2.5186023712158203, "rewards/rejected": -4.780073642730713, "step": 845 }, { "epoch": 0.18, "learning_rate": 1.6529411764705883e-05, "logits/chosen": -1.9466253519058228, "logits/rejected": -1.5661593675613403, "logps/chosen": -360.6986083984375, "logps/rejected": -369.8818359375, "loss": 0.3559, "rewards/accuracies": 0.875, "rewards/chosen": -1.6444405317306519, "rewards/margins": 2.702845335006714, "rewards/rejected": -4.347285747528076, "step": 846 }, { "epoch": 0.18, "learning_rate": 1.6525210084033613e-05, "logits/chosen": -2.030992031097412, "logits/rejected": -1.8643478155136108, "logps/chosen": -302.4504089355469, "logps/rejected": -300.9117736816406, "loss": 0.201, "rewards/accuracies": 0.875, "rewards/chosen": -1.0876160860061646, "rewards/margins": 3.9334614276885986, "rewards/rejected": -5.021077632904053, "step": 847 }, { "epoch": 0.18, "learning_rate": 1.6521008403361343e-05, "logits/chosen": -1.9273464679718018, "logits/rejected": -1.8194384574890137, "logps/chosen": -381.476806640625, "logps/rejected": -406.6851501464844, "loss": 0.306, "rewards/accuracies": 0.875, "rewards/chosen": -1.0414516925811768, "rewards/margins": 2.5162088871002197, "rewards/rejected": -3.5576605796813965, "step": 848 }, { "epoch": 0.18, "learning_rate": 1.6516806722689077e-05, "logits/chosen": -1.8308701515197754, "logits/rejected": -1.709214210510254, "logps/chosen": -380.6348876953125, "logps/rejected": -396.2377624511719, "loss": 0.3121, "rewards/accuracies": 0.75, "rewards/chosen": -1.2020562887191772, "rewards/margins": 2.959399938583374, "rewards/rejected": -4.16145658493042, "step": 849 }, { "epoch": 0.18, "learning_rate": 1.6512605042016807e-05, "logits/chosen": -2.2406840324401855, "logits/rejected": -1.7540903091430664, "logps/chosen": -352.529052734375, "logps/rejected": -291.0569152832031, "loss": 0.2422, "rewards/accuracies": 0.875, "rewards/chosen": -1.038619041442871, "rewards/margins": 3.9786553382873535, "rewards/rejected": -5.017273902893066, "step": 850 }, { "epoch": 0.18, "learning_rate": 1.6508403361344537e-05, "logits/chosen": -2.294544219970703, "logits/rejected": -2.259855270385742, "logps/chosen": -350.5856628417969, "logps/rejected": -408.121337890625, "loss": 0.4345, "rewards/accuracies": 0.8125, "rewards/chosen": -0.87151038646698, "rewards/margins": 2.057766914367676, "rewards/rejected": -2.929277181625366, "step": 851 }, { "epoch": 0.18, "learning_rate": 1.650420168067227e-05, "logits/chosen": -2.2867562770843506, "logits/rejected": -2.1423451900482178, "logps/chosen": -176.26080322265625, "logps/rejected": -226.0680694580078, "loss": 0.3108, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2988001108169556, "rewards/margins": 3.1676506996154785, "rewards/rejected": -4.4664506912231445, "step": 852 }, { "epoch": 0.18, "learning_rate": 1.65e-05, "logits/chosen": -2.1745870113372803, "logits/rejected": -2.0351154804229736, "logps/chosen": -319.2963562011719, "logps/rejected": -310.81927490234375, "loss": 0.395, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1311728954315186, "rewards/margins": 2.9557228088378906, "rewards/rejected": -4.08689546585083, "step": 853 }, { "epoch": 0.18, "learning_rate": 1.649579831932773e-05, "logits/chosen": -1.8942925930023193, "logits/rejected": -2.042156934738159, "logps/chosen": -322.89141845703125, "logps/rejected": -362.81488037109375, "loss": 0.6047, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9837080240249634, "rewards/margins": 2.8692259788513184, "rewards/rejected": -3.852933883666992, "step": 854 }, { "epoch": 0.18, "learning_rate": 1.6491596638655462e-05, "logits/chosen": -1.9540199041366577, "logits/rejected": -2.148458957672119, "logps/chosen": -200.40017700195312, "logps/rejected": -272.02374267578125, "loss": 0.3267, "rewards/accuracies": 0.75, "rewards/chosen": -1.5883879661560059, "rewards/margins": 2.9480838775634766, "rewards/rejected": -4.536471843719482, "step": 855 }, { "epoch": 0.18, "learning_rate": 1.6487394957983195e-05, "logits/chosen": -2.0282158851623535, "logits/rejected": -1.984845519065857, "logps/chosen": -234.03158569335938, "logps/rejected": -297.2955322265625, "loss": 0.1634, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9600425958633423, "rewards/margins": 3.9911789894104004, "rewards/rejected": -4.951221942901611, "step": 856 }, { "epoch": 0.18, "learning_rate": 1.6483193277310926e-05, "logits/chosen": -2.0615880489349365, "logits/rejected": -1.6632945537567139, "logps/chosen": -348.844482421875, "logps/rejected": -317.849365234375, "loss": 0.4408, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9168353080749512, "rewards/margins": 3.5147628784179688, "rewards/rejected": -4.43159818649292, "step": 857 }, { "epoch": 0.18, "learning_rate": 1.6478991596638656e-05, "logits/chosen": -2.010307788848877, "logits/rejected": -1.8121495246887207, "logps/chosen": -238.03521728515625, "logps/rejected": -237.03298950195312, "loss": 0.1395, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5243815183639526, "rewards/margins": 4.120826721191406, "rewards/rejected": -4.645208358764648, "step": 858 }, { "epoch": 0.18, "learning_rate": 1.6474789915966386e-05, "logits/chosen": -2.1482393741607666, "logits/rejected": -1.954933762550354, "logps/chosen": -253.74533081054688, "logps/rejected": -296.804443359375, "loss": 0.2187, "rewards/accuracies": 0.875, "rewards/chosen": -0.9486840963363647, "rewards/margins": 4.430158615112305, "rewards/rejected": -5.378842353820801, "step": 859 }, { "epoch": 0.18, "learning_rate": 1.647058823529412e-05, "logits/chosen": -2.2143912315368652, "logits/rejected": -1.9657034873962402, "logps/chosen": -379.7645263671875, "logps/rejected": -453.1151428222656, "loss": 0.3534, "rewards/accuracies": 0.875, "rewards/chosen": -1.390915870666504, "rewards/margins": 3.3093655109405518, "rewards/rejected": -4.700282096862793, "step": 860 }, { "epoch": 0.18, "learning_rate": 1.646638655462185e-05, "logits/chosen": -2.304980516433716, "logits/rejected": -2.093060255050659, "logps/chosen": -292.78802490234375, "logps/rejected": -328.1954650878906, "loss": 0.5197, "rewards/accuracies": 0.625, "rewards/chosen": -1.2974127531051636, "rewards/margins": 1.7077938318252563, "rewards/rejected": -3.005206346511841, "step": 861 }, { "epoch": 0.18, "learning_rate": 1.646218487394958e-05, "logits/chosen": -1.9428411722183228, "logits/rejected": -1.5828006267547607, "logps/chosen": -298.0184326171875, "logps/rejected": -261.4896240234375, "loss": 0.3705, "rewards/accuracies": 0.875, "rewards/chosen": -1.3908194303512573, "rewards/margins": 1.7250456809997559, "rewards/rejected": -3.1158649921417236, "step": 862 }, { "epoch": 0.18, "learning_rate": 1.645798319327731e-05, "logits/chosen": -2.2632293701171875, "logits/rejected": -2.421980619430542, "logps/chosen": -320.84283447265625, "logps/rejected": -354.6207275390625, "loss": 0.3628, "rewards/accuracies": 0.75, "rewards/chosen": -1.3431496620178223, "rewards/margins": 2.89518404006958, "rewards/rejected": -4.238333702087402, "step": 863 }, { "epoch": 0.18, "learning_rate": 1.6453781512605044e-05, "logits/chosen": -2.2326416969299316, "logits/rejected": -1.7632266283035278, "logps/chosen": -416.22735595703125, "logps/rejected": -413.9497985839844, "loss": 0.1922, "rewards/accuracies": 0.9375, "rewards/chosen": -0.05262170732021332, "rewards/margins": 3.9401092529296875, "rewards/rejected": -3.9927310943603516, "step": 864 }, { "epoch": 0.18, "learning_rate": 1.6449579831932774e-05, "logits/chosen": -2.2643580436706543, "logits/rejected": -1.5607458353042603, "logps/chosen": -383.83551025390625, "logps/rejected": -321.55718994140625, "loss": 0.1777, "rewards/accuracies": 0.9375, "rewards/chosen": -0.30315354466438293, "rewards/margins": 3.521136522293091, "rewards/rejected": -3.8242900371551514, "step": 865 }, { "epoch": 0.18, "learning_rate": 1.6445378151260504e-05, "logits/chosen": -1.699146032333374, "logits/rejected": -1.956099271774292, "logps/chosen": -245.18296813964844, "logps/rejected": -312.64801025390625, "loss": 0.3764, "rewards/accuracies": 0.75, "rewards/chosen": -1.6060068607330322, "rewards/margins": 2.6803605556488037, "rewards/rejected": -4.286367416381836, "step": 866 }, { "epoch": 0.18, "learning_rate": 1.6441176470588235e-05, "logits/chosen": -2.0695505142211914, "logits/rejected": -1.6839985847473145, "logps/chosen": -302.244873046875, "logps/rejected": -303.36810302734375, "loss": 0.4166, "rewards/accuracies": 0.75, "rewards/chosen": -1.0057995319366455, "rewards/margins": 2.0823960304260254, "rewards/rejected": -3.08819580078125, "step": 867 }, { "epoch": 0.18, "learning_rate": 1.6436974789915968e-05, "logits/chosen": -1.829862356185913, "logits/rejected": -2.0130064487457275, "logps/chosen": -224.52940368652344, "logps/rejected": -285.8321533203125, "loss": 0.3092, "rewards/accuracies": 0.875, "rewards/chosen": -1.3459399938583374, "rewards/margins": 2.6092429161071777, "rewards/rejected": -3.9551827907562256, "step": 868 }, { "epoch": 0.18, "learning_rate": 1.64327731092437e-05, "logits/chosen": -2.3632571697235107, "logits/rejected": -1.8849446773529053, "logps/chosen": -388.83197021484375, "logps/rejected": -340.25634765625, "loss": 0.3105, "rewards/accuracies": 0.75, "rewards/chosen": -0.5788600444793701, "rewards/margins": 2.7900514602661133, "rewards/rejected": -3.3689115047454834, "step": 869 }, { "epoch": 0.18, "learning_rate": 1.642857142857143e-05, "logits/chosen": -2.087378978729248, "logits/rejected": -1.8918813467025757, "logps/chosen": -241.5161590576172, "logps/rejected": -268.33074951171875, "loss": 0.4724, "rewards/accuracies": 0.6875, "rewards/chosen": -1.0371454954147339, "rewards/margins": 2.611354112625122, "rewards/rejected": -3.6484994888305664, "step": 870 }, { "epoch": 0.18, "learning_rate": 1.642436974789916e-05, "logits/chosen": -2.0778613090515137, "logits/rejected": -1.9503037929534912, "logps/chosen": -239.8323516845703, "logps/rejected": -262.5771789550781, "loss": 0.1792, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8480715155601501, "rewards/margins": 3.899735927581787, "rewards/rejected": -4.747807502746582, "step": 871 }, { "epoch": 0.18, "learning_rate": 1.6420168067226892e-05, "logits/chosen": -1.9055280685424805, "logits/rejected": -1.6988685131072998, "logps/chosen": -207.60768127441406, "logps/rejected": -249.6638641357422, "loss": 0.214, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3459224700927734, "rewards/margins": 2.718294382095337, "rewards/rejected": -4.064216613769531, "step": 872 }, { "epoch": 0.18, "learning_rate": 1.6415966386554623e-05, "logits/chosen": -2.025665760040283, "logits/rejected": -1.8769866228103638, "logps/chosen": -365.9964599609375, "logps/rejected": -341.34466552734375, "loss": 0.1997, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0866661071777344, "rewards/margins": 3.3571691513061523, "rewards/rejected": -4.443835258483887, "step": 873 }, { "epoch": 0.18, "learning_rate": 1.6411764705882353e-05, "logits/chosen": -2.006777763366699, "logits/rejected": -1.6285200119018555, "logps/chosen": -291.044189453125, "logps/rejected": -305.30926513671875, "loss": 0.342, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0532143115997314, "rewards/margins": 2.9847567081451416, "rewards/rejected": -4.037971496582031, "step": 874 }, { "epoch": 0.18, "learning_rate": 1.6407563025210086e-05, "logits/chosen": -1.941154956817627, "logits/rejected": -1.896328091621399, "logps/chosen": -226.91986083984375, "logps/rejected": -321.24261474609375, "loss": 0.3292, "rewards/accuracies": 0.75, "rewards/chosen": -1.8371610641479492, "rewards/margins": 2.636752128601074, "rewards/rejected": -4.473913669586182, "step": 875 }, { "epoch": 0.18, "learning_rate": 1.6403361344537817e-05, "logits/chosen": -1.7844109535217285, "logits/rejected": -1.823304295539856, "logps/chosen": -229.37229919433594, "logps/rejected": -377.96490478515625, "loss": 0.1902, "rewards/accuracies": 0.875, "rewards/chosen": -1.5230915546417236, "rewards/margins": 4.438862323760986, "rewards/rejected": -5.961954116821289, "step": 876 }, { "epoch": 0.18, "learning_rate": 1.6399159663865547e-05, "logits/chosen": -2.117547035217285, "logits/rejected": -1.943943738937378, "logps/chosen": -208.5506591796875, "logps/rejected": -260.7738342285156, "loss": 0.655, "rewards/accuracies": 0.75, "rewards/chosen": -2.129209280014038, "rewards/margins": 3.2130351066589355, "rewards/rejected": -5.342244625091553, "step": 877 }, { "epoch": 0.18, "learning_rate": 1.6394957983193277e-05, "logits/chosen": -1.8762304782867432, "logits/rejected": -2.008063554763794, "logps/chosen": -341.86859130859375, "logps/rejected": -434.216796875, "loss": 0.5347, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9512302875518799, "rewards/margins": 2.597323417663574, "rewards/rejected": -3.548553466796875, "step": 878 }, { "epoch": 0.18, "learning_rate": 1.639075630252101e-05, "logits/chosen": -1.9159164428710938, "logits/rejected": -1.9696593284606934, "logps/chosen": -201.14906311035156, "logps/rejected": -301.0068054199219, "loss": 0.6713, "rewards/accuracies": 0.6875, "rewards/chosen": -1.636357307434082, "rewards/margins": 2.3764381408691406, "rewards/rejected": -4.012795448303223, "step": 879 }, { "epoch": 0.18, "learning_rate": 1.638655462184874e-05, "logits/chosen": -2.1168620586395264, "logits/rejected": -2.0773792266845703, "logps/chosen": -270.95245361328125, "logps/rejected": -291.47772216796875, "loss": 0.15, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3984571695327759, "rewards/margins": 3.4344632625579834, "rewards/rejected": -4.832920074462891, "step": 880 }, { "epoch": 0.18, "learning_rate": 1.638235294117647e-05, "logits/chosen": -1.848799705505371, "logits/rejected": -1.8525713682174683, "logps/chosen": -355.29351806640625, "logps/rejected": -310.83978271484375, "loss": 0.1713, "rewards/accuracies": 0.9375, "rewards/chosen": -0.34663504362106323, "rewards/margins": 3.317166805267334, "rewards/rejected": -3.663801670074463, "step": 881 }, { "epoch": 0.18, "learning_rate": 1.63781512605042e-05, "logits/chosen": -1.7868198156356812, "logits/rejected": -1.6365983486175537, "logps/chosen": -247.5361785888672, "logps/rejected": -322.70684814453125, "loss": 0.1945, "rewards/accuracies": 0.9375, "rewards/chosen": -1.913313865661621, "rewards/margins": 4.205984115600586, "rewards/rejected": -6.119297981262207, "step": 882 }, { "epoch": 0.18, "learning_rate": 1.6373949579831935e-05, "logits/chosen": -1.8090060949325562, "logits/rejected": -1.876386284828186, "logps/chosen": -318.5756530761719, "logps/rejected": -351.6622619628906, "loss": 0.4159, "rewards/accuracies": 0.875, "rewards/chosen": -1.3252978324890137, "rewards/margins": 2.5264720916748047, "rewards/rejected": -3.8517696857452393, "step": 883 }, { "epoch": 0.18, "learning_rate": 1.6369747899159665e-05, "logits/chosen": -2.0742955207824707, "logits/rejected": -1.9202224016189575, "logps/chosen": -331.4919738769531, "logps/rejected": -353.0850830078125, "loss": 0.4026, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6761324405670166, "rewards/margins": 4.3154191970825195, "rewards/rejected": -5.991551876068115, "step": 884 }, { "epoch": 0.19, "learning_rate": 1.6365546218487395e-05, "logits/chosen": -1.9296650886535645, "logits/rejected": -1.9566481113433838, "logps/chosen": -250.49246215820312, "logps/rejected": -281.1885986328125, "loss": 0.3591, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2915005683898926, "rewards/margins": 3.8833706378936768, "rewards/rejected": -5.17487096786499, "step": 885 }, { "epoch": 0.19, "learning_rate": 1.6361344537815126e-05, "logits/chosen": -1.8366259336471558, "logits/rejected": -1.8812752962112427, "logps/chosen": -335.166259765625, "logps/rejected": -344.3279113769531, "loss": 0.9352, "rewards/accuracies": 0.6875, "rewards/chosen": -2.160041332244873, "rewards/margins": 1.4995570182800293, "rewards/rejected": -3.6595983505249023, "step": 886 }, { "epoch": 0.19, "learning_rate": 1.635714285714286e-05, "logits/chosen": -2.2893869876861572, "logits/rejected": -2.0035924911499023, "logps/chosen": -508.09478759765625, "logps/rejected": -332.3266296386719, "loss": 0.221, "rewards/accuracies": 0.875, "rewards/chosen": -1.2859076261520386, "rewards/margins": 3.980938673019409, "rewards/rejected": -5.266845703125, "step": 887 }, { "epoch": 0.19, "learning_rate": 1.635294117647059e-05, "logits/chosen": -2.169868230819702, "logits/rejected": -1.9147133827209473, "logps/chosen": -229.88088989257812, "logps/rejected": -302.2627868652344, "loss": 0.3355, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2932333946228027, "rewards/margins": 3.5300803184509277, "rewards/rejected": -4.8233137130737305, "step": 888 }, { "epoch": 0.19, "learning_rate": 1.634873949579832e-05, "logits/chosen": -1.8004443645477295, "logits/rejected": -1.66507887840271, "logps/chosen": -366.08819580078125, "logps/rejected": -355.1624755859375, "loss": 0.1311, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2386223077774048, "rewards/margins": 4.955789566040039, "rewards/rejected": -6.194411277770996, "step": 889 }, { "epoch": 0.19, "learning_rate": 1.634453781512605e-05, "logits/chosen": -1.8324013948440552, "logits/rejected": -1.7521955966949463, "logps/chosen": -276.56988525390625, "logps/rejected": -264.121826171875, "loss": 0.6167, "rewards/accuracies": 0.75, "rewards/chosen": -1.6577619314193726, "rewards/margins": 1.9621384143829346, "rewards/rejected": -3.6199002265930176, "step": 890 }, { "epoch": 0.19, "learning_rate": 1.6340336134453784e-05, "logits/chosen": -2.2964229583740234, "logits/rejected": -1.6694972515106201, "logps/chosen": -425.6768798828125, "logps/rejected": -321.85736083984375, "loss": 0.197, "rewards/accuracies": 0.875, "rewards/chosen": -1.5942802429199219, "rewards/margins": 3.9484291076660156, "rewards/rejected": -5.5427093505859375, "step": 891 }, { "epoch": 0.19, "learning_rate": 1.6336134453781514e-05, "logits/chosen": -1.870035171508789, "logits/rejected": -1.6253803968429565, "logps/chosen": -314.9829406738281, "logps/rejected": -321.0347900390625, "loss": 0.5274, "rewards/accuracies": 0.6875, "rewards/chosen": -2.105365037918091, "rewards/margins": 2.045036554336548, "rewards/rejected": -4.150401592254639, "step": 892 }, { "epoch": 0.19, "learning_rate": 1.6331932773109244e-05, "logits/chosen": -2.0040812492370605, "logits/rejected": -2.1056299209594727, "logps/chosen": -261.9435729980469, "logps/rejected": -307.0917053222656, "loss": 0.5958, "rewards/accuracies": 0.8125, "rewards/chosen": -1.740065097808838, "rewards/margins": 1.639905333518982, "rewards/rejected": -3.3799703121185303, "step": 893 }, { "epoch": 0.19, "learning_rate": 1.6327731092436974e-05, "logits/chosen": -1.840165376663208, "logits/rejected": -1.7288068532943726, "logps/chosen": -256.5667419433594, "logps/rejected": -288.83355712890625, "loss": 0.2113, "rewards/accuracies": 0.875, "rewards/chosen": -1.358893632888794, "rewards/margins": 2.9421653747558594, "rewards/rejected": -4.301058769226074, "step": 894 }, { "epoch": 0.19, "learning_rate": 1.6323529411764708e-05, "logits/chosen": -2.0562233924865723, "logits/rejected": -1.8470474481582642, "logps/chosen": -287.37371826171875, "logps/rejected": -330.51934814453125, "loss": 0.3998, "rewards/accuracies": 0.75, "rewards/chosen": -1.304749608039856, "rewards/margins": 3.2410483360290527, "rewards/rejected": -4.545797824859619, "step": 895 }, { "epoch": 0.19, "learning_rate": 1.6319327731092438e-05, "logits/chosen": -2.3353686332702637, "logits/rejected": -2.2728850841522217, "logps/chosen": -353.1683654785156, "logps/rejected": -323.61065673828125, "loss": 0.2424, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6277364492416382, "rewards/margins": 2.9938278198242188, "rewards/rejected": -4.621563911437988, "step": 896 }, { "epoch": 0.19, "learning_rate": 1.6315126050420168e-05, "logits/chosen": -2.0406594276428223, "logits/rejected": -1.948315143585205, "logps/chosen": -300.9489440917969, "logps/rejected": -305.4774475097656, "loss": 0.282, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2815067768096924, "rewards/margins": 3.31465482711792, "rewards/rejected": -4.596161365509033, "step": 897 }, { "epoch": 0.19, "learning_rate": 1.6310924369747902e-05, "logits/chosen": -2.0638017654418945, "logits/rejected": -2.173253059387207, "logps/chosen": -266.5672302246094, "logps/rejected": -341.09552001953125, "loss": 0.4387, "rewards/accuracies": 0.875, "rewards/chosen": -1.6528520584106445, "rewards/margins": 1.8177275657653809, "rewards/rejected": -3.4705796241760254, "step": 898 }, { "epoch": 0.19, "learning_rate": 1.6306722689075632e-05, "logits/chosen": -2.0284862518310547, "logits/rejected": -1.877831220626831, "logps/chosen": -242.322509765625, "logps/rejected": -253.47434997558594, "loss": 0.2654, "rewards/accuracies": 0.75, "rewards/chosen": -1.3718500137329102, "rewards/margins": 3.1581337451934814, "rewards/rejected": -4.5299835205078125, "step": 899 }, { "epoch": 0.19, "learning_rate": 1.6302521008403362e-05, "logits/chosen": -1.961199402809143, "logits/rejected": -2.235602378845215, "logps/chosen": -327.5709228515625, "logps/rejected": -392.82080078125, "loss": 0.207, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6375327110290527, "rewards/margins": 3.892061710357666, "rewards/rejected": -5.529594421386719, "step": 900 }, { "epoch": 0.19, "learning_rate": 1.6298319327731093e-05, "logits/chosen": -2.0290136337280273, "logits/rejected": -1.696539044380188, "logps/chosen": -334.60601806640625, "logps/rejected": -262.968505859375, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -1.1712181568145752, "rewards/margins": 4.417806625366211, "rewards/rejected": -5.589025020599365, "step": 901 }, { "epoch": 0.19, "learning_rate": 1.6294117647058826e-05, "logits/chosen": -2.2502691745758057, "logits/rejected": -2.1580700874328613, "logps/chosen": -271.84765625, "logps/rejected": -209.18585205078125, "loss": 0.5402, "rewards/accuracies": 0.75, "rewards/chosen": -1.5988433361053467, "rewards/margins": 2.7156684398651123, "rewards/rejected": -4.314511775970459, "step": 902 }, { "epoch": 0.19, "learning_rate": 1.6289915966386556e-05, "logits/chosen": -2.0320043563842773, "logits/rejected": -1.588046669960022, "logps/chosen": -393.6407775878906, "logps/rejected": -276.511962890625, "loss": 0.4197, "rewards/accuracies": 0.75, "rewards/chosen": -1.903933048248291, "rewards/margins": 2.1499881744384766, "rewards/rejected": -4.053921699523926, "step": 903 }, { "epoch": 0.19, "learning_rate": 1.6285714285714287e-05, "logits/chosen": -2.47733211517334, "logits/rejected": -1.9541778564453125, "logps/chosen": -443.1294250488281, "logps/rejected": -330.012939453125, "loss": 0.2284, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0894834995269775, "rewards/margins": 3.156179189682007, "rewards/rejected": -4.245662689208984, "step": 904 }, { "epoch": 0.19, "learning_rate": 1.6281512605042017e-05, "logits/chosen": -2.060857057571411, "logits/rejected": -1.7653508186340332, "logps/chosen": -318.81451416015625, "logps/rejected": -241.42276000976562, "loss": 0.4835, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8648567199707031, "rewards/margins": 1.6097255945205688, "rewards/rejected": -3.4745821952819824, "step": 905 }, { "epoch": 0.19, "learning_rate": 1.627731092436975e-05, "logits/chosen": -2.1348257064819336, "logits/rejected": -1.7093878984451294, "logps/chosen": -285.4877014160156, "logps/rejected": -290.025634765625, "loss": 0.3394, "rewards/accuracies": 0.875, "rewards/chosen": -1.996217131614685, "rewards/margins": 2.865814208984375, "rewards/rejected": -4.862030982971191, "step": 906 }, { "epoch": 0.19, "learning_rate": 1.627310924369748e-05, "logits/chosen": -1.5797868967056274, "logits/rejected": -1.4345656633377075, "logps/chosen": -342.4110107421875, "logps/rejected": -269.3785095214844, "loss": 0.3566, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7635388374328613, "rewards/margins": 2.652501344680786, "rewards/rejected": -4.416040420532227, "step": 907 }, { "epoch": 0.19, "learning_rate": 1.626890756302521e-05, "logits/chosen": -2.080841064453125, "logits/rejected": -1.4903613328933716, "logps/chosen": -443.020263671875, "logps/rejected": -397.37432861328125, "loss": 0.1821, "rewards/accuracies": 0.875, "rewards/chosen": -1.0566246509552002, "rewards/margins": 3.6935763359069824, "rewards/rejected": -4.7502007484436035, "step": 908 }, { "epoch": 0.19, "learning_rate": 1.626470588235294e-05, "logits/chosen": -2.2648098468780518, "logits/rejected": -1.8361196517944336, "logps/chosen": -390.60595703125, "logps/rejected": -379.9647216796875, "loss": 0.0849, "rewards/accuracies": 1.0, "rewards/chosen": -0.834219217300415, "rewards/margins": 3.8679771423339844, "rewards/rejected": -4.70219612121582, "step": 909 }, { "epoch": 0.19, "learning_rate": 1.6260504201680675e-05, "logits/chosen": -1.9963374137878418, "logits/rejected": -2.1134889125823975, "logps/chosen": -352.23309326171875, "logps/rejected": -386.251953125, "loss": 0.267, "rewards/accuracies": 0.875, "rewards/chosen": -1.7679274082183838, "rewards/margins": 2.7292299270629883, "rewards/rejected": -4.497157096862793, "step": 910 }, { "epoch": 0.19, "learning_rate": 1.6256302521008405e-05, "logits/chosen": -2.139585018157959, "logits/rejected": -1.826736569404602, "logps/chosen": -305.2021179199219, "logps/rejected": -273.81103515625, "loss": 0.2705, "rewards/accuracies": 0.875, "rewards/chosen": -1.1817421913146973, "rewards/margins": 2.803069591522217, "rewards/rejected": -3.984811782836914, "step": 911 }, { "epoch": 0.19, "learning_rate": 1.6252100840336135e-05, "logits/chosen": -1.981002688407898, "logits/rejected": -2.0703396797180176, "logps/chosen": -406.9721984863281, "logps/rejected": -371.20440673828125, "loss": 0.2799, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1434235572814941, "rewards/margins": 3.0727317333221436, "rewards/rejected": -4.216155052185059, "step": 912 }, { "epoch": 0.19, "learning_rate": 1.6247899159663865e-05, "logits/chosen": -1.7255853414535522, "logits/rejected": -1.8718693256378174, "logps/chosen": -286.42413330078125, "logps/rejected": -319.8940124511719, "loss": 0.3252, "rewards/accuracies": 0.8125, "rewards/chosen": -1.128069281578064, "rewards/margins": 2.729300022125244, "rewards/rejected": -3.8573689460754395, "step": 913 }, { "epoch": 0.19, "learning_rate": 1.62436974789916e-05, "logits/chosen": -1.887884259223938, "logits/rejected": -1.9022674560546875, "logps/chosen": -256.7900085449219, "logps/rejected": -269.56280517578125, "loss": 0.7025, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0569393634796143, "rewards/margins": 1.2357622385025024, "rewards/rejected": -3.292701482772827, "step": 914 }, { "epoch": 0.19, "learning_rate": 1.623949579831933e-05, "logits/chosen": -2.2448368072509766, "logits/rejected": -1.9794319868087769, "logps/chosen": -379.6931457519531, "logps/rejected": -299.73516845703125, "loss": 0.2382, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9326602816581726, "rewards/margins": 3.316673755645752, "rewards/rejected": -4.249334335327148, "step": 915 }, { "epoch": 0.19, "learning_rate": 1.623529411764706e-05, "logits/chosen": -1.8805540800094604, "logits/rejected": -1.561633825302124, "logps/chosen": -298.3570556640625, "logps/rejected": -316.96923828125, "loss": 0.3102, "rewards/accuracies": 0.75, "rewards/chosen": -1.9303977489471436, "rewards/margins": 2.5443007946014404, "rewards/rejected": -4.474699020385742, "step": 916 }, { "epoch": 0.19, "learning_rate": 1.623109243697479e-05, "logits/chosen": -2.048431396484375, "logits/rejected": -1.9180465936660767, "logps/chosen": -295.2594909667969, "logps/rejected": -264.12872314453125, "loss": 0.2324, "rewards/accuracies": 0.875, "rewards/chosen": -1.1292060613632202, "rewards/margins": 3.2113733291625977, "rewards/rejected": -4.340579032897949, "step": 917 }, { "epoch": 0.19, "learning_rate": 1.6226890756302523e-05, "logits/chosen": -2.3067197799682617, "logits/rejected": -2.1548309326171875, "logps/chosen": -301.251708984375, "logps/rejected": -332.4122009277344, "loss": 0.4744, "rewards/accuracies": 0.75, "rewards/chosen": -1.0159692764282227, "rewards/margins": 1.5090093612670898, "rewards/rejected": -2.5249786376953125, "step": 918 }, { "epoch": 0.19, "learning_rate": 1.6222689075630253e-05, "logits/chosen": -1.8239529132843018, "logits/rejected": -1.8729701042175293, "logps/chosen": -301.16497802734375, "logps/rejected": -319.0786437988281, "loss": 0.3685, "rewards/accuracies": 0.875, "rewards/chosen": -0.8851511478424072, "rewards/margins": 2.9885482788085938, "rewards/rejected": -3.87369966506958, "step": 919 }, { "epoch": 0.19, "learning_rate": 1.6218487394957984e-05, "logits/chosen": -2.174138307571411, "logits/rejected": -1.890104055404663, "logps/chosen": -558.3136596679688, "logps/rejected": -482.9833984375, "loss": 0.157, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0088156461715698, "rewards/margins": 3.641759157180786, "rewards/rejected": -4.650574684143066, "step": 920 }, { "epoch": 0.19, "learning_rate": 1.6214285714285717e-05, "logits/chosen": -1.9609408378601074, "logits/rejected": -1.9450379610061646, "logps/chosen": -166.66647338867188, "logps/rejected": -230.41082763671875, "loss": 0.3405, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8020756840705872, "rewards/margins": 2.7518205642700195, "rewards/rejected": -3.553896188735962, "step": 921 }, { "epoch": 0.19, "learning_rate": 1.6210084033613448e-05, "logits/chosen": -2.2108054161071777, "logits/rejected": -1.811680793762207, "logps/chosen": -468.9318542480469, "logps/rejected": -335.023193359375, "loss": 0.2847, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9263225793838501, "rewards/margins": 2.652930736541748, "rewards/rejected": -3.5792531967163086, "step": 922 }, { "epoch": 0.19, "learning_rate": 1.6205882352941178e-05, "logits/chosen": -2.0254106521606445, "logits/rejected": -1.9421586990356445, "logps/chosen": -411.4443664550781, "logps/rejected": -476.85455322265625, "loss": 0.2413, "rewards/accuracies": 0.875, "rewards/chosen": -0.49983686208724976, "rewards/margins": 2.882030725479126, "rewards/rejected": -3.3818676471710205, "step": 923 }, { "epoch": 0.19, "learning_rate": 1.6201680672268908e-05, "logits/chosen": -2.1283533573150635, "logits/rejected": -1.5643560886383057, "logps/chosen": -213.84304809570312, "logps/rejected": -213.2480010986328, "loss": 0.3522, "rewards/accuracies": 0.75, "rewards/chosen": -1.468817949295044, "rewards/margins": 2.8268935680389404, "rewards/rejected": -4.295711517333984, "step": 924 }, { "epoch": 0.19, "learning_rate": 1.619747899159664e-05, "logits/chosen": -2.2803688049316406, "logits/rejected": -2.024850606918335, "logps/chosen": -513.8499145507812, "logps/rejected": -487.3311767578125, "loss": 0.2643, "rewards/accuracies": 0.875, "rewards/chosen": -0.35037142038345337, "rewards/margins": 5.109805107116699, "rewards/rejected": -5.460176467895508, "step": 925 }, { "epoch": 0.19, "learning_rate": 1.6193277310924372e-05, "logits/chosen": -2.05611252784729, "logits/rejected": -1.5205470323562622, "logps/chosen": -338.35125732421875, "logps/rejected": -348.0825500488281, "loss": 0.1334, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2922396659851074, "rewards/margins": 4.227748870849609, "rewards/rejected": -5.519988059997559, "step": 926 }, { "epoch": 0.19, "learning_rate": 1.6189075630252102e-05, "logits/chosen": -1.9922364950180054, "logits/rejected": -1.9709713459014893, "logps/chosen": -340.4877014160156, "logps/rejected": -306.7227478027344, "loss": 0.4801, "rewards/accuracies": 0.875, "rewards/chosen": -1.722760558128357, "rewards/margins": 2.3541500568389893, "rewards/rejected": -4.076910972595215, "step": 927 }, { "epoch": 0.19, "learning_rate": 1.6184873949579832e-05, "logits/chosen": -2.097243309020996, "logits/rejected": -1.9899710416793823, "logps/chosen": -349.98687744140625, "logps/rejected": -340.2271423339844, "loss": 0.4914, "rewards/accuracies": 0.8125, "rewards/chosen": -0.592383861541748, "rewards/margins": 3.6400413513183594, "rewards/rejected": -4.232425212860107, "step": 928 }, { "epoch": 0.19, "learning_rate": 1.6180672268907566e-05, "logits/chosen": -1.8802413940429688, "logits/rejected": -1.7215006351470947, "logps/chosen": -279.944091796875, "logps/rejected": -281.08184814453125, "loss": 0.4542, "rewards/accuracies": 0.75, "rewards/chosen": -1.5357730388641357, "rewards/margins": 2.8300819396972656, "rewards/rejected": -4.365854740142822, "step": 929 }, { "epoch": 0.19, "learning_rate": 1.6176470588235296e-05, "logits/chosen": -1.8888359069824219, "logits/rejected": -1.9341886043548584, "logps/chosen": -252.97305297851562, "logps/rejected": -347.5448913574219, "loss": 0.343, "rewards/accuracies": 0.875, "rewards/chosen": -1.2318804264068604, "rewards/margins": 3.367450714111328, "rewards/rejected": -4.599330902099609, "step": 930 }, { "epoch": 0.19, "learning_rate": 1.6172268907563026e-05, "logits/chosen": -2.2744109630584717, "logits/rejected": -1.9098711013793945, "logps/chosen": -331.54168701171875, "logps/rejected": -309.3653259277344, "loss": 0.4295, "rewards/accuracies": 0.875, "rewards/chosen": -1.0927395820617676, "rewards/margins": 3.3619794845581055, "rewards/rejected": -4.454718589782715, "step": 931 }, { "epoch": 0.19, "learning_rate": 1.6168067226890757e-05, "logits/chosen": -2.229959011077881, "logits/rejected": -2.0564463138580322, "logps/chosen": -337.414794921875, "logps/rejected": -316.2709655761719, "loss": 0.3448, "rewards/accuracies": 0.875, "rewards/chosen": -0.9802029728889465, "rewards/margins": 2.461056709289551, "rewards/rejected": -3.4412598609924316, "step": 932 }, { "epoch": 0.2, "learning_rate": 1.616386554621849e-05, "logits/chosen": -2.2918221950531006, "logits/rejected": -1.9197709560394287, "logps/chosen": -377.126220703125, "logps/rejected": -350.18035888671875, "loss": 0.376, "rewards/accuracies": 0.875, "rewards/chosen": -1.168889045715332, "rewards/margins": 3.2541346549987793, "rewards/rejected": -4.4230241775512695, "step": 933 }, { "epoch": 0.2, "learning_rate": 1.615966386554622e-05, "logits/chosen": -2.046694755554199, "logits/rejected": -1.7996652126312256, "logps/chosen": -379.83453369140625, "logps/rejected": -424.0287780761719, "loss": 0.3029, "rewards/accuracies": 0.875, "rewards/chosen": -1.5068362951278687, "rewards/margins": 3.8165125846862793, "rewards/rejected": -5.323348522186279, "step": 934 }, { "epoch": 0.2, "learning_rate": 1.615546218487395e-05, "logits/chosen": -2.006044626235962, "logits/rejected": -1.7604379653930664, "logps/chosen": -358.02349853515625, "logps/rejected": -372.83563232421875, "loss": 0.2468, "rewards/accuracies": 0.875, "rewards/chosen": -1.1392922401428223, "rewards/margins": 4.669717788696289, "rewards/rejected": -5.809009552001953, "step": 935 }, { "epoch": 0.2, "learning_rate": 1.615126050420168e-05, "logits/chosen": -2.06150484085083, "logits/rejected": -2.139726400375366, "logps/chosen": -274.4190368652344, "logps/rejected": -346.6410217285156, "loss": 0.4083, "rewards/accuracies": 0.75, "rewards/chosen": -1.7062376737594604, "rewards/margins": 2.6906228065490723, "rewards/rejected": -4.396860122680664, "step": 936 }, { "epoch": 0.2, "learning_rate": 1.6147058823529414e-05, "logits/chosen": -2.36195707321167, "logits/rejected": -2.273604393005371, "logps/chosen": -265.2520446777344, "logps/rejected": -262.8707275390625, "loss": 0.4938, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0171256065368652, "rewards/margins": 2.4653913974761963, "rewards/rejected": -4.482516765594482, "step": 937 }, { "epoch": 0.2, "learning_rate": 1.6142857142857145e-05, "logits/chosen": -1.8691480159759521, "logits/rejected": -1.7311128377914429, "logps/chosen": -275.6723937988281, "logps/rejected": -302.32171630859375, "loss": 0.2987, "rewards/accuracies": 0.875, "rewards/chosen": -1.598949670791626, "rewards/margins": 3.0386173725128174, "rewards/rejected": -4.637567520141602, "step": 938 }, { "epoch": 0.2, "learning_rate": 1.6138655462184875e-05, "logits/chosen": -2.3988940715789795, "logits/rejected": -2.1015825271606445, "logps/chosen": -278.5812072753906, "logps/rejected": -282.5924987792969, "loss": 0.4784, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0416860580444336, "rewards/margins": 3.5054492950439453, "rewards/rejected": -5.547134876251221, "step": 939 }, { "epoch": 0.2, "learning_rate": 1.6134453781512605e-05, "logits/chosen": -1.6708526611328125, "logits/rejected": -1.3594670295715332, "logps/chosen": -284.24395751953125, "logps/rejected": -261.78753662109375, "loss": 0.3832, "rewards/accuracies": 0.875, "rewards/chosen": -1.343563437461853, "rewards/margins": 3.172497510910034, "rewards/rejected": -4.516060829162598, "step": 940 }, { "epoch": 0.2, "learning_rate": 1.613025210084034e-05, "logits/chosen": -2.0220630168914795, "logits/rejected": -1.6935721635818481, "logps/chosen": -284.0840148925781, "logps/rejected": -305.60125732421875, "loss": 0.2676, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1521260738372803, "rewards/margins": 3.2628371715545654, "rewards/rejected": -4.414963245391846, "step": 941 }, { "epoch": 0.2, "learning_rate": 1.612605042016807e-05, "logits/chosen": -2.0774691104888916, "logits/rejected": -2.070549488067627, "logps/chosen": -288.32379150390625, "logps/rejected": -283.20526123046875, "loss": 0.3461, "rewards/accuracies": 0.875, "rewards/chosen": -1.3477067947387695, "rewards/margins": 3.0809428691864014, "rewards/rejected": -4.42864990234375, "step": 942 }, { "epoch": 0.2, "learning_rate": 1.61218487394958e-05, "logits/chosen": -1.7696259021759033, "logits/rejected": -1.865986704826355, "logps/chosen": -253.49505615234375, "logps/rejected": -442.7955322265625, "loss": 0.1525, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4500681161880493, "rewards/margins": 5.546562671661377, "rewards/rejected": -6.996630668640137, "step": 943 }, { "epoch": 0.2, "learning_rate": 1.6117647058823533e-05, "logits/chosen": -2.2208175659179688, "logits/rejected": -1.7669366598129272, "logps/chosen": -264.23095703125, "logps/rejected": -267.67083740234375, "loss": 0.369, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2596253156661987, "rewards/margins": 2.8266682624816895, "rewards/rejected": -4.086293697357178, "step": 944 }, { "epoch": 0.2, "learning_rate": 1.6113445378151263e-05, "logits/chosen": -2.0319926738739014, "logits/rejected": -1.9267170429229736, "logps/chosen": -306.5227355957031, "logps/rejected": -331.3794250488281, "loss": 0.5253, "rewards/accuracies": 0.75, "rewards/chosen": -1.1882493495941162, "rewards/margins": 2.494633436203003, "rewards/rejected": -3.682882785797119, "step": 945 }, { "epoch": 0.2, "learning_rate": 1.6109243697478993e-05, "logits/chosen": -2.099152088165283, "logits/rejected": -1.7469524145126343, "logps/chosen": -306.03839111328125, "logps/rejected": -273.3168029785156, "loss": 0.1075, "rewards/accuracies": 1.0, "rewards/chosen": -0.8145140409469604, "rewards/margins": 3.748594284057617, "rewards/rejected": -4.563108444213867, "step": 946 }, { "epoch": 0.2, "learning_rate": 1.6105042016806723e-05, "logits/chosen": -2.0426230430603027, "logits/rejected": -1.679762601852417, "logps/chosen": -288.73663330078125, "logps/rejected": -272.72576904296875, "loss": 0.4731, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6338205337524414, "rewards/margins": 2.9622178077697754, "rewards/rejected": -4.596037864685059, "step": 947 }, { "epoch": 0.2, "learning_rate": 1.6100840336134457e-05, "logits/chosen": -2.098888874053955, "logits/rejected": -2.033669948577881, "logps/chosen": -296.52008056640625, "logps/rejected": -330.41473388671875, "loss": 0.2741, "rewards/accuracies": 0.875, "rewards/chosen": -1.3321335315704346, "rewards/margins": 3.7607579231262207, "rewards/rejected": -5.092891216278076, "step": 948 }, { "epoch": 0.2, "learning_rate": 1.6096638655462187e-05, "logits/chosen": -1.8267366886138916, "logits/rejected": -1.715399980545044, "logps/chosen": -410.0887451171875, "logps/rejected": -333.095703125, "loss": 0.4216, "rewards/accuracies": 0.75, "rewards/chosen": -1.5467424392700195, "rewards/margins": 2.395495653152466, "rewards/rejected": -3.9422380924224854, "step": 949 }, { "epoch": 0.2, "learning_rate": 1.6092436974789917e-05, "logits/chosen": -2.006714344024658, "logits/rejected": -2.23336124420166, "logps/chosen": -288.5964050292969, "logps/rejected": -427.2220458984375, "loss": 0.0577, "rewards/accuracies": 1.0, "rewards/chosen": -1.6714967489242554, "rewards/margins": 4.585170269012451, "rewards/rejected": -6.256667613983154, "step": 950 }, { "epoch": 0.2, "learning_rate": 1.6088235294117648e-05, "logits/chosen": -2.2115395069122314, "logits/rejected": -2.147038698196411, "logps/chosen": -252.1461944580078, "logps/rejected": -330.59161376953125, "loss": 0.0979, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4695258140563965, "rewards/margins": 3.851541519165039, "rewards/rejected": -5.3210673332214355, "step": 951 }, { "epoch": 0.2, "learning_rate": 1.608403361344538e-05, "logits/chosen": -2.0036466121673584, "logits/rejected": -1.905857801437378, "logps/chosen": -389.0947265625, "logps/rejected": -306.5209655761719, "loss": 0.3509, "rewards/accuracies": 0.875, "rewards/chosen": -1.507166862487793, "rewards/margins": 3.9212779998779297, "rewards/rejected": -5.4284443855285645, "step": 952 }, { "epoch": 0.2, "learning_rate": 1.607983193277311e-05, "logits/chosen": -2.4142422676086426, "logits/rejected": -2.2419326305389404, "logps/chosen": -288.0456237792969, "logps/rejected": -310.0427551269531, "loss": 0.1473, "rewards/accuracies": 0.875, "rewards/chosen": -0.4873964190483093, "rewards/margins": 3.292844295501709, "rewards/rejected": -3.780241012573242, "step": 953 }, { "epoch": 0.2, "learning_rate": 1.6075630252100842e-05, "logits/chosen": -2.146336555480957, "logits/rejected": -2.030111789703369, "logps/chosen": -250.44271850585938, "logps/rejected": -314.92950439453125, "loss": 0.2473, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6969060897827148, "rewards/margins": 3.61592960357666, "rewards/rejected": -5.312835693359375, "step": 954 }, { "epoch": 0.2, "learning_rate": 1.6071428571428572e-05, "logits/chosen": -2.2848095893859863, "logits/rejected": -2.0192418098449707, "logps/chosen": -396.1111755371094, "logps/rejected": -312.5511169433594, "loss": 0.4043, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8675146102905273, "rewards/margins": 3.271200180053711, "rewards/rejected": -5.138714790344238, "step": 955 }, { "epoch": 0.2, "learning_rate": 1.6067226890756306e-05, "logits/chosen": -2.0489754676818848, "logits/rejected": -1.9869868755340576, "logps/chosen": -400.83404541015625, "logps/rejected": -427.11968994140625, "loss": 0.1299, "rewards/accuracies": 1.0, "rewards/chosen": -0.9448701739311218, "rewards/margins": 3.682661294937134, "rewards/rejected": -4.6275315284729, "step": 956 }, { "epoch": 0.2, "learning_rate": 1.6063025210084036e-05, "logits/chosen": -2.131647825241089, "logits/rejected": -1.7466652393341064, "logps/chosen": -462.75537109375, "logps/rejected": -384.59039306640625, "loss": 0.3906, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8894122838974, "rewards/margins": 2.7314529418945312, "rewards/rejected": -4.620865345001221, "step": 957 }, { "epoch": 0.2, "learning_rate": 1.6058823529411766e-05, "logits/chosen": -1.8268980979919434, "logits/rejected": -1.9135490655899048, "logps/chosen": -181.1867218017578, "logps/rejected": -257.34600830078125, "loss": 0.2489, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9502403736114502, "rewards/margins": 2.652364730834961, "rewards/rejected": -4.602604866027832, "step": 958 }, { "epoch": 0.2, "learning_rate": 1.6054621848739496e-05, "logits/chosen": -1.9169213771820068, "logits/rejected": -2.2156481742858887, "logps/chosen": -292.47357177734375, "logps/rejected": -370.2200927734375, "loss": 0.2202, "rewards/accuracies": 0.875, "rewards/chosen": -1.8812296390533447, "rewards/margins": 4.399232864379883, "rewards/rejected": -6.280462265014648, "step": 959 }, { "epoch": 0.2, "learning_rate": 1.605042016806723e-05, "logits/chosen": -1.9813048839569092, "logits/rejected": -2.1631431579589844, "logps/chosen": -348.94384765625, "logps/rejected": -346.21331787109375, "loss": 0.8581, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9477732181549072, "rewards/margins": 2.3577558994293213, "rewards/rejected": -4.3055291175842285, "step": 960 }, { "epoch": 0.2, "learning_rate": 1.604621848739496e-05, "logits/chosen": -1.8230655193328857, "logits/rejected": -1.8407657146453857, "logps/chosen": -180.0000457763672, "logps/rejected": -295.68603515625, "loss": 0.1646, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1433920860290527, "rewards/margins": 4.4139885902404785, "rewards/rejected": -5.557380676269531, "step": 961 }, { "epoch": 0.2, "learning_rate": 1.604201680672269e-05, "logits/chosen": -2.3089263439178467, "logits/rejected": -1.8260228633880615, "logps/chosen": -237.73977661132812, "logps/rejected": -186.85098266601562, "loss": 0.4226, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7359535694122314, "rewards/margins": 1.7652562856674194, "rewards/rejected": -3.5012099742889404, "step": 962 }, { "epoch": 0.2, "learning_rate": 1.6037815126050424e-05, "logits/chosen": -2.5158958435058594, "logits/rejected": -2.2219765186309814, "logps/chosen": -568.96728515625, "logps/rejected": -465.2297668457031, "loss": 0.648, "rewards/accuracies": 0.875, "rewards/chosen": -0.6339870095252991, "rewards/margins": 3.2373623847961426, "rewards/rejected": -3.871349334716797, "step": 963 }, { "epoch": 0.2, "learning_rate": 1.6033613445378154e-05, "logits/chosen": -2.211601734161377, "logits/rejected": -2.054121255874634, "logps/chosen": -405.22821044921875, "logps/rejected": -430.16552734375, "loss": 0.2456, "rewards/accuracies": 0.875, "rewards/chosen": -1.5112473964691162, "rewards/margins": 3.313117504119873, "rewards/rejected": -4.82436466217041, "step": 964 }, { "epoch": 0.2, "learning_rate": 1.6029411764705884e-05, "logits/chosen": -1.7952327728271484, "logits/rejected": -1.3685411214828491, "logps/chosen": -328.3746337890625, "logps/rejected": -304.05780029296875, "loss": 0.4384, "rewards/accuracies": 0.75, "rewards/chosen": -1.9979314804077148, "rewards/margins": 2.496091365814209, "rewards/rejected": -4.494022846221924, "step": 965 }, { "epoch": 0.2, "learning_rate": 1.6025210084033615e-05, "logits/chosen": -2.0978634357452393, "logits/rejected": -1.5490803718566895, "logps/chosen": -637.6278076171875, "logps/rejected": -367.8577575683594, "loss": 0.1327, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0601354837417603, "rewards/margins": 3.7182583808898926, "rewards/rejected": -4.7783942222595215, "step": 966 }, { "epoch": 0.2, "learning_rate": 1.6021008403361348e-05, "logits/chosen": -2.293529510498047, "logits/rejected": -1.758965015411377, "logps/chosen": -289.671630859375, "logps/rejected": -268.2323303222656, "loss": 0.516, "rewards/accuracies": 0.875, "rewards/chosen": -1.6899057626724243, "rewards/margins": 2.7523252964019775, "rewards/rejected": -4.442231178283691, "step": 967 }, { "epoch": 0.2, "learning_rate": 1.601680672268908e-05, "logits/chosen": -1.9055092334747314, "logits/rejected": -2.0438497066497803, "logps/chosen": -305.809326171875, "logps/rejected": -352.200439453125, "loss": 0.4915, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1041617393493652, "rewards/margins": 2.303605079650879, "rewards/rejected": -4.407767295837402, "step": 968 }, { "epoch": 0.2, "learning_rate": 1.601260504201681e-05, "logits/chosen": -1.9326434135437012, "logits/rejected": -2.179170846939087, "logps/chosen": -407.10430908203125, "logps/rejected": -450.77386474609375, "loss": 0.7245, "rewards/accuracies": 0.5625, "rewards/chosen": -1.7418363094329834, "rewards/margins": 1.7377456426620483, "rewards/rejected": -3.4795820713043213, "step": 969 }, { "epoch": 0.2, "learning_rate": 1.600840336134454e-05, "logits/chosen": -2.1444361209869385, "logits/rejected": -2.093311071395874, "logps/chosen": -337.5771179199219, "logps/rejected": -289.28472900390625, "loss": 0.1159, "rewards/accuracies": 1.0, "rewards/chosen": -1.4055050611495972, "rewards/margins": 4.7778730392456055, "rewards/rejected": -6.183377742767334, "step": 970 }, { "epoch": 0.2, "learning_rate": 1.6004201680672272e-05, "logits/chosen": -2.1992075443267822, "logits/rejected": -1.967665433883667, "logps/chosen": -351.4008483886719, "logps/rejected": -428.0211181640625, "loss": 0.157, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8945674896240234, "rewards/margins": 3.9394099712371826, "rewards/rejected": -4.833977222442627, "step": 971 }, { "epoch": 0.2, "learning_rate": 1.6000000000000003e-05, "logits/chosen": -2.328667163848877, "logits/rejected": -1.9910246133804321, "logps/chosen": -316.12939453125, "logps/rejected": -309.1440734863281, "loss": 0.3179, "rewards/accuracies": 0.875, "rewards/chosen": -0.9753143191337585, "rewards/margins": 3.3967621326446533, "rewards/rejected": -4.372076511383057, "step": 972 }, { "epoch": 0.2, "learning_rate": 1.5995798319327733e-05, "logits/chosen": -2.0464725494384766, "logits/rejected": -2.0297629833221436, "logps/chosen": -430.61016845703125, "logps/rejected": -437.5151062011719, "loss": 0.1596, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9454315304756165, "rewards/margins": 3.407010555267334, "rewards/rejected": -4.352442264556885, "step": 973 }, { "epoch": 0.2, "learning_rate": 1.5991596638655463e-05, "logits/chosen": -2.111048698425293, "logits/rejected": -1.7278022766113281, "logps/chosen": -271.6231384277344, "logps/rejected": -237.2025604248047, "loss": 0.3344, "rewards/accuracies": 0.75, "rewards/chosen": -1.518215298652649, "rewards/margins": 3.550593376159668, "rewards/rejected": -5.068808555603027, "step": 974 }, { "epoch": 0.2, "learning_rate": 1.5987394957983197e-05, "logits/chosen": -2.125736713409424, "logits/rejected": -1.9389903545379639, "logps/chosen": -356.8438415527344, "logps/rejected": -345.59375, "loss": 0.3553, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7286221981048584, "rewards/margins": 3.0152809619903564, "rewards/rejected": -4.743903160095215, "step": 975 }, { "epoch": 0.2, "learning_rate": 1.5983193277310927e-05, "logits/chosen": -2.135952949523926, "logits/rejected": -2.1630730628967285, "logps/chosen": -296.26287841796875, "logps/rejected": -352.83026123046875, "loss": 0.2042, "rewards/accuracies": 0.9375, "rewards/chosen": -2.042497158050537, "rewards/margins": 3.567195415496826, "rewards/rejected": -5.609692573547363, "step": 976 }, { "epoch": 0.2, "learning_rate": 1.5978991596638657e-05, "logits/chosen": -1.6039451360702515, "logits/rejected": -1.5763413906097412, "logps/chosen": -304.0045471191406, "logps/rejected": -315.9101257324219, "loss": 0.2926, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6680569648742676, "rewards/margins": 3.900869131088257, "rewards/rejected": -5.568925857543945, "step": 977 }, { "epoch": 0.2, "learning_rate": 1.5974789915966387e-05, "logits/chosen": -2.230966806411743, "logits/rejected": -1.9008398056030273, "logps/chosen": -348.0404357910156, "logps/rejected": -309.02825927734375, "loss": 0.0941, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0074272155761719, "rewards/margins": 4.123209476470947, "rewards/rejected": -5.130637168884277, "step": 978 }, { "epoch": 0.2, "learning_rate": 1.597058823529412e-05, "logits/chosen": -2.1211373805999756, "logits/rejected": -2.225551128387451, "logps/chosen": -280.7225341796875, "logps/rejected": -319.66412353515625, "loss": 0.2427, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2527220249176025, "rewards/margins": 3.760939836502075, "rewards/rejected": -6.0136613845825195, "step": 979 }, { "epoch": 0.21, "learning_rate": 1.596638655462185e-05, "logits/chosen": -1.992407202720642, "logits/rejected": -2.0630340576171875, "logps/chosen": -273.6285400390625, "logps/rejected": -267.4842529296875, "loss": 1.3249, "rewards/accuracies": 0.5625, "rewards/chosen": -3.1059138774871826, "rewards/margins": 0.3664852976799011, "rewards/rejected": -3.4723992347717285, "step": 980 }, { "epoch": 0.21, "learning_rate": 1.596218487394958e-05, "logits/chosen": -2.4282758235931396, "logits/rejected": -2.183845281600952, "logps/chosen": -318.62451171875, "logps/rejected": -338.48553466796875, "loss": 0.411, "rewards/accuracies": 0.8125, "rewards/chosen": -1.801322340965271, "rewards/margins": 3.6411235332489014, "rewards/rejected": -5.442445755004883, "step": 981 }, { "epoch": 0.21, "learning_rate": 1.595798319327731e-05, "logits/chosen": -2.049459934234619, "logits/rejected": -2.1267614364624023, "logps/chosen": -488.1476745605469, "logps/rejected": -434.264892578125, "loss": 0.4719, "rewards/accuracies": 0.6875, "rewards/chosen": -2.7990267276763916, "rewards/margins": 2.4907145500183105, "rewards/rejected": -5.2897419929504395, "step": 982 }, { "epoch": 0.21, "learning_rate": 1.5953781512605045e-05, "logits/chosen": -2.1547482013702393, "logits/rejected": -1.9187558889389038, "logps/chosen": -355.8290100097656, "logps/rejected": -354.94610595703125, "loss": 0.7006, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1883840560913086, "rewards/margins": 2.9620940685272217, "rewards/rejected": -5.150478363037109, "step": 983 }, { "epoch": 0.21, "learning_rate": 1.5949579831932775e-05, "logits/chosen": -2.2430317401885986, "logits/rejected": -1.9718213081359863, "logps/chosen": -345.955322265625, "logps/rejected": -399.9026794433594, "loss": 0.2591, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6024341583251953, "rewards/margins": 4.545600891113281, "rewards/rejected": -7.148034572601318, "step": 984 }, { "epoch": 0.21, "learning_rate": 1.5945378151260506e-05, "logits/chosen": -1.7310130596160889, "logits/rejected": -1.7342963218688965, "logps/chosen": -255.662841796875, "logps/rejected": -329.61865234375, "loss": 0.2125, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8036235570907593, "rewards/margins": 4.13872766494751, "rewards/rejected": -5.942351341247559, "step": 985 }, { "epoch": 0.21, "learning_rate": 1.594117647058824e-05, "logits/chosen": -2.52667236328125, "logits/rejected": -1.900354027748108, "logps/chosen": -544.683837890625, "logps/rejected": -395.1111755371094, "loss": 0.5719, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1777844429016113, "rewards/margins": 3.926586627960205, "rewards/rejected": -6.104371070861816, "step": 986 }, { "epoch": 0.21, "learning_rate": 1.5936974789915966e-05, "logits/chosen": -2.203887939453125, "logits/rejected": -1.8927711248397827, "logps/chosen": -296.181884765625, "logps/rejected": -256.6437683105469, "loss": 0.5472, "rewards/accuracies": 0.6875, "rewards/chosen": -2.595014810562134, "rewards/margins": 2.6470212936401367, "rewards/rejected": -5.242035388946533, "step": 987 }, { "epoch": 0.21, "learning_rate": 1.5932773109243696e-05, "logits/chosen": -2.147514820098877, "logits/rejected": -2.0080254077911377, "logps/chosen": -346.51324462890625, "logps/rejected": -326.51898193359375, "loss": 0.2249, "rewards/accuracies": 0.875, "rewards/chosen": -1.9157497882843018, "rewards/margins": 4.018861770629883, "rewards/rejected": -5.9346113204956055, "step": 988 }, { "epoch": 0.21, "learning_rate": 1.592857142857143e-05, "logits/chosen": -2.0567169189453125, "logits/rejected": -1.9067251682281494, "logps/chosen": -295.603759765625, "logps/rejected": -307.12847900390625, "loss": 0.3289, "rewards/accuracies": 0.875, "rewards/chosen": -2.0667433738708496, "rewards/margins": 3.1505918502807617, "rewards/rejected": -5.217335224151611, "step": 989 }, { "epoch": 0.21, "learning_rate": 1.592436974789916e-05, "logits/chosen": -2.1492984294891357, "logits/rejected": -1.9382822513580322, "logps/chosen": -329.0425109863281, "logps/rejected": -357.9788818359375, "loss": 0.2395, "rewards/accuracies": 0.875, "rewards/chosen": -1.3744035959243774, "rewards/margins": 3.94028639793396, "rewards/rejected": -5.314690589904785, "step": 990 }, { "epoch": 0.21, "learning_rate": 1.592016806722689e-05, "logits/chosen": -2.06227970123291, "logits/rejected": -1.793077826499939, "logps/chosen": -383.4049072265625, "logps/rejected": -380.588134765625, "loss": 0.3827, "rewards/accuracies": 0.875, "rewards/chosen": -2.2909398078918457, "rewards/margins": 3.0851078033447266, "rewards/rejected": -5.376047134399414, "step": 991 }, { "epoch": 0.21, "learning_rate": 1.591596638655462e-05, "logits/chosen": -2.1432809829711914, "logits/rejected": -1.4809675216674805, "logps/chosen": -287.3902587890625, "logps/rejected": -244.61148071289062, "loss": 0.1346, "rewards/accuracies": 0.9375, "rewards/chosen": -1.113315463066101, "rewards/margins": 4.157505989074707, "rewards/rejected": -5.270821571350098, "step": 992 }, { "epoch": 0.21, "learning_rate": 1.5911764705882354e-05, "logits/chosen": -2.3400068283081055, "logits/rejected": -1.8157542943954468, "logps/chosen": -381.60516357421875, "logps/rejected": -373.2441711425781, "loss": 0.1661, "rewards/accuracies": 0.875, "rewards/chosen": -1.063373327255249, "rewards/margins": 4.645111083984375, "rewards/rejected": -5.708484649658203, "step": 993 }, { "epoch": 0.21, "learning_rate": 1.5907563025210084e-05, "logits/chosen": -2.1736326217651367, "logits/rejected": -2.024364948272705, "logps/chosen": -209.30006408691406, "logps/rejected": -230.43218994140625, "loss": 0.187, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2895779609680176, "rewards/margins": 2.84120512008667, "rewards/rejected": -5.130782604217529, "step": 994 }, { "epoch": 0.21, "learning_rate": 1.5903361344537815e-05, "logits/chosen": -2.1777327060699463, "logits/rejected": -1.7259260416030884, "logps/chosen": -230.63426208496094, "logps/rejected": -259.3458557128906, "loss": 0.1616, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3928728103637695, "rewards/margins": 3.788881540298462, "rewards/rejected": -5.181754112243652, "step": 995 }, { "epoch": 0.21, "learning_rate": 1.5899159663865545e-05, "logits/chosen": -2.0159902572631836, "logits/rejected": -1.9090015888214111, "logps/chosen": -314.9594421386719, "logps/rejected": -318.53546142578125, "loss": 0.3031, "rewards/accuracies": 0.875, "rewards/chosen": -3.0985493659973145, "rewards/margins": 2.2413251399993896, "rewards/rejected": -5.339874744415283, "step": 996 }, { "epoch": 0.21, "learning_rate": 1.589495798319328e-05, "logits/chosen": -2.1222875118255615, "logits/rejected": -2.075869560241699, "logps/chosen": -279.2803039550781, "logps/rejected": -314.81561279296875, "loss": 0.1371, "rewards/accuracies": 0.9375, "rewards/chosen": -0.860377311706543, "rewards/margins": 4.949416160583496, "rewards/rejected": -5.809793472290039, "step": 997 }, { "epoch": 0.21, "learning_rate": 1.589075630252101e-05, "logits/chosen": -2.203815460205078, "logits/rejected": -2.1900599002838135, "logps/chosen": -321.6600036621094, "logps/rejected": -460.579345703125, "loss": 0.2982, "rewards/accuracies": 0.875, "rewards/chosen": -1.626387596130371, "rewards/margins": 4.398473739624023, "rewards/rejected": -6.024860858917236, "step": 998 }, { "epoch": 0.21, "learning_rate": 1.588655462184874e-05, "logits/chosen": -1.6782182455062866, "logits/rejected": -1.4933725595474243, "logps/chosen": -274.6654968261719, "logps/rejected": -245.32205200195312, "loss": 0.4175, "rewards/accuracies": 0.875, "rewards/chosen": -2.060499906539917, "rewards/margins": 3.4444291591644287, "rewards/rejected": -5.5049285888671875, "step": 999 }, { "epoch": 0.21, "learning_rate": 1.5882352941176473e-05, "logits/chosen": -2.128310441970825, "logits/rejected": -1.7982635498046875, "logps/chosen": -336.26702880859375, "logps/rejected": -310.9106750488281, "loss": 0.5163, "rewards/accuracies": 0.6875, "rewards/chosen": -2.399898052215576, "rewards/margins": 2.4173684120178223, "rewards/rejected": -4.817266464233398, "step": 1000 }, { "epoch": 0.21, "learning_rate": 1.5878151260504203e-05, "logits/chosen": -2.1564605236053467, "logits/rejected": -1.914797067642212, "logps/chosen": -304.67596435546875, "logps/rejected": -351.14013671875, "loss": 0.2622, "rewards/accuracies": 0.875, "rewards/chosen": -2.037938117980957, "rewards/margins": 5.277014255523682, "rewards/rejected": -7.314952850341797, "step": 1001 }, { "epoch": 0.21, "learning_rate": 1.5873949579831933e-05, "logits/chosen": -1.8247300386428833, "logits/rejected": -1.5515823364257812, "logps/chosen": -259.6643981933594, "logps/rejected": -255.8385772705078, "loss": 0.3908, "rewards/accuracies": 0.875, "rewards/chosen": -2.675656795501709, "rewards/margins": 2.1125659942626953, "rewards/rejected": -4.788222789764404, "step": 1002 }, { "epoch": 0.21, "learning_rate": 1.5869747899159663e-05, "logits/chosen": -2.079982280731201, "logits/rejected": -2.0852432250976562, "logps/chosen": -389.28619384765625, "logps/rejected": -397.09857177734375, "loss": 0.2464, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1009445190429688, "rewards/margins": 4.061807632446289, "rewards/rejected": -6.162752151489258, "step": 1003 }, { "epoch": 0.21, "learning_rate": 1.5865546218487397e-05, "logits/chosen": -1.6550183296203613, "logits/rejected": -1.98838210105896, "logps/chosen": -273.01409912109375, "logps/rejected": -359.2369384765625, "loss": 0.4703, "rewards/accuracies": 0.875, "rewards/chosen": -2.3376121520996094, "rewards/margins": 3.1436686515808105, "rewards/rejected": -5.48128080368042, "step": 1004 }, { "epoch": 0.21, "learning_rate": 1.5861344537815127e-05, "logits/chosen": -1.861385464668274, "logits/rejected": -1.9208061695098877, "logps/chosen": -344.4636535644531, "logps/rejected": -335.368408203125, "loss": 0.2375, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3113930225372314, "rewards/margins": 3.915203094482422, "rewards/rejected": -6.226596832275391, "step": 1005 }, { "epoch": 0.21, "learning_rate": 1.5857142857142857e-05, "logits/chosen": -2.0344033241271973, "logits/rejected": -2.0327882766723633, "logps/chosen": -286.20703125, "logps/rejected": -348.3990478515625, "loss": 0.526, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4683268070220947, "rewards/margins": 2.743124485015869, "rewards/rejected": -5.211451530456543, "step": 1006 }, { "epoch": 0.21, "learning_rate": 1.5852941176470588e-05, "logits/chosen": -2.1941375732421875, "logits/rejected": -1.8723511695861816, "logps/chosen": -381.8442687988281, "logps/rejected": -328.5703125, "loss": 0.4321, "rewards/accuracies": 0.625, "rewards/chosen": -1.955676794052124, "rewards/margins": 3.0702829360961914, "rewards/rejected": -5.0259599685668945, "step": 1007 }, { "epoch": 0.21, "learning_rate": 1.584873949579832e-05, "logits/chosen": -2.0592691898345947, "logits/rejected": -1.6820740699768066, "logps/chosen": -340.93914794921875, "logps/rejected": -297.41424560546875, "loss": 0.3653, "rewards/accuracies": 0.8125, "rewards/chosen": -2.9973559379577637, "rewards/margins": 2.6213486194610596, "rewards/rejected": -5.618704319000244, "step": 1008 }, { "epoch": 0.21, "learning_rate": 1.584453781512605e-05, "logits/chosen": -2.322657585144043, "logits/rejected": -2.3027448654174805, "logps/chosen": -413.90625, "logps/rejected": -369.0709228515625, "loss": 0.4432, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8684520721435547, "rewards/margins": 3.0394368171691895, "rewards/rejected": -4.907888412475586, "step": 1009 }, { "epoch": 0.21, "learning_rate": 1.584033613445378e-05, "logits/chosen": -2.1634111404418945, "logits/rejected": -1.6900907754898071, "logps/chosen": -289.776123046875, "logps/rejected": -303.7606506347656, "loss": 0.2806, "rewards/accuracies": 0.875, "rewards/chosen": -2.4645133018493652, "rewards/margins": 2.172579526901245, "rewards/rejected": -4.6370930671691895, "step": 1010 }, { "epoch": 0.21, "learning_rate": 1.5836134453781512e-05, "logits/chosen": -1.9188296794891357, "logits/rejected": -1.9418878555297852, "logps/chosen": -256.3186340332031, "logps/rejected": -276.24151611328125, "loss": 0.4689, "rewards/accuracies": 0.75, "rewards/chosen": -2.3924880027770996, "rewards/margins": 2.3059651851654053, "rewards/rejected": -4.698453426361084, "step": 1011 }, { "epoch": 0.21, "learning_rate": 1.5831932773109245e-05, "logits/chosen": -2.28934383392334, "logits/rejected": -1.9998149871826172, "logps/chosen": -258.2101745605469, "logps/rejected": -254.7913818359375, "loss": 0.5938, "rewards/accuracies": 0.75, "rewards/chosen": -3.0487024784088135, "rewards/margins": 2.2096283435821533, "rewards/rejected": -5.258330821990967, "step": 1012 }, { "epoch": 0.21, "learning_rate": 1.5827731092436976e-05, "logits/chosen": -2.004002571105957, "logits/rejected": -1.9574902057647705, "logps/chosen": -359.8652648925781, "logps/rejected": -345.0855407714844, "loss": 0.3681, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8650950193405151, "rewards/margins": 3.7826039791107178, "rewards/rejected": -5.647699356079102, "step": 1013 }, { "epoch": 0.21, "learning_rate": 1.5823529411764706e-05, "logits/chosen": -1.993525743484497, "logits/rejected": -1.885300636291504, "logps/chosen": -335.21929931640625, "logps/rejected": -382.243408203125, "loss": 0.5637, "rewards/accuracies": 0.75, "rewards/chosen": -2.3249666690826416, "rewards/margins": 1.8985376358032227, "rewards/rejected": -4.223504066467285, "step": 1014 }, { "epoch": 0.21, "learning_rate": 1.5819327731092436e-05, "logits/chosen": -2.210437059402466, "logits/rejected": -2.3024606704711914, "logps/chosen": -287.51861572265625, "logps/rejected": -275.244384765625, "loss": 0.125, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5779825448989868, "rewards/margins": 3.6603569984436035, "rewards/rejected": -5.238339424133301, "step": 1015 }, { "epoch": 0.21, "learning_rate": 1.581512605042017e-05, "logits/chosen": -2.023965358734131, "logits/rejected": -1.8840968608856201, "logps/chosen": -416.53369140625, "logps/rejected": -399.0089111328125, "loss": 0.1224, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1955689191818237, "rewards/margins": 4.53508996963501, "rewards/rejected": -5.730658531188965, "step": 1016 }, { "epoch": 0.21, "learning_rate": 1.58109243697479e-05, "logits/chosen": -2.0806667804718018, "logits/rejected": -2.154951333999634, "logps/chosen": -314.016845703125, "logps/rejected": -304.0430603027344, "loss": 0.4446, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4265894889831543, "rewards/margins": 3.3580851554870605, "rewards/rejected": -5.784675121307373, "step": 1017 }, { "epoch": 0.21, "learning_rate": 1.580672268907563e-05, "logits/chosen": -2.1001968383789062, "logits/rejected": -1.943385362625122, "logps/chosen": -367.1288757324219, "logps/rejected": -324.0970458984375, "loss": 0.5309, "rewards/accuracies": 0.75, "rewards/chosen": -2.123927593231201, "rewards/margins": 2.621354818344116, "rewards/rejected": -4.745282173156738, "step": 1018 }, { "epoch": 0.21, "learning_rate": 1.580252100840336e-05, "logits/chosen": -2.298964023590088, "logits/rejected": -1.869608998298645, "logps/chosen": -276.7232971191406, "logps/rejected": -284.82342529296875, "loss": 0.3035, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9368112087249756, "rewards/margins": 4.026775360107422, "rewards/rejected": -6.963586330413818, "step": 1019 }, { "epoch": 0.21, "learning_rate": 1.5798319327731094e-05, "logits/chosen": -2.269315719604492, "logits/rejected": -2.1161513328552246, "logps/chosen": -401.36480712890625, "logps/rejected": -375.71954345703125, "loss": 0.7493, "rewards/accuracies": 0.625, "rewards/chosen": -2.101378917694092, "rewards/margins": 2.2584598064422607, "rewards/rejected": -4.359838962554932, "step": 1020 }, { "epoch": 0.21, "learning_rate": 1.5794117647058824e-05, "logits/chosen": -1.9500479698181152, "logits/rejected": -1.7284438610076904, "logps/chosen": -309.60943603515625, "logps/rejected": -411.1970520019531, "loss": 0.301, "rewards/accuracies": 0.8125, "rewards/chosen": -1.672088623046875, "rewards/margins": 3.5757875442504883, "rewards/rejected": -5.247876167297363, "step": 1021 }, { "epoch": 0.21, "learning_rate": 1.5789915966386554e-05, "logits/chosen": -2.0553410053253174, "logits/rejected": -2.3254337310791016, "logps/chosen": -342.6885681152344, "logps/rejected": -358.798828125, "loss": 0.0862, "rewards/accuracies": 1.0, "rewards/chosen": -1.6520211696624756, "rewards/margins": 4.654041290283203, "rewards/rejected": -6.3060622215271, "step": 1022 }, { "epoch": 0.21, "learning_rate": 1.5785714285714288e-05, "logits/chosen": -2.1380250453948975, "logits/rejected": -2.264366388320923, "logps/chosen": -227.76400756835938, "logps/rejected": -280.9142761230469, "loss": 0.4298, "rewards/accuracies": 0.75, "rewards/chosen": -1.8462836742401123, "rewards/margins": 4.351341724395752, "rewards/rejected": -6.197626113891602, "step": 1023 }, { "epoch": 0.21, "learning_rate": 1.5781512605042018e-05, "logits/chosen": -2.2289586067199707, "logits/rejected": -1.7775155305862427, "logps/chosen": -439.2747802734375, "logps/rejected": -304.8201904296875, "loss": 0.1713, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3938897848129272, "rewards/margins": 4.525201320648193, "rewards/rejected": -5.919090747833252, "step": 1024 }, { "epoch": 0.21, "learning_rate": 1.577731092436975e-05, "logits/chosen": -2.3004164695739746, "logits/rejected": -2.111860752105713, "logps/chosen": -393.759521484375, "logps/rejected": -413.17742919921875, "loss": 0.272, "rewards/accuracies": 0.875, "rewards/chosen": -1.6322078704833984, "rewards/margins": 2.432725191116333, "rewards/rejected": -4.064932823181152, "step": 1025 }, { "epoch": 0.21, "learning_rate": 1.577310924369748e-05, "logits/chosen": -2.1183876991271973, "logits/rejected": -1.7869009971618652, "logps/chosen": -350.95098876953125, "logps/rejected": -358.25433349609375, "loss": 0.2333, "rewards/accuracies": 0.9375, "rewards/chosen": -1.558724045753479, "rewards/margins": 3.3837058544158936, "rewards/rejected": -4.942430019378662, "step": 1026 }, { "epoch": 0.21, "learning_rate": 1.5768907563025212e-05, "logits/chosen": -1.6044032573699951, "logits/rejected": -1.6205954551696777, "logps/chosen": -197.78744506835938, "logps/rejected": -260.8172912597656, "loss": 0.6223, "rewards/accuracies": 0.75, "rewards/chosen": -2.7520601749420166, "rewards/margins": 1.552250623703003, "rewards/rejected": -4.3043107986450195, "step": 1027 }, { "epoch": 0.22, "learning_rate": 1.5764705882352943e-05, "logits/chosen": -1.9246397018432617, "logits/rejected": -2.16696834564209, "logps/chosen": -293.6358947753906, "logps/rejected": -279.64666748046875, "loss": 0.5489, "rewards/accuracies": 0.75, "rewards/chosen": -2.2128446102142334, "rewards/margins": 2.0586791038513184, "rewards/rejected": -4.271523475646973, "step": 1028 }, { "epoch": 0.22, "learning_rate": 1.5760504201680673e-05, "logits/chosen": -2.089893341064453, "logits/rejected": -1.9854090213775635, "logps/chosen": -344.54998779296875, "logps/rejected": -339.42535400390625, "loss": 0.3752, "rewards/accuracies": 0.875, "rewards/chosen": -1.773606777191162, "rewards/margins": 2.2489991188049316, "rewards/rejected": -4.022605895996094, "step": 1029 }, { "epoch": 0.22, "learning_rate": 1.5756302521008403e-05, "logits/chosen": -2.307817220687866, "logits/rejected": -1.9461517333984375, "logps/chosen": -371.61962890625, "logps/rejected": -342.8824462890625, "loss": 0.1874, "rewards/accuracies": 0.875, "rewards/chosen": -1.6164699792861938, "rewards/margins": 3.5407934188842773, "rewards/rejected": -5.157263278961182, "step": 1030 }, { "epoch": 0.22, "learning_rate": 1.5752100840336137e-05, "logits/chosen": -2.0700907707214355, "logits/rejected": -2.295524835586548, "logps/chosen": -199.2720947265625, "logps/rejected": -293.9393005371094, "loss": 0.1716, "rewards/accuracies": 0.875, "rewards/chosen": -1.475959300994873, "rewards/margins": 3.9602999687194824, "rewards/rejected": -5.4362592697143555, "step": 1031 }, { "epoch": 0.22, "learning_rate": 1.5747899159663867e-05, "logits/chosen": -2.1870431900024414, "logits/rejected": -2.079000949859619, "logps/chosen": -279.5403137207031, "logps/rejected": -378.5204772949219, "loss": 0.3308, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5660508871078491, "rewards/margins": 2.89620304107666, "rewards/rejected": -4.462253570556641, "step": 1032 }, { "epoch": 0.22, "learning_rate": 1.5743697478991597e-05, "logits/chosen": -2.233264446258545, "logits/rejected": -2.1475863456726074, "logps/chosen": -280.5260009765625, "logps/rejected": -222.30731201171875, "loss": 0.4507, "rewards/accuracies": 0.6875, "rewards/chosen": -1.30723237991333, "rewards/margins": 1.550318956375122, "rewards/rejected": -2.857551336288452, "step": 1033 }, { "epoch": 0.22, "learning_rate": 1.5739495798319327e-05, "logits/chosen": -2.116415500640869, "logits/rejected": -1.9250054359436035, "logps/chosen": -351.7734680175781, "logps/rejected": -317.4046325683594, "loss": 0.4731, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3256168365478516, "rewards/margins": 2.4478559494018555, "rewards/rejected": -4.773472785949707, "step": 1034 }, { "epoch": 0.22, "learning_rate": 1.573529411764706e-05, "logits/chosen": -2.1922314167022705, "logits/rejected": -2.1391685009002686, "logps/chosen": -244.06134033203125, "logps/rejected": -289.43951416015625, "loss": 0.2559, "rewards/accuracies": 0.8125, "rewards/chosen": -1.726034164428711, "rewards/margins": 3.462398052215576, "rewards/rejected": -5.188432216644287, "step": 1035 }, { "epoch": 0.22, "learning_rate": 1.573109243697479e-05, "logits/chosen": -2.36611270904541, "logits/rejected": -1.842142105102539, "logps/chosen": -250.618896484375, "logps/rejected": -273.9358215332031, "loss": 0.376, "rewards/accuracies": 0.8125, "rewards/chosen": -1.614174246788025, "rewards/margins": 3.283245086669922, "rewards/rejected": -4.897418975830078, "step": 1036 }, { "epoch": 0.22, "learning_rate": 1.572689075630252e-05, "logits/chosen": -2.028487205505371, "logits/rejected": -1.9487860202789307, "logps/chosen": -339.63958740234375, "logps/rejected": -304.4535217285156, "loss": 0.4047, "rewards/accuracies": 0.75, "rewards/chosen": -1.5681397914886475, "rewards/margins": 3.165180206298828, "rewards/rejected": -4.733320236206055, "step": 1037 }, { "epoch": 0.22, "learning_rate": 1.572268907563025e-05, "logits/chosen": -2.0320651531219482, "logits/rejected": -2.364687204360962, "logps/chosen": -335.325927734375, "logps/rejected": -440.23577880859375, "loss": 0.4058, "rewards/accuracies": 0.75, "rewards/chosen": -1.7449978590011597, "rewards/margins": 3.087007999420166, "rewards/rejected": -4.832005977630615, "step": 1038 }, { "epoch": 0.22, "learning_rate": 1.5718487394957985e-05, "logits/chosen": -2.3069984912872314, "logits/rejected": -2.179272174835205, "logps/chosen": -444.83642578125, "logps/rejected": -415.9942626953125, "loss": 0.355, "rewards/accuracies": 0.75, "rewards/chosen": -1.5126739740371704, "rewards/margins": 4.149467468261719, "rewards/rejected": -5.662141799926758, "step": 1039 }, { "epoch": 0.22, "learning_rate": 1.5714285714285715e-05, "logits/chosen": -2.1814706325531006, "logits/rejected": -1.832313060760498, "logps/chosen": -437.3150634765625, "logps/rejected": -303.0058898925781, "loss": 0.5554, "rewards/accuracies": 0.875, "rewards/chosen": -1.6847037076950073, "rewards/margins": 2.7606427669525146, "rewards/rejected": -4.445346832275391, "step": 1040 }, { "epoch": 0.22, "learning_rate": 1.5710084033613446e-05, "logits/chosen": -2.351832866668701, "logits/rejected": -1.9153738021850586, "logps/chosen": -434.5226745605469, "logps/rejected": -359.20660400390625, "loss": 0.2545, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9174166321754456, "rewards/margins": 3.314091682434082, "rewards/rejected": -4.231508255004883, "step": 1041 }, { "epoch": 0.22, "learning_rate": 1.570588235294118e-05, "logits/chosen": -2.1452512741088867, "logits/rejected": -2.02679443359375, "logps/chosen": -278.96148681640625, "logps/rejected": -345.1434326171875, "loss": 0.2789, "rewards/accuracies": 0.8125, "rewards/chosen": -2.440082550048828, "rewards/margins": 3.3516077995300293, "rewards/rejected": -5.791690349578857, "step": 1042 }, { "epoch": 0.22, "learning_rate": 1.570168067226891e-05, "logits/chosen": -2.2455363273620605, "logits/rejected": -1.8577537536621094, "logps/chosen": -440.77410888671875, "logps/rejected": -400.5489501953125, "loss": 0.3369, "rewards/accuracies": 0.875, "rewards/chosen": -1.4547991752624512, "rewards/margins": 3.333770990371704, "rewards/rejected": -4.788569450378418, "step": 1043 }, { "epoch": 0.22, "learning_rate": 1.569747899159664e-05, "logits/chosen": -2.3846778869628906, "logits/rejected": -1.8157987594604492, "logps/chosen": -402.36468505859375, "logps/rejected": -338.8222961425781, "loss": 0.2073, "rewards/accuracies": 0.875, "rewards/chosen": -0.8999124765396118, "rewards/margins": 3.6617870330810547, "rewards/rejected": -4.561699390411377, "step": 1044 }, { "epoch": 0.22, "learning_rate": 1.569327731092437e-05, "logits/chosen": -2.145190715789795, "logits/rejected": -1.7300249338150024, "logps/chosen": -454.65838623046875, "logps/rejected": -367.63519287109375, "loss": 0.7194, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7534492015838623, "rewards/margins": 2.1044344902038574, "rewards/rejected": -4.857883930206299, "step": 1045 }, { "epoch": 0.22, "learning_rate": 1.5689075630252103e-05, "logits/chosen": -2.297982692718506, "logits/rejected": -1.787142276763916, "logps/chosen": -299.640869140625, "logps/rejected": -296.74981689453125, "loss": 0.338, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3258482217788696, "rewards/margins": 3.2778358459472656, "rewards/rejected": -4.603683948516846, "step": 1046 }, { "epoch": 0.22, "learning_rate": 1.5684873949579834e-05, "logits/chosen": -2.0597774982452393, "logits/rejected": -1.8493540287017822, "logps/chosen": -363.616943359375, "logps/rejected": -293.5428466796875, "loss": 0.5212, "rewards/accuracies": 0.75, "rewards/chosen": -1.7531907558441162, "rewards/margins": 2.836975336074829, "rewards/rejected": -4.590166091918945, "step": 1047 }, { "epoch": 0.22, "learning_rate": 1.5680672268907564e-05, "logits/chosen": -2.1516826152801514, "logits/rejected": -1.5491429567337036, "logps/chosen": -333.3130187988281, "logps/rejected": -262.74847412109375, "loss": 0.3446, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8804630041122437, "rewards/margins": 3.5086886882781982, "rewards/rejected": -5.389151573181152, "step": 1048 }, { "epoch": 0.22, "learning_rate": 1.5676470588235294e-05, "logits/chosen": -2.4076790809631348, "logits/rejected": -1.9318214654922485, "logps/chosen": -326.7567138671875, "logps/rejected": -362.8504333496094, "loss": 0.5094, "rewards/accuracies": 0.75, "rewards/chosen": -1.4532806873321533, "rewards/margins": 3.107287883758545, "rewards/rejected": -4.560568332672119, "step": 1049 }, { "epoch": 0.22, "learning_rate": 1.5672268907563028e-05, "logits/chosen": -2.1510233879089355, "logits/rejected": -1.7747035026550293, "logps/chosen": -413.5495910644531, "logps/rejected": -355.978271484375, "loss": 0.2436, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9846274852752686, "rewards/margins": 2.6355772018432617, "rewards/rejected": -4.620204448699951, "step": 1050 }, { "epoch": 0.22, "learning_rate": 1.5668067226890758e-05, "logits/chosen": -1.7494980096817017, "logits/rejected": -1.5653728246688843, "logps/chosen": -559.1565551757812, "logps/rejected": -329.03424072265625, "loss": 0.6958, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1177010536193848, "rewards/margins": 2.7193045616149902, "rewards/rejected": -5.837005615234375, "step": 1051 }, { "epoch": 0.22, "learning_rate": 1.5663865546218488e-05, "logits/chosen": -2.192047357559204, "logits/rejected": -2.125075340270996, "logps/chosen": -374.524658203125, "logps/rejected": -397.61907958984375, "loss": 0.0932, "rewards/accuracies": 1.0, "rewards/chosen": -1.144680380821228, "rewards/margins": 4.933655738830566, "rewards/rejected": -6.078336238861084, "step": 1052 }, { "epoch": 0.22, "learning_rate": 1.565966386554622e-05, "logits/chosen": -1.9797996282577515, "logits/rejected": -2.0858266353607178, "logps/chosen": -442.8431396484375, "logps/rejected": -421.2425537109375, "loss": 0.2757, "rewards/accuracies": 0.875, "rewards/chosen": -1.7781563997268677, "rewards/margins": 3.7903406620025635, "rewards/rejected": -5.5684967041015625, "step": 1053 }, { "epoch": 0.22, "learning_rate": 1.5655462184873952e-05, "logits/chosen": -2.343562602996826, "logits/rejected": -2.357333183288574, "logps/chosen": -225.52113342285156, "logps/rejected": -240.44729614257812, "loss": 0.29, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6451144218444824, "rewards/margins": 3.344367504119873, "rewards/rejected": -4.9894819259643555, "step": 1054 }, { "epoch": 0.22, "learning_rate": 1.5651260504201682e-05, "logits/chosen": -2.129213571548462, "logits/rejected": -1.8569040298461914, "logps/chosen": -333.76177978515625, "logps/rejected": -321.2130432128906, "loss": 0.2842, "rewards/accuracies": 0.875, "rewards/chosen": -1.8709126710891724, "rewards/margins": 2.6699986457824707, "rewards/rejected": -4.540911674499512, "step": 1055 }, { "epoch": 0.22, "learning_rate": 1.5647058823529412e-05, "logits/chosen": -2.449032783508301, "logits/rejected": -2.0392043590545654, "logps/chosen": -482.1714172363281, "logps/rejected": -445.0155029296875, "loss": 0.1242, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3811352252960205, "rewards/margins": 5.03152322769165, "rewards/rejected": -6.412657737731934, "step": 1056 }, { "epoch": 0.22, "learning_rate": 1.5642857142857143e-05, "logits/chosen": -2.445967197418213, "logits/rejected": -1.8239625692367554, "logps/chosen": -355.8022155761719, "logps/rejected": -265.74761962890625, "loss": 0.3162, "rewards/accuracies": 0.875, "rewards/chosen": -2.5941202640533447, "rewards/margins": 3.00582218170166, "rewards/rejected": -5.599942207336426, "step": 1057 }, { "epoch": 0.22, "learning_rate": 1.5638655462184876e-05, "logits/chosen": -2.1468396186828613, "logits/rejected": -2.0982284545898438, "logps/chosen": -420.23980712890625, "logps/rejected": -371.2124328613281, "loss": 0.1636, "rewards/accuracies": 0.875, "rewards/chosen": -1.441194772720337, "rewards/margins": 4.537558555603027, "rewards/rejected": -5.978753089904785, "step": 1058 }, { "epoch": 0.22, "learning_rate": 1.5634453781512606e-05, "logits/chosen": -2.0872554779052734, "logits/rejected": -1.7862269878387451, "logps/chosen": -353.5707092285156, "logps/rejected": -300.4449768066406, "loss": 0.1958, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6572359800338745, "rewards/margins": 3.3234400749206543, "rewards/rejected": -4.980676174163818, "step": 1059 }, { "epoch": 0.22, "learning_rate": 1.5630252100840337e-05, "logits/chosen": -2.22546124458313, "logits/rejected": -1.8920305967330933, "logps/chosen": -392.8912353515625, "logps/rejected": -421.10870361328125, "loss": 0.3166, "rewards/accuracies": 0.875, "rewards/chosen": -1.4793956279754639, "rewards/margins": 2.5840389728546143, "rewards/rejected": -4.063434600830078, "step": 1060 }, { "epoch": 0.22, "learning_rate": 1.5626050420168067e-05, "logits/chosen": -2.0951898097991943, "logits/rejected": -1.8646944761276245, "logps/chosen": -447.3456115722656, "logps/rejected": -400.37762451171875, "loss": 0.2695, "rewards/accuracies": 0.875, "rewards/chosen": -1.3893158435821533, "rewards/margins": 3.2697815895080566, "rewards/rejected": -4.659097671508789, "step": 1061 }, { "epoch": 0.22, "learning_rate": 1.56218487394958e-05, "logits/chosen": -1.9456130266189575, "logits/rejected": -2.1775624752044678, "logps/chosen": -281.8665771484375, "logps/rejected": -318.178466796875, "loss": 0.2409, "rewards/accuracies": 0.875, "rewards/chosen": -1.997543215751648, "rewards/margins": 3.2904200553894043, "rewards/rejected": -5.287962913513184, "step": 1062 }, { "epoch": 0.22, "learning_rate": 1.561764705882353e-05, "logits/chosen": -2.320709705352783, "logits/rejected": -2.3321659564971924, "logps/chosen": -312.359619140625, "logps/rejected": -337.9840087890625, "loss": 0.6249, "rewards/accuracies": 0.5625, "rewards/chosen": -2.15242075920105, "rewards/margins": 2.5110092163085938, "rewards/rejected": -4.6634297370910645, "step": 1063 }, { "epoch": 0.22, "learning_rate": 1.561344537815126e-05, "logits/chosen": -2.32196307182312, "logits/rejected": -2.2228965759277344, "logps/chosen": -264.49688720703125, "logps/rejected": -297.9734191894531, "loss": 0.3644, "rewards/accuracies": 0.875, "rewards/chosen": -2.4416754245758057, "rewards/margins": 3.9339993000030518, "rewards/rejected": -6.375675201416016, "step": 1064 }, { "epoch": 0.22, "learning_rate": 1.5609243697478995e-05, "logits/chosen": -2.125380516052246, "logits/rejected": -2.022549629211426, "logps/chosen": -374.00384521484375, "logps/rejected": -324.53106689453125, "loss": 0.1067, "rewards/accuracies": 1.0, "rewards/chosen": -1.3432464599609375, "rewards/margins": 4.418325901031494, "rewards/rejected": -5.76157283782959, "step": 1065 }, { "epoch": 0.22, "learning_rate": 1.5605042016806725e-05, "logits/chosen": -2.1170427799224854, "logits/rejected": -2.2868189811706543, "logps/chosen": -382.4078369140625, "logps/rejected": -346.73583984375, "loss": 0.5472, "rewards/accuracies": 0.75, "rewards/chosen": -1.7097530364990234, "rewards/margins": 2.548435926437378, "rewards/rejected": -4.2581892013549805, "step": 1066 }, { "epoch": 0.22, "learning_rate": 1.5600840336134455e-05, "logits/chosen": -2.2237319946289062, "logits/rejected": -1.9568169116973877, "logps/chosen": -358.5841369628906, "logps/rejected": -323.93072509765625, "loss": 0.5891, "rewards/accuracies": 0.6875, "rewards/chosen": -2.226712942123413, "rewards/margins": 1.9938093423843384, "rewards/rejected": -4.220521926879883, "step": 1067 }, { "epoch": 0.22, "learning_rate": 1.5596638655462185e-05, "logits/chosen": -2.117027759552002, "logits/rejected": -1.7533752918243408, "logps/chosen": -404.026611328125, "logps/rejected": -340.84881591796875, "loss": 0.3717, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6832351684570312, "rewards/margins": 4.267095565795898, "rewards/rejected": -6.95033073425293, "step": 1068 }, { "epoch": 0.22, "learning_rate": 1.559243697478992e-05, "logits/chosen": -2.1706459522247314, "logits/rejected": -2.1421070098876953, "logps/chosen": -332.73553466796875, "logps/rejected": -387.4233703613281, "loss": 0.1861, "rewards/accuracies": 0.9375, "rewards/chosen": -1.187776803970337, "rewards/margins": 2.5737130641937256, "rewards/rejected": -3.7614901065826416, "step": 1069 }, { "epoch": 0.22, "learning_rate": 1.558823529411765e-05, "logits/chosen": -1.998975157737732, "logits/rejected": -1.8555452823638916, "logps/chosen": -358.2004699707031, "logps/rejected": -402.6553955078125, "loss": 0.4058, "rewards/accuracies": 0.8125, "rewards/chosen": -1.752549648284912, "rewards/margins": 2.4785211086273193, "rewards/rejected": -4.231070518493652, "step": 1070 }, { "epoch": 0.22, "learning_rate": 1.558403361344538e-05, "logits/chosen": -2.1340579986572266, "logits/rejected": -1.657408595085144, "logps/chosen": -304.9349060058594, "logps/rejected": -401.2822265625, "loss": 0.0575, "rewards/accuracies": 1.0, "rewards/chosen": -1.1549335718154907, "rewards/margins": 4.3107476234436035, "rewards/rejected": -5.465681076049805, "step": 1071 }, { "epoch": 0.22, "learning_rate": 1.557983193277311e-05, "logits/chosen": -2.285947561264038, "logits/rejected": -1.7917628288269043, "logps/chosen": -302.83837890625, "logps/rejected": -272.8876953125, "loss": 0.1783, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3537662029266357, "rewards/margins": 3.5185294151306152, "rewards/rejected": -4.87229585647583, "step": 1072 }, { "epoch": 0.22, "learning_rate": 1.5575630252100843e-05, "logits/chosen": -2.2296924591064453, "logits/rejected": -1.904763102531433, "logps/chosen": -382.1318664550781, "logps/rejected": -358.8062438964844, "loss": 0.3233, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8686118125915527, "rewards/margins": 3.838479995727539, "rewards/rejected": -5.707091808319092, "step": 1073 }, { "epoch": 0.22, "learning_rate": 1.5571428571428573e-05, "logits/chosen": -2.313615322113037, "logits/rejected": -1.852065086364746, "logps/chosen": -360.0382080078125, "logps/rejected": -366.3542785644531, "loss": 0.4821, "rewards/accuracies": 0.75, "rewards/chosen": -2.0509731769561768, "rewards/margins": 2.7988686561584473, "rewards/rejected": -4.849842071533203, "step": 1074 }, { "epoch": 0.22, "learning_rate": 1.5567226890756304e-05, "logits/chosen": -2.3373594284057617, "logits/rejected": -1.7792909145355225, "logps/chosen": -413.07147216796875, "logps/rejected": -318.1133728027344, "loss": 0.5406, "rewards/accuracies": 0.6875, "rewards/chosen": -0.9407414197921753, "rewards/margins": 2.8012142181396484, "rewards/rejected": -3.741955518722534, "step": 1075 }, { "epoch": 0.23, "learning_rate": 1.5563025210084034e-05, "logits/chosen": -2.227137804031372, "logits/rejected": -1.7259658575057983, "logps/chosen": -266.745849609375, "logps/rejected": -220.77035522460938, "loss": 0.2263, "rewards/accuracies": 0.875, "rewards/chosen": -1.5586907863616943, "rewards/margins": 3.1512322425842285, "rewards/rejected": -4.709922790527344, "step": 1076 }, { "epoch": 0.23, "learning_rate": 1.5558823529411767e-05, "logits/chosen": -2.1326308250427246, "logits/rejected": -1.9316304922103882, "logps/chosen": -335.54302978515625, "logps/rejected": -338.352294921875, "loss": 0.4032, "rewards/accuracies": 0.875, "rewards/chosen": -1.738598346710205, "rewards/margins": 3.2073771953582764, "rewards/rejected": -4.9459757804870605, "step": 1077 }, { "epoch": 0.23, "learning_rate": 1.5554621848739498e-05, "logits/chosen": -2.1827735900878906, "logits/rejected": -1.9865981340408325, "logps/chosen": -247.40914916992188, "logps/rejected": -263.6688232421875, "loss": 0.2609, "rewards/accuracies": 0.875, "rewards/chosen": -1.8604538440704346, "rewards/margins": 3.1691832542419434, "rewards/rejected": -5.029636859893799, "step": 1078 }, { "epoch": 0.23, "learning_rate": 1.5550420168067228e-05, "logits/chosen": -2.3564889430999756, "logits/rejected": -1.8677804470062256, "logps/chosen": -338.5962829589844, "logps/rejected": -284.51513671875, "loss": 0.1204, "rewards/accuracies": 1.0, "rewards/chosen": -1.3590996265411377, "rewards/margins": 4.387566089630127, "rewards/rejected": -5.746665954589844, "step": 1079 }, { "epoch": 0.23, "learning_rate": 1.5546218487394958e-05, "logits/chosen": -2.318563222885132, "logits/rejected": -1.9634912014007568, "logps/chosen": -312.05987548828125, "logps/rejected": -288.6165771484375, "loss": 0.5744, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9886575937271118, "rewards/margins": 2.711333751678467, "rewards/rejected": -4.699991226196289, "step": 1080 }, { "epoch": 0.23, "learning_rate": 1.554201680672269e-05, "logits/chosen": -2.0526483058929443, "logits/rejected": -2.097865581512451, "logps/chosen": -259.4316101074219, "logps/rejected": -348.186279296875, "loss": 0.157, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3162293434143066, "rewards/margins": 4.7040276527404785, "rewards/rejected": -6.020256519317627, "step": 1081 }, { "epoch": 0.23, "learning_rate": 1.5537815126050422e-05, "logits/chosen": -2.038674831390381, "logits/rejected": -2.182054042816162, "logps/chosen": -304.2615661621094, "logps/rejected": -369.98541259765625, "loss": 0.2917, "rewards/accuracies": 0.8125, "rewards/chosen": -1.508150339126587, "rewards/margins": 3.3606181144714355, "rewards/rejected": -4.868768215179443, "step": 1082 }, { "epoch": 0.23, "learning_rate": 1.5533613445378152e-05, "logits/chosen": -2.2732157707214355, "logits/rejected": -2.1809120178222656, "logps/chosen": -198.60305786132812, "logps/rejected": -271.8071594238281, "loss": 0.0673, "rewards/accuracies": 1.0, "rewards/chosen": -1.9015932083129883, "rewards/margins": 4.2239227294921875, "rewards/rejected": -6.125515937805176, "step": 1083 }, { "epoch": 0.23, "learning_rate": 1.5529411764705882e-05, "logits/chosen": -1.7566792964935303, "logits/rejected": -1.9713528156280518, "logps/chosen": -295.0103759765625, "logps/rejected": -356.4507141113281, "loss": 0.2124, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9597657322883606, "rewards/margins": 3.1596593856811523, "rewards/rejected": -4.119425296783447, "step": 1084 }, { "epoch": 0.23, "learning_rate": 1.5525210084033616e-05, "logits/chosen": -2.1095163822174072, "logits/rejected": -1.8498040437698364, "logps/chosen": -180.953857421875, "logps/rejected": -244.48233032226562, "loss": 0.3407, "rewards/accuracies": 0.875, "rewards/chosen": -1.7570704221725464, "rewards/margins": 3.4634978771209717, "rewards/rejected": -5.2205681800842285, "step": 1085 }, { "epoch": 0.23, "learning_rate": 1.5521008403361346e-05, "logits/chosen": -2.217158317565918, "logits/rejected": -2.0744872093200684, "logps/chosen": -308.2737121582031, "logps/rejected": -311.7294921875, "loss": 0.2425, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1905388832092285, "rewards/margins": 3.3126914501190186, "rewards/rejected": -5.503230094909668, "step": 1086 }, { "epoch": 0.23, "learning_rate": 1.5516806722689076e-05, "logits/chosen": -2.2046380043029785, "logits/rejected": -1.9783538579940796, "logps/chosen": -298.5821533203125, "logps/rejected": -354.7811279296875, "loss": 0.2855, "rewards/accuracies": 0.875, "rewards/chosen": -1.4117244482040405, "rewards/margins": 3.6839473247528076, "rewards/rejected": -5.095672130584717, "step": 1087 }, { "epoch": 0.23, "learning_rate": 1.551260504201681e-05, "logits/chosen": -1.9239532947540283, "logits/rejected": -1.963545799255371, "logps/chosen": -263.43341064453125, "logps/rejected": -310.1040344238281, "loss": 0.4771, "rewards/accuracies": 0.875, "rewards/chosen": -1.7402842044830322, "rewards/margins": 3.6185336112976074, "rewards/rejected": -5.358818054199219, "step": 1088 }, { "epoch": 0.23, "learning_rate": 1.550840336134454e-05, "logits/chosen": -2.3074653148651123, "logits/rejected": -1.7325465679168701, "logps/chosen": -335.1184997558594, "logps/rejected": -249.3388214111328, "loss": 0.4232, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9868955612182617, "rewards/margins": 2.674745559692383, "rewards/rejected": -4.6616411209106445, "step": 1089 }, { "epoch": 0.23, "learning_rate": 1.550420168067227e-05, "logits/chosen": -2.0268568992614746, "logits/rejected": -2.0188844203948975, "logps/chosen": -339.70538330078125, "logps/rejected": -341.326904296875, "loss": 0.6173, "rewards/accuracies": 0.75, "rewards/chosen": -1.8043559789657593, "rewards/margins": 2.4153013229370117, "rewards/rejected": -4.2196574211120605, "step": 1090 }, { "epoch": 0.23, "learning_rate": 1.55e-05, "logits/chosen": -2.002237319946289, "logits/rejected": -1.7771501541137695, "logps/chosen": -371.6806945800781, "logps/rejected": -374.4664306640625, "loss": 0.1993, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1405192613601685, "rewards/margins": 4.472423076629639, "rewards/rejected": -5.612942218780518, "step": 1091 }, { "epoch": 0.23, "learning_rate": 1.5495798319327734e-05, "logits/chosen": -2.1593756675720215, "logits/rejected": -2.1301050186157227, "logps/chosen": -391.8299560546875, "logps/rejected": -381.370849609375, "loss": 0.6286, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7045612335205078, "rewards/margins": 2.896866798400879, "rewards/rejected": -4.601428031921387, "step": 1092 }, { "epoch": 0.23, "learning_rate": 1.5491596638655465e-05, "logits/chosen": -1.9469997882843018, "logits/rejected": -2.158026695251465, "logps/chosen": -262.4390563964844, "logps/rejected": -405.81622314453125, "loss": 0.2472, "rewards/accuracies": 0.875, "rewards/chosen": -2.338805675506592, "rewards/margins": 2.7942445278167725, "rewards/rejected": -5.133049964904785, "step": 1093 }, { "epoch": 0.23, "learning_rate": 1.5487394957983195e-05, "logits/chosen": -2.2205824851989746, "logits/rejected": -2.04874849319458, "logps/chosen": -324.73260498046875, "logps/rejected": -373.7428894042969, "loss": 0.2497, "rewards/accuracies": 0.9375, "rewards/chosen": -2.142815589904785, "rewards/margins": 3.312791585922241, "rewards/rejected": -5.455606937408447, "step": 1094 }, { "epoch": 0.23, "learning_rate": 1.5483193277310925e-05, "logits/chosen": -2.1143898963928223, "logits/rejected": -1.974482536315918, "logps/chosen": -287.5416259765625, "logps/rejected": -325.3800354003906, "loss": 0.2505, "rewards/accuracies": 0.875, "rewards/chosen": -2.365784168243408, "rewards/margins": 2.8378050327301025, "rewards/rejected": -5.203588962554932, "step": 1095 }, { "epoch": 0.23, "learning_rate": 1.547899159663866e-05, "logits/chosen": -1.9907307624816895, "logits/rejected": -1.8054124116897583, "logps/chosen": -381.383056640625, "logps/rejected": -390.400634765625, "loss": 0.2303, "rewards/accuracies": 0.875, "rewards/chosen": -1.6027462482452393, "rewards/margins": 3.7396140098571777, "rewards/rejected": -5.342360019683838, "step": 1096 }, { "epoch": 0.23, "learning_rate": 1.547478991596639e-05, "logits/chosen": -2.516204357147217, "logits/rejected": -2.2973971366882324, "logps/chosen": -380.7392272949219, "logps/rejected": -491.6644287109375, "loss": 0.3093, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6022915840148926, "rewards/margins": 2.3710646629333496, "rewards/rejected": -3.973356246948242, "step": 1097 }, { "epoch": 0.23, "learning_rate": 1.547058823529412e-05, "logits/chosen": -2.0064024925231934, "logits/rejected": -2.0810389518737793, "logps/chosen": -301.1163024902344, "logps/rejected": -320.45526123046875, "loss": 0.3922, "rewards/accuracies": 0.875, "rewards/chosen": -1.7084771394729614, "rewards/margins": 3.3058886528015137, "rewards/rejected": -5.014366149902344, "step": 1098 }, { "epoch": 0.23, "learning_rate": 1.546638655462185e-05, "logits/chosen": -2.1477789878845215, "logits/rejected": -1.7381205558776855, "logps/chosen": -280.3091125488281, "logps/rejected": -250.7599639892578, "loss": 0.3194, "rewards/accuracies": 0.75, "rewards/chosen": -2.1036622524261475, "rewards/margins": 2.9275271892547607, "rewards/rejected": -5.031189441680908, "step": 1099 }, { "epoch": 0.23, "learning_rate": 1.5462184873949583e-05, "logits/chosen": -2.0066843032836914, "logits/rejected": -1.875346302986145, "logps/chosen": -364.3885498046875, "logps/rejected": -331.80474853515625, "loss": 0.5688, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2363851070404053, "rewards/margins": 3.241832733154297, "rewards/rejected": -5.478217124938965, "step": 1100 }, { "epoch": 0.23, "learning_rate": 1.5457983193277313e-05, "logits/chosen": -2.2245278358459473, "logits/rejected": -2.165071725845337, "logps/chosen": -385.35992431640625, "logps/rejected": -378.7532958984375, "loss": 0.4057, "rewards/accuracies": 0.875, "rewards/chosen": -1.3071773052215576, "rewards/margins": 2.8613665103912354, "rewards/rejected": -4.168543815612793, "step": 1101 }, { "epoch": 0.23, "learning_rate": 1.5453781512605043e-05, "logits/chosen": -1.9166970252990723, "logits/rejected": -2.02241587638855, "logps/chosen": -191.66220092773438, "logps/rejected": -311.7247314453125, "loss": 0.1897, "rewards/accuracies": 0.875, "rewards/chosen": -1.5967204570770264, "rewards/margins": 4.551992416381836, "rewards/rejected": -6.148713111877441, "step": 1102 }, { "epoch": 0.23, "learning_rate": 1.5449579831932773e-05, "logits/chosen": -2.1552774906158447, "logits/rejected": -1.921294093132019, "logps/chosen": -367.08697509765625, "logps/rejected": -337.5814208984375, "loss": 0.5372, "rewards/accuracies": 0.75, "rewards/chosen": -1.5889697074890137, "rewards/margins": 3.4586124420166016, "rewards/rejected": -5.047582149505615, "step": 1103 }, { "epoch": 0.23, "learning_rate": 1.5445378151260507e-05, "logits/chosen": -2.3032045364379883, "logits/rejected": -2.0854313373565674, "logps/chosen": -330.4469299316406, "logps/rejected": -299.21771240234375, "loss": 0.4528, "rewards/accuracies": 0.75, "rewards/chosen": -1.7191451787948608, "rewards/margins": 3.301757335662842, "rewards/rejected": -5.020902633666992, "step": 1104 }, { "epoch": 0.23, "learning_rate": 1.5441176470588237e-05, "logits/chosen": -1.7520146369934082, "logits/rejected": -1.9824368953704834, "logps/chosen": -188.16159057617188, "logps/rejected": -291.055908203125, "loss": 0.5205, "rewards/accuracies": 0.6875, "rewards/chosen": -2.774923324584961, "rewards/margins": 2.4503207206726074, "rewards/rejected": -5.225244045257568, "step": 1105 }, { "epoch": 0.23, "learning_rate": 1.5436974789915968e-05, "logits/chosen": -2.1155741214752197, "logits/rejected": -2.234722852706909, "logps/chosen": -248.34115600585938, "logps/rejected": -326.1280517578125, "loss": 0.1846, "rewards/accuracies": 0.9375, "rewards/chosen": -0.901938796043396, "rewards/margins": 4.598575592041016, "rewards/rejected": -5.500514507293701, "step": 1106 }, { "epoch": 0.23, "learning_rate": 1.5432773109243698e-05, "logits/chosen": -2.0443203449249268, "logits/rejected": -1.9454327821731567, "logps/chosen": -246.5683135986328, "logps/rejected": -273.5752868652344, "loss": 0.9168, "rewards/accuracies": 0.5625, "rewards/chosen": -2.8990705013275146, "rewards/margins": 1.810771107673645, "rewards/rejected": -4.709841728210449, "step": 1107 }, { "epoch": 0.23, "learning_rate": 1.542857142857143e-05, "logits/chosen": -2.3644843101501465, "logits/rejected": -2.0450124740600586, "logps/chosen": -321.63824462890625, "logps/rejected": -299.8153381347656, "loss": 0.3858, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6680455207824707, "rewards/margins": 3.6659014225006104, "rewards/rejected": -5.333946704864502, "step": 1108 }, { "epoch": 0.23, "learning_rate": 1.542436974789916e-05, "logits/chosen": -2.1518239974975586, "logits/rejected": -1.5929746627807617, "logps/chosen": -379.90313720703125, "logps/rejected": -266.75, "loss": 0.4633, "rewards/accuracies": 0.6875, "rewards/chosen": -0.8649249076843262, "rewards/margins": 2.570263385772705, "rewards/rejected": -3.4351882934570312, "step": 1109 }, { "epoch": 0.23, "learning_rate": 1.5420168067226892e-05, "logits/chosen": -2.2537670135498047, "logits/rejected": -1.780888319015503, "logps/chosen": -324.98681640625, "logps/rejected": -354.29119873046875, "loss": 0.1541, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0924406051635742, "rewards/margins": 3.7305901050567627, "rewards/rejected": -4.823030471801758, "step": 1110 }, { "epoch": 0.23, "learning_rate": 1.5415966386554625e-05, "logits/chosen": -2.037766695022583, "logits/rejected": -1.8503694534301758, "logps/chosen": -229.6671905517578, "logps/rejected": -244.55088806152344, "loss": 0.3077, "rewards/accuracies": 0.875, "rewards/chosen": -1.7889714241027832, "rewards/margins": 3.404996633529663, "rewards/rejected": -5.193967819213867, "step": 1111 }, { "epoch": 0.23, "learning_rate": 1.5411764705882356e-05, "logits/chosen": -1.98320734500885, "logits/rejected": -1.9117662906646729, "logps/chosen": -227.1378631591797, "logps/rejected": -248.35687255859375, "loss": 0.4428, "rewards/accuracies": 0.75, "rewards/chosen": -1.5258548259735107, "rewards/margins": 2.148599147796631, "rewards/rejected": -3.6744537353515625, "step": 1112 }, { "epoch": 0.23, "learning_rate": 1.5407563025210086e-05, "logits/chosen": -2.256152868270874, "logits/rejected": -2.0154685974121094, "logps/chosen": -336.7105712890625, "logps/rejected": -313.89361572265625, "loss": 0.1727, "rewards/accuracies": 0.9375, "rewards/chosen": -1.451967477798462, "rewards/margins": 3.9846582412719727, "rewards/rejected": -5.4366254806518555, "step": 1113 }, { "epoch": 0.23, "learning_rate": 1.5403361344537816e-05, "logits/chosen": -2.311757802963257, "logits/rejected": -1.9487649202346802, "logps/chosen": -297.34844970703125, "logps/rejected": -255.541015625, "loss": 0.2071, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2653576135635376, "rewards/margins": 4.1422038078308105, "rewards/rejected": -5.407561302185059, "step": 1114 }, { "epoch": 0.23, "learning_rate": 1.539915966386555e-05, "logits/chosen": -2.1543688774108887, "logits/rejected": -1.8869092464447021, "logps/chosen": -375.5109558105469, "logps/rejected": -310.30157470703125, "loss": 0.4776, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1006371974945068, "rewards/margins": 3.3518919944763184, "rewards/rejected": -4.452528953552246, "step": 1115 }, { "epoch": 0.23, "learning_rate": 1.539495798319328e-05, "logits/chosen": -2.2909069061279297, "logits/rejected": -2.140662431716919, "logps/chosen": -333.17913818359375, "logps/rejected": -360.0805358886719, "loss": 0.0911, "rewards/accuracies": 1.0, "rewards/chosen": -1.4079232215881348, "rewards/margins": 5.112290382385254, "rewards/rejected": -6.520214080810547, "step": 1116 }, { "epoch": 0.23, "learning_rate": 1.539075630252101e-05, "logits/chosen": -2.166163682937622, "logits/rejected": -1.9165594577789307, "logps/chosen": -392.78741455078125, "logps/rejected": -381.2095947265625, "loss": 0.4071, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1723179817199707, "rewards/margins": 2.4729695320129395, "rewards/rejected": -3.6452877521514893, "step": 1117 }, { "epoch": 0.23, "learning_rate": 1.538655462184874e-05, "logits/chosen": -2.211686611175537, "logits/rejected": -1.7472550868988037, "logps/chosen": -389.500732421875, "logps/rejected": -333.60577392578125, "loss": 0.2015, "rewards/accuracies": 0.875, "rewards/chosen": -1.303159475326538, "rewards/margins": 3.5912461280822754, "rewards/rejected": -4.894405364990234, "step": 1118 }, { "epoch": 0.23, "learning_rate": 1.5382352941176474e-05, "logits/chosen": -2.27036714553833, "logits/rejected": -1.9544291496276855, "logps/chosen": -320.41552734375, "logps/rejected": -291.5990905761719, "loss": 0.3973, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4500982761383057, "rewards/margins": 2.831218719482422, "rewards/rejected": -4.281316757202148, "step": 1119 }, { "epoch": 0.23, "learning_rate": 1.5378151260504204e-05, "logits/chosen": -2.2146658897399902, "logits/rejected": -1.7627410888671875, "logps/chosen": -249.08782958984375, "logps/rejected": -220.69442749023438, "loss": 0.2997, "rewards/accuracies": 0.875, "rewards/chosen": -1.7283214330673218, "rewards/margins": 2.3475148677825928, "rewards/rejected": -4.075836181640625, "step": 1120 }, { "epoch": 0.23, "learning_rate": 1.5373949579831934e-05, "logits/chosen": -2.0229198932647705, "logits/rejected": -2.1845083236694336, "logps/chosen": -351.8758239746094, "logps/rejected": -332.13922119140625, "loss": 0.3351, "rewards/accuracies": 0.875, "rewards/chosen": -1.4682971239089966, "rewards/margins": 3.523670196533203, "rewards/rejected": -4.99196720123291, "step": 1121 }, { "epoch": 0.23, "learning_rate": 1.5369747899159665e-05, "logits/chosen": -2.1117653846740723, "logits/rejected": -1.7490055561065674, "logps/chosen": -313.01495361328125, "logps/rejected": -305.625732421875, "loss": 0.3378, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8274101614952087, "rewards/margins": 3.156586170196533, "rewards/rejected": -3.9839963912963867, "step": 1122 }, { "epoch": 0.23, "learning_rate": 1.5365546218487398e-05, "logits/chosen": -2.1178884506225586, "logits/rejected": -2.2283873558044434, "logps/chosen": -216.9889678955078, "logps/rejected": -223.59548950195312, "loss": 0.4544, "rewards/accuracies": 0.875, "rewards/chosen": -1.624551773071289, "rewards/margins": 2.3504786491394043, "rewards/rejected": -3.9750306606292725, "step": 1123 }, { "epoch": 0.24, "learning_rate": 1.536134453781513e-05, "logits/chosen": -1.6962761878967285, "logits/rejected": -1.551252841949463, "logps/chosen": -290.80126953125, "logps/rejected": -290.831298828125, "loss": 0.2137, "rewards/accuracies": 0.875, "rewards/chosen": -1.2125012874603271, "rewards/margins": 4.068419933319092, "rewards/rejected": -5.280921459197998, "step": 1124 }, { "epoch": 0.24, "learning_rate": 1.535714285714286e-05, "logits/chosen": -2.1728169918060303, "logits/rejected": -1.9521615505218506, "logps/chosen": -265.2166442871094, "logps/rejected": -225.73260498046875, "loss": 0.235, "rewards/accuracies": 0.875, "rewards/chosen": -1.0623376369476318, "rewards/margins": 2.468724012374878, "rewards/rejected": -3.531061887741089, "step": 1125 }, { "epoch": 0.24, "learning_rate": 1.535294117647059e-05, "logits/chosen": -2.416719675064087, "logits/rejected": -1.825348973274231, "logps/chosen": -434.5524597167969, "logps/rejected": -350.5001220703125, "loss": 0.6002, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3969985246658325, "rewards/margins": 2.2322611808776855, "rewards/rejected": -3.6292595863342285, "step": 1126 }, { "epoch": 0.24, "learning_rate": 1.5348739495798323e-05, "logits/chosen": -1.4729070663452148, "logits/rejected": -1.7964214086532593, "logps/chosen": -240.53033447265625, "logps/rejected": -335.87872314453125, "loss": 0.1162, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3010525703430176, "rewards/margins": 5.342162132263184, "rewards/rejected": -6.643215656280518, "step": 1127 }, { "epoch": 0.24, "learning_rate": 1.5344537815126053e-05, "logits/chosen": -2.0500781536102295, "logits/rejected": -1.7759275436401367, "logps/chosen": -379.8785400390625, "logps/rejected": -381.34393310546875, "loss": 0.2684, "rewards/accuracies": 0.875, "rewards/chosen": -1.3641512393951416, "rewards/margins": 3.6229872703552246, "rewards/rejected": -4.987138748168945, "step": 1128 }, { "epoch": 0.24, "learning_rate": 1.5340336134453783e-05, "logits/chosen": -2.2796554565429688, "logits/rejected": -1.5617339611053467, "logps/chosen": -342.2637023925781, "logps/rejected": -270.35797119140625, "loss": 0.2627, "rewards/accuracies": 0.875, "rewards/chosen": -1.3469939231872559, "rewards/margins": 3.1188435554504395, "rewards/rejected": -4.465837478637695, "step": 1129 }, { "epoch": 0.24, "learning_rate": 1.5336134453781513e-05, "logits/chosen": -1.9529991149902344, "logits/rejected": -1.8362208604812622, "logps/chosen": -280.33538818359375, "logps/rejected": -325.3298034667969, "loss": 0.2998, "rewards/accuracies": 0.875, "rewards/chosen": -0.9795182943344116, "rewards/margins": 1.8745293617248535, "rewards/rejected": -2.8540475368499756, "step": 1130 }, { "epoch": 0.24, "learning_rate": 1.5331932773109247e-05, "logits/chosen": -2.2299227714538574, "logits/rejected": -1.8161027431488037, "logps/chosen": -443.1059265136719, "logps/rejected": -369.3968505859375, "loss": 0.2148, "rewards/accuracies": 0.875, "rewards/chosen": -0.9800748825073242, "rewards/margins": 3.3618011474609375, "rewards/rejected": -4.341876029968262, "step": 1131 }, { "epoch": 0.24, "learning_rate": 1.5327731092436977e-05, "logits/chosen": -1.9197090864181519, "logits/rejected": -1.7479817867279053, "logps/chosen": -218.59765625, "logps/rejected": -264.2467041015625, "loss": 0.2305, "rewards/accuracies": 0.875, "rewards/chosen": -1.237673044204712, "rewards/margins": 4.283493518829346, "rewards/rejected": -5.5211663246154785, "step": 1132 }, { "epoch": 0.24, "learning_rate": 1.5323529411764707e-05, "logits/chosen": -1.852963924407959, "logits/rejected": -2.3476314544677734, "logps/chosen": -232.69512939453125, "logps/rejected": -415.89324951171875, "loss": 0.4439, "rewards/accuracies": 0.75, "rewards/chosen": -1.7668110132217407, "rewards/margins": 2.7833516597747803, "rewards/rejected": -4.5501627922058105, "step": 1133 }, { "epoch": 0.24, "learning_rate": 1.531932773109244e-05, "logits/chosen": -2.0508055686950684, "logits/rejected": -1.5809041261672974, "logps/chosen": -332.0795593261719, "logps/rejected": -310.4665832519531, "loss": 0.3461, "rewards/accuracies": 0.875, "rewards/chosen": -1.9561271667480469, "rewards/margins": 3.4839415550231934, "rewards/rejected": -5.440068244934082, "step": 1134 }, { "epoch": 0.24, "learning_rate": 1.531512605042017e-05, "logits/chosen": -1.9303784370422363, "logits/rejected": -1.9079740047454834, "logps/chosen": -276.6993103027344, "logps/rejected": -321.5356750488281, "loss": 0.2891, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3192062377929688, "rewards/margins": 4.3332109451293945, "rewards/rejected": -6.652416706085205, "step": 1135 }, { "epoch": 0.24, "learning_rate": 1.5310924369747898e-05, "logits/chosen": -2.265953302383423, "logits/rejected": -2.0183496475219727, "logps/chosen": -363.24395751953125, "logps/rejected": -329.00091552734375, "loss": 0.4092, "rewards/accuracies": 0.875, "rewards/chosen": -0.8615310788154602, "rewards/margins": 2.2970175743103027, "rewards/rejected": -3.1585488319396973, "step": 1136 }, { "epoch": 0.24, "learning_rate": 1.530672268907563e-05, "logits/chosen": -2.182778835296631, "logits/rejected": -1.8062870502471924, "logps/chosen": -290.48028564453125, "logps/rejected": -242.54159545898438, "loss": 0.463, "rewards/accuracies": 0.75, "rewards/chosen": -1.1942212581634521, "rewards/margins": 1.5299921035766602, "rewards/rejected": -2.7242136001586914, "step": 1137 }, { "epoch": 0.24, "learning_rate": 1.5302521008403362e-05, "logits/chosen": -2.180172920227051, "logits/rejected": -1.9833571910858154, "logps/chosen": -300.0404968261719, "logps/rejected": -311.17547607421875, "loss": 0.3538, "rewards/accuracies": 0.9375, "rewards/chosen": -1.848576545715332, "rewards/margins": 2.6968636512756348, "rewards/rejected": -4.545440673828125, "step": 1138 }, { "epoch": 0.24, "learning_rate": 1.5298319327731092e-05, "logits/chosen": -1.848240613937378, "logits/rejected": -1.7186464071273804, "logps/chosen": -291.4215393066406, "logps/rejected": -259.6919860839844, "loss": 0.6207, "rewards/accuracies": 0.625, "rewards/chosen": -2.0645060539245605, "rewards/margins": 1.3446435928344727, "rewards/rejected": -3.409149646759033, "step": 1139 }, { "epoch": 0.24, "learning_rate": 1.5294117647058822e-05, "logits/chosen": -2.2319459915161133, "logits/rejected": -2.116305351257324, "logps/chosen": -285.8015441894531, "logps/rejected": -268.8495788574219, "loss": 0.4726, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2956557273864746, "rewards/margins": 1.930763840675354, "rewards/rejected": -3.226419687271118, "step": 1140 }, { "epoch": 0.24, "learning_rate": 1.5289915966386556e-05, "logits/chosen": -2.074985980987549, "logits/rejected": -1.5709457397460938, "logps/chosen": -317.5798034667969, "logps/rejected": -310.46435546875, "loss": 0.3275, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7155604362487793, "rewards/margins": 2.4036169052124023, "rewards/rejected": -4.119177341461182, "step": 1141 }, { "epoch": 0.24, "learning_rate": 1.5285714285714286e-05, "logits/chosen": -1.925581455230713, "logits/rejected": -1.942786455154419, "logps/chosen": -377.4695739746094, "logps/rejected": -359.2512512207031, "loss": 0.1962, "rewards/accuracies": 0.875, "rewards/chosen": -0.6716963648796082, "rewards/margins": 3.584994316101074, "rewards/rejected": -4.256690979003906, "step": 1142 }, { "epoch": 0.24, "learning_rate": 1.5281512605042016e-05, "logits/chosen": -2.1712486743927, "logits/rejected": -2.071918487548828, "logps/chosen": -304.91314697265625, "logps/rejected": -350.95379638671875, "loss": 0.5103, "rewards/accuracies": 0.875, "rewards/chosen": -1.2292113304138184, "rewards/margins": 2.801612377166748, "rewards/rejected": -4.030823707580566, "step": 1143 }, { "epoch": 0.24, "learning_rate": 1.527731092436975e-05, "logits/chosen": -2.0356624126434326, "logits/rejected": -1.598527193069458, "logps/chosen": -289.5799255371094, "logps/rejected": -371.9056091308594, "loss": 0.087, "rewards/accuracies": 1.0, "rewards/chosen": -0.6706043481826782, "rewards/margins": 4.31977653503418, "rewards/rejected": -4.990380764007568, "step": 1144 }, { "epoch": 0.24, "learning_rate": 1.527310924369748e-05, "logits/chosen": -2.3796591758728027, "logits/rejected": -1.9921081066131592, "logps/chosen": -256.31146240234375, "logps/rejected": -307.0758056640625, "loss": 0.4264, "rewards/accuracies": 0.75, "rewards/chosen": -1.6932662725448608, "rewards/margins": 3.287510871887207, "rewards/rejected": -4.980776786804199, "step": 1145 }, { "epoch": 0.24, "learning_rate": 1.526890756302521e-05, "logits/chosen": -2.0256996154785156, "logits/rejected": -1.9255012273788452, "logps/chosen": -170.7609405517578, "logps/rejected": -289.58856201171875, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -1.6248992681503296, "rewards/margins": 3.564387083053589, "rewards/rejected": -5.189286231994629, "step": 1146 }, { "epoch": 0.24, "learning_rate": 1.526470588235294e-05, "logits/chosen": -2.3289265632629395, "logits/rejected": -2.098623752593994, "logps/chosen": -363.14874267578125, "logps/rejected": -376.38555908203125, "loss": 0.296, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9599385857582092, "rewards/margins": 2.7599925994873047, "rewards/rejected": -3.719931125640869, "step": 1147 }, { "epoch": 0.24, "learning_rate": 1.5260504201680674e-05, "logits/chosen": -2.3561441898345947, "logits/rejected": -1.7958621978759766, "logps/chosen": -361.17633056640625, "logps/rejected": -386.96893310546875, "loss": 0.1898, "rewards/accuracies": 0.875, "rewards/chosen": -1.0192713737487793, "rewards/margins": 3.683149814605713, "rewards/rejected": -4.702421188354492, "step": 1148 }, { "epoch": 0.24, "learning_rate": 1.5256302521008404e-05, "logits/chosen": -1.8521263599395752, "logits/rejected": -2.0047295093536377, "logps/chosen": -198.44480895996094, "logps/rejected": -304.13226318359375, "loss": 0.3763, "rewards/accuracies": 0.75, "rewards/chosen": -1.4949949979782104, "rewards/margins": 2.9132418632507324, "rewards/rejected": -4.408236980438232, "step": 1149 }, { "epoch": 0.24, "learning_rate": 1.5252100840336135e-05, "logits/chosen": -2.2550265789031982, "logits/rejected": -1.8941617012023926, "logps/chosen": -433.40771484375, "logps/rejected": -396.194580078125, "loss": 0.0882, "rewards/accuracies": 1.0, "rewards/chosen": -0.745537281036377, "rewards/margins": 4.053267955780029, "rewards/rejected": -4.798805236816406, "step": 1150 }, { "epoch": 0.24, "learning_rate": 1.5247899159663866e-05, "logits/chosen": -2.058539867401123, "logits/rejected": -1.7930188179016113, "logps/chosen": -335.4761962890625, "logps/rejected": -344.7674865722656, "loss": 0.5355, "rewards/accuracies": 0.75, "rewards/chosen": -1.1005860567092896, "rewards/margins": 2.015758514404297, "rewards/rejected": -3.116344451904297, "step": 1151 }, { "epoch": 0.24, "learning_rate": 1.5243697478991597e-05, "logits/chosen": -2.295010805130005, "logits/rejected": -1.729496955871582, "logps/chosen": -253.00033569335938, "logps/rejected": -252.05328369140625, "loss": 0.3484, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1418451070785522, "rewards/margins": 3.6182286739349365, "rewards/rejected": -4.760074138641357, "step": 1152 }, { "epoch": 0.24, "learning_rate": 1.5239495798319329e-05, "logits/chosen": -2.195406436920166, "logits/rejected": -1.7860475778579712, "logps/chosen": -363.5596618652344, "logps/rejected": -372.95013427734375, "loss": 0.2198, "rewards/accuracies": 0.875, "rewards/chosen": -0.5104256868362427, "rewards/margins": 3.341014862060547, "rewards/rejected": -3.8514404296875, "step": 1153 }, { "epoch": 0.24, "learning_rate": 1.5235294117647059e-05, "logits/chosen": -2.098393440246582, "logits/rejected": -2.1312763690948486, "logps/chosen": -389.659423828125, "logps/rejected": -314.89373779296875, "loss": 0.263, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4257755279541016, "rewards/margins": 2.055939197540283, "rewards/rejected": -3.4817147254943848, "step": 1154 }, { "epoch": 0.24, "learning_rate": 1.523109243697479e-05, "logits/chosen": -2.2290303707122803, "logits/rejected": -2.139991283416748, "logps/chosen": -226.84371948242188, "logps/rejected": -317.8481140136719, "loss": 0.1434, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7654358148574829, "rewards/margins": 4.401028633117676, "rewards/rejected": -5.166464805603027, "step": 1155 }, { "epoch": 0.24, "learning_rate": 1.5226890756302521e-05, "logits/chosen": -1.8731436729431152, "logits/rejected": -2.31772518157959, "logps/chosen": -262.9060974121094, "logps/rejected": -360.10992431640625, "loss": 0.2233, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6219669580459595, "rewards/margins": 4.034126281738281, "rewards/rejected": -5.656093120574951, "step": 1156 }, { "epoch": 0.24, "learning_rate": 1.5222689075630253e-05, "logits/chosen": -2.472644090652466, "logits/rejected": -2.0184128284454346, "logps/chosen": -368.617919921875, "logps/rejected": -336.496337890625, "loss": 0.1419, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1643491983413696, "rewards/margins": 4.170035362243652, "rewards/rejected": -5.334384441375732, "step": 1157 }, { "epoch": 0.24, "learning_rate": 1.5218487394957983e-05, "logits/chosen": -2.2415294647216797, "logits/rejected": -1.8104461431503296, "logps/chosen": -399.25042724609375, "logps/rejected": -375.8715515136719, "loss": 0.2422, "rewards/accuracies": 0.875, "rewards/chosen": -1.0566871166229248, "rewards/margins": 3.323225736618042, "rewards/rejected": -4.379912853240967, "step": 1158 }, { "epoch": 0.24, "learning_rate": 1.5214285714285715e-05, "logits/chosen": -2.205252170562744, "logits/rejected": -1.8324387073516846, "logps/chosen": -359.388671875, "logps/rejected": -363.02655029296875, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -2.436235189437866, "rewards/margins": 3.160069704055786, "rewards/rejected": -5.596304893493652, "step": 1159 }, { "epoch": 0.24, "learning_rate": 1.5210084033613445e-05, "logits/chosen": -2.1195058822631836, "logits/rejected": -1.8476227521896362, "logps/chosen": -403.9994201660156, "logps/rejected": -294.9276123046875, "loss": 0.4712, "rewards/accuracies": 0.6875, "rewards/chosen": -1.329033613204956, "rewards/margins": 3.0810399055480957, "rewards/rejected": -4.410073757171631, "step": 1160 }, { "epoch": 0.24, "learning_rate": 1.5205882352941177e-05, "logits/chosen": -2.224428176879883, "logits/rejected": -1.8038300275802612, "logps/chosen": -262.11114501953125, "logps/rejected": -295.90057373046875, "loss": 0.3352, "rewards/accuracies": 0.75, "rewards/chosen": -2.1146607398986816, "rewards/margins": 3.3923401832580566, "rewards/rejected": -5.507000923156738, "step": 1161 }, { "epoch": 0.24, "learning_rate": 1.5201680672268907e-05, "logits/chosen": -1.646331548690796, "logits/rejected": -2.291196346282959, "logps/chosen": -98.36325073242188, "logps/rejected": -234.84335327148438, "loss": 0.2678, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7265188694000244, "rewards/margins": 3.967656135559082, "rewards/rejected": -5.6941752433776855, "step": 1162 }, { "epoch": 0.24, "learning_rate": 1.519747899159664e-05, "logits/chosen": -2.0785071849823, "logits/rejected": -2.061446189880371, "logps/chosen": -359.7589111328125, "logps/rejected": -330.1527404785156, "loss": 0.2576, "rewards/accuracies": 0.875, "rewards/chosen": -1.2150859832763672, "rewards/margins": 3.0104217529296875, "rewards/rejected": -4.225507736206055, "step": 1163 }, { "epoch": 0.24, "learning_rate": 1.519327731092437e-05, "logits/chosen": -1.7263665199279785, "logits/rejected": -2.04426646232605, "logps/chosen": -196.4629364013672, "logps/rejected": -380.02984619140625, "loss": 0.1058, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4911531209945679, "rewards/margins": 5.467153072357178, "rewards/rejected": -6.958306312561035, "step": 1164 }, { "epoch": 0.24, "learning_rate": 1.5189075630252101e-05, "logits/chosen": -2.488992929458618, "logits/rejected": -1.8200385570526123, "logps/chosen": -383.03936767578125, "logps/rejected": -298.67730712890625, "loss": 0.2467, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3275595903396606, "rewards/margins": 4.3523712158203125, "rewards/rejected": -5.679930686950684, "step": 1165 }, { "epoch": 0.24, "learning_rate": 1.5184873949579833e-05, "logits/chosen": -1.8770177364349365, "logits/rejected": -1.548773169517517, "logps/chosen": -245.68344116210938, "logps/rejected": -237.56875610351562, "loss": 0.4612, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7806426286697388, "rewards/margins": 1.6030175685882568, "rewards/rejected": -3.383660316467285, "step": 1166 }, { "epoch": 0.24, "learning_rate": 1.5180672268907564e-05, "logits/chosen": -2.0789072513580322, "logits/rejected": -2.193075656890869, "logps/chosen": -325.4642333984375, "logps/rejected": -434.20294189453125, "loss": 0.2357, "rewards/accuracies": 0.875, "rewards/chosen": -1.9273462295532227, "rewards/margins": 3.5224170684814453, "rewards/rejected": -5.449763298034668, "step": 1167 }, { "epoch": 0.24, "learning_rate": 1.5176470588235295e-05, "logits/chosen": -1.8905764818191528, "logits/rejected": -1.5760489702224731, "logps/chosen": -287.21142578125, "logps/rejected": -286.29345703125, "loss": 0.2064, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5765068531036377, "rewards/margins": 4.297512531280518, "rewards/rejected": -5.874019622802734, "step": 1168 }, { "epoch": 0.24, "learning_rate": 1.5172268907563026e-05, "logits/chosen": -2.0364441871643066, "logits/rejected": -2.1332294940948486, "logps/chosen": -324.4705505371094, "logps/rejected": -291.461669921875, "loss": 0.1753, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2496651411056519, "rewards/margins": 4.754192352294922, "rewards/rejected": -6.003857612609863, "step": 1169 }, { "epoch": 0.24, "learning_rate": 1.5168067226890758e-05, "logits/chosen": -1.92880117893219, "logits/rejected": -2.3277297019958496, "logps/chosen": -315.2311096191406, "logps/rejected": -323.5077819824219, "loss": 0.2561, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5292918682098389, "rewards/margins": 4.4300312995910645, "rewards/rejected": -5.959322929382324, "step": 1170 }, { "epoch": 0.24, "learning_rate": 1.5163865546218488e-05, "logits/chosen": -2.1607794761657715, "logits/rejected": -1.8354965448379517, "logps/chosen": -385.51019287109375, "logps/rejected": -333.3912353515625, "loss": 0.299, "rewards/accuracies": 0.8125, "rewards/chosen": -2.22383975982666, "rewards/margins": 4.075891494750977, "rewards/rejected": -6.299731254577637, "step": 1171 }, { "epoch": 0.25, "learning_rate": 1.515966386554622e-05, "logits/chosen": -2.104717254638672, "logits/rejected": -2.279975652694702, "logps/chosen": -207.76742553710938, "logps/rejected": -389.9500732421875, "loss": 0.3505, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9203474521636963, "rewards/margins": 3.584094762802124, "rewards/rejected": -5.50444221496582, "step": 1172 }, { "epoch": 0.25, "learning_rate": 1.515546218487395e-05, "logits/chosen": -2.137160062789917, "logits/rejected": -2.1771063804626465, "logps/chosen": -343.55914306640625, "logps/rejected": -384.8305969238281, "loss": 0.2173, "rewards/accuracies": 0.875, "rewards/chosen": -1.2285467386245728, "rewards/margins": 4.091034412384033, "rewards/rejected": -5.319580554962158, "step": 1173 }, { "epoch": 0.25, "learning_rate": 1.5151260504201682e-05, "logits/chosen": -2.150630474090576, "logits/rejected": -2.2648842334747314, "logps/chosen": -374.4417724609375, "logps/rejected": -381.836669921875, "loss": 0.43, "rewards/accuracies": 0.75, "rewards/chosen": -1.69371497631073, "rewards/margins": 2.7535622119903564, "rewards/rejected": -4.447277069091797, "step": 1174 }, { "epoch": 0.25, "learning_rate": 1.5147058823529412e-05, "logits/chosen": -2.009212017059326, "logits/rejected": -1.942438006401062, "logps/chosen": -305.95404052734375, "logps/rejected": -310.2395324707031, "loss": 0.2136, "rewards/accuracies": 0.875, "rewards/chosen": -1.851820468902588, "rewards/margins": 4.212376117706299, "rewards/rejected": -6.064196586608887, "step": 1175 }, { "epoch": 0.25, "learning_rate": 1.5142857142857144e-05, "logits/chosen": -2.0716683864593506, "logits/rejected": -1.8512918949127197, "logps/chosen": -341.6034240722656, "logps/rejected": -356.722412109375, "loss": 0.1297, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0413408279418945, "rewards/margins": 4.149041652679443, "rewards/rejected": -6.190382480621338, "step": 1176 }, { "epoch": 0.25, "learning_rate": 1.5138655462184874e-05, "logits/chosen": -1.8365814685821533, "logits/rejected": -1.7843756675720215, "logps/chosen": -396.0382385253906, "logps/rejected": -394.5455627441406, "loss": 0.2291, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4077531099319458, "rewards/margins": 3.8222298622131348, "rewards/rejected": -5.229983329772949, "step": 1177 }, { "epoch": 0.25, "learning_rate": 1.5134453781512606e-05, "logits/chosen": -1.930666208267212, "logits/rejected": -1.7580417394638062, "logps/chosen": -310.0386962890625, "logps/rejected": -384.09393310546875, "loss": 0.4902, "rewards/accuracies": 0.875, "rewards/chosen": -2.8741931915283203, "rewards/margins": 3.8421387672424316, "rewards/rejected": -6.71633243560791, "step": 1178 }, { "epoch": 0.25, "learning_rate": 1.5130252100840336e-05, "logits/chosen": -2.257823944091797, "logits/rejected": -1.7922625541687012, "logps/chosen": -331.9492492675781, "logps/rejected": -247.1479949951172, "loss": 0.3182, "rewards/accuracies": 0.875, "rewards/chosen": -2.9673147201538086, "rewards/margins": 2.276440143585205, "rewards/rejected": -5.2437543869018555, "step": 1179 }, { "epoch": 0.25, "learning_rate": 1.5126050420168068e-05, "logits/chosen": -2.345992088317871, "logits/rejected": -2.2462806701660156, "logps/chosen": -355.7221374511719, "logps/rejected": -426.6363525390625, "loss": 0.2716, "rewards/accuracies": 0.875, "rewards/chosen": -1.4929978847503662, "rewards/margins": 4.724306106567383, "rewards/rejected": -6.21730375289917, "step": 1180 }, { "epoch": 0.25, "learning_rate": 1.5121848739495799e-05, "logits/chosen": -2.040586233139038, "logits/rejected": -1.7297885417938232, "logps/chosen": -343.8881530761719, "logps/rejected": -352.7177734375, "loss": 0.8962, "rewards/accuracies": 0.75, "rewards/chosen": -2.852390766143799, "rewards/margins": 2.0606026649475098, "rewards/rejected": -4.912993431091309, "step": 1181 }, { "epoch": 0.25, "learning_rate": 1.511764705882353e-05, "logits/chosen": -2.219639778137207, "logits/rejected": -2.1185994148254395, "logps/chosen": -467.3554382324219, "logps/rejected": -422.8388671875, "loss": 0.4088, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7696847915649414, "rewards/margins": 3.0175669193267822, "rewards/rejected": -4.7872514724731445, "step": 1182 }, { "epoch": 0.25, "learning_rate": 1.511344537815126e-05, "logits/chosen": -2.1489109992980957, "logits/rejected": -1.979834794998169, "logps/chosen": -255.49951171875, "logps/rejected": -287.5498962402344, "loss": 0.5424, "rewards/accuracies": 0.875, "rewards/chosen": -2.4700961112976074, "rewards/margins": 4.546854019165039, "rewards/rejected": -7.016949653625488, "step": 1183 }, { "epoch": 0.25, "learning_rate": 1.5109243697478993e-05, "logits/chosen": -2.1440699100494385, "logits/rejected": -2.0058140754699707, "logps/chosen": -368.530517578125, "logps/rejected": -352.6629943847656, "loss": 0.0977, "rewards/accuracies": 1.0, "rewards/chosen": -0.6444863080978394, "rewards/margins": 4.887570858001709, "rewards/rejected": -5.532057285308838, "step": 1184 }, { "epoch": 0.25, "learning_rate": 1.5105042016806723e-05, "logits/chosen": -2.0883612632751465, "logits/rejected": -1.9995131492614746, "logps/chosen": -285.09320068359375, "logps/rejected": -347.45361328125, "loss": 0.1575, "rewards/accuracies": 0.9375, "rewards/chosen": -2.339712619781494, "rewards/margins": 3.4179694652557373, "rewards/rejected": -5.757681846618652, "step": 1185 }, { "epoch": 0.25, "learning_rate": 1.5100840336134455e-05, "logits/chosen": -2.131596088409424, "logits/rejected": -1.7838683128356934, "logps/chosen": -360.7657775878906, "logps/rejected": -258.069580078125, "loss": 0.1869, "rewards/accuracies": 0.875, "rewards/chosen": -1.5763764381408691, "rewards/margins": 4.613051891326904, "rewards/rejected": -6.189428329467773, "step": 1186 }, { "epoch": 0.25, "learning_rate": 1.5096638655462185e-05, "logits/chosen": -1.929497480392456, "logits/rejected": -1.8926093578338623, "logps/chosen": -227.36569213867188, "logps/rejected": -228.517333984375, "loss": 0.1753, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5404335260391235, "rewards/margins": 4.4586310386657715, "rewards/rejected": -5.9990644454956055, "step": 1187 }, { "epoch": 0.25, "learning_rate": 1.5092436974789917e-05, "logits/chosen": -2.088183879852295, "logits/rejected": -2.080101490020752, "logps/chosen": -269.8706359863281, "logps/rejected": -295.9175109863281, "loss": 0.3102, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8600308895111084, "rewards/margins": 3.108163833618164, "rewards/rejected": -4.968194961547852, "step": 1188 }, { "epoch": 0.25, "learning_rate": 1.5088235294117649e-05, "logits/chosen": -1.9852622747421265, "logits/rejected": -1.657360315322876, "logps/chosen": -348.3370361328125, "logps/rejected": -423.8879699707031, "loss": 0.5602, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3043925762176514, "rewards/margins": 3.0825114250183105, "rewards/rejected": -5.386903762817383, "step": 1189 }, { "epoch": 0.25, "learning_rate": 1.5084033613445379e-05, "logits/chosen": -1.8977195024490356, "logits/rejected": -1.7659292221069336, "logps/chosen": -325.5350036621094, "logps/rejected": -324.3315124511719, "loss": 0.1718, "rewards/accuracies": 0.9375, "rewards/chosen": -1.330365538597107, "rewards/margins": 5.726489543914795, "rewards/rejected": -7.056855201721191, "step": 1190 }, { "epoch": 0.25, "learning_rate": 1.5079831932773111e-05, "logits/chosen": -2.3464407920837402, "logits/rejected": -1.7052339315414429, "logps/chosen": -382.53253173828125, "logps/rejected": -333.1458740234375, "loss": 0.6533, "rewards/accuracies": 0.75, "rewards/chosen": -1.7083666324615479, "rewards/margins": 3.4127748012542725, "rewards/rejected": -5.12114143371582, "step": 1191 }, { "epoch": 0.25, "learning_rate": 1.5075630252100841e-05, "logits/chosen": -2.2305030822753906, "logits/rejected": -1.8566007614135742, "logps/chosen": -301.3981628417969, "logps/rejected": -268.1978759765625, "loss": 0.347, "rewards/accuracies": 0.8125, "rewards/chosen": -2.473177671432495, "rewards/margins": 2.4284377098083496, "rewards/rejected": -4.901615142822266, "step": 1192 }, { "epoch": 0.25, "learning_rate": 1.5071428571428573e-05, "logits/chosen": -2.0026190280914307, "logits/rejected": -2.090710163116455, "logps/chosen": -174.71018981933594, "logps/rejected": -192.7593994140625, "loss": 0.7049, "rewards/accuracies": 0.625, "rewards/chosen": -2.2492282390594482, "rewards/margins": 0.8885723948478699, "rewards/rejected": -3.137800693511963, "step": 1193 }, { "epoch": 0.25, "learning_rate": 1.5067226890756303e-05, "logits/chosen": -2.3190665245056152, "logits/rejected": -1.9814680814743042, "logps/chosen": -373.4332275390625, "logps/rejected": -345.6336975097656, "loss": 0.416, "rewards/accuracies": 0.8125, "rewards/chosen": -2.515828847885132, "rewards/margins": 2.645512819290161, "rewards/rejected": -5.161341667175293, "step": 1194 }, { "epoch": 0.25, "learning_rate": 1.5063025210084035e-05, "logits/chosen": -2.291168212890625, "logits/rejected": -1.6695159673690796, "logps/chosen": -280.7257385253906, "logps/rejected": -347.4483642578125, "loss": 0.8863, "rewards/accuracies": 0.875, "rewards/chosen": -2.0984272956848145, "rewards/margins": 2.378565788269043, "rewards/rejected": -4.476993083953857, "step": 1195 }, { "epoch": 0.25, "learning_rate": 1.5058823529411765e-05, "logits/chosen": -1.8760955333709717, "logits/rejected": -1.5016248226165771, "logps/chosen": -269.7454833984375, "logps/rejected": -264.7718811035156, "loss": 0.3244, "rewards/accuracies": 0.9375, "rewards/chosen": -2.398189067840576, "rewards/margins": 5.083847999572754, "rewards/rejected": -7.482037544250488, "step": 1196 }, { "epoch": 0.25, "learning_rate": 1.5054621848739497e-05, "logits/chosen": -2.094642400741577, "logits/rejected": -1.7723804712295532, "logps/chosen": -208.8339385986328, "logps/rejected": -184.4738006591797, "loss": 0.2928, "rewards/accuracies": 0.875, "rewards/chosen": -1.588585615158081, "rewards/margins": 3.3819456100463867, "rewards/rejected": -4.970530986785889, "step": 1197 }, { "epoch": 0.25, "learning_rate": 1.5050420168067228e-05, "logits/chosen": -2.2957119941711426, "logits/rejected": -1.9593957662582397, "logps/chosen": -418.91845703125, "logps/rejected": -350.30194091796875, "loss": 0.598, "rewards/accuracies": 0.75, "rewards/chosen": -1.5677578449249268, "rewards/margins": 2.54343843460083, "rewards/rejected": -4.111196517944336, "step": 1198 }, { "epoch": 0.25, "learning_rate": 1.504621848739496e-05, "logits/chosen": -2.02939772605896, "logits/rejected": -1.7546391487121582, "logps/chosen": -359.5293273925781, "logps/rejected": -315.7480773925781, "loss": 0.3058, "rewards/accuracies": 0.875, "rewards/chosen": -1.6924017667770386, "rewards/margins": 3.448317050933838, "rewards/rejected": -5.140718936920166, "step": 1199 }, { "epoch": 0.25, "learning_rate": 1.504201680672269e-05, "logits/chosen": -2.1023483276367188, "logits/rejected": -1.813053846359253, "logps/chosen": -424.4500427246094, "logps/rejected": -445.86981201171875, "loss": 0.0722, "rewards/accuracies": 1.0, "rewards/chosen": -1.4255785942077637, "rewards/margins": 4.9957685470581055, "rewards/rejected": -6.421347141265869, "step": 1200 }, { "epoch": 0.25, "learning_rate": 1.5037815126050422e-05, "logits/chosen": -2.163423538208008, "logits/rejected": -1.7977042198181152, "logps/chosen": -312.72906494140625, "logps/rejected": -290.58489990234375, "loss": 0.464, "rewards/accuracies": 0.8125, "rewards/chosen": -2.5628819465637207, "rewards/margins": 2.356330394744873, "rewards/rejected": -4.919212341308594, "step": 1201 }, { "epoch": 0.25, "learning_rate": 1.5033613445378152e-05, "logits/chosen": -2.3980746269226074, "logits/rejected": -1.9703707695007324, "logps/chosen": -354.43701171875, "logps/rejected": -327.32403564453125, "loss": 0.0771, "rewards/accuracies": 1.0, "rewards/chosen": -1.9741089344024658, "rewards/margins": 4.94292688369751, "rewards/rejected": -6.9170355796813965, "step": 1202 }, { "epoch": 0.25, "learning_rate": 1.5029411764705884e-05, "logits/chosen": -1.775939702987671, "logits/rejected": -1.851386547088623, "logps/chosen": -212.29611206054688, "logps/rejected": -278.2469482421875, "loss": 0.1344, "rewards/accuracies": 0.875, "rewards/chosen": -1.4006495475769043, "rewards/margins": 4.711906433105469, "rewards/rejected": -6.112556457519531, "step": 1203 }, { "epoch": 0.25, "learning_rate": 1.5025210084033614e-05, "logits/chosen": -1.8301061391830444, "logits/rejected": -1.931321382522583, "logps/chosen": -267.00152587890625, "logps/rejected": -255.85873413085938, "loss": 0.5117, "rewards/accuracies": 0.75, "rewards/chosen": -2.289505958557129, "rewards/margins": 3.6016359329223633, "rewards/rejected": -5.891141891479492, "step": 1204 }, { "epoch": 0.25, "learning_rate": 1.5021008403361346e-05, "logits/chosen": -2.188115119934082, "logits/rejected": -1.6208341121673584, "logps/chosen": -352.37017822265625, "logps/rejected": -311.46661376953125, "loss": 0.1562, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1205482482910156, "rewards/margins": 4.6318278312683105, "rewards/rejected": -6.752375602722168, "step": 1205 }, { "epoch": 0.25, "learning_rate": 1.5016806722689076e-05, "logits/chosen": -2.007305860519409, "logits/rejected": -1.346221923828125, "logps/chosen": -353.40533447265625, "logps/rejected": -273.8104553222656, "loss": 0.2806, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2381479740142822, "rewards/margins": 3.3132522106170654, "rewards/rejected": -6.551400184631348, "step": 1206 }, { "epoch": 0.25, "learning_rate": 1.5012605042016808e-05, "logits/chosen": -2.2197954654693604, "logits/rejected": -2.0422654151916504, "logps/chosen": -255.7327117919922, "logps/rejected": -208.35516357421875, "loss": 0.209, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1673836708068848, "rewards/margins": 3.4634766578674316, "rewards/rejected": -5.630859851837158, "step": 1207 }, { "epoch": 0.25, "learning_rate": 1.5008403361344538e-05, "logits/chosen": -2.260037899017334, "logits/rejected": -2.1306285858154297, "logps/chosen": -372.90069580078125, "logps/rejected": -350.7979736328125, "loss": 0.287, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2288615703582764, "rewards/margins": 3.9649460315704346, "rewards/rejected": -6.193807601928711, "step": 1208 }, { "epoch": 0.25, "learning_rate": 1.500420168067227e-05, "logits/chosen": -1.9778294563293457, "logits/rejected": -1.902571201324463, "logps/chosen": -332.653564453125, "logps/rejected": -352.37591552734375, "loss": 0.15, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7417478561401367, "rewards/margins": 4.269123077392578, "rewards/rejected": -7.010870933532715, "step": 1209 }, { "epoch": 0.25, "learning_rate": 1.5000000000000002e-05, "logits/chosen": -2.5007359981536865, "logits/rejected": -2.257749319076538, "logps/chosen": -425.8990478515625, "logps/rejected": -416.16937255859375, "loss": 0.1807, "rewards/accuracies": 0.875, "rewards/chosen": -2.001979351043701, "rewards/margins": 3.5866730213165283, "rewards/rejected": -5.588652610778809, "step": 1210 }, { "epoch": 0.25, "learning_rate": 1.4995798319327732e-05, "logits/chosen": -2.112091541290283, "logits/rejected": -1.5676326751708984, "logps/chosen": -362.8662414550781, "logps/rejected": -316.5755920410156, "loss": 0.2354, "rewards/accuracies": 0.875, "rewards/chosen": -2.3783485889434814, "rewards/margins": 5.855016708374023, "rewards/rejected": -8.233365058898926, "step": 1211 }, { "epoch": 0.25, "learning_rate": 1.4991596638655464e-05, "logits/chosen": -2.2172112464904785, "logits/rejected": -1.773216962814331, "logps/chosen": -347.5205078125, "logps/rejected": -280.25506591796875, "loss": 0.1626, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1004161834716797, "rewards/margins": 3.5782556533813477, "rewards/rejected": -6.678671836853027, "step": 1212 }, { "epoch": 0.25, "learning_rate": 1.4987394957983194e-05, "logits/chosen": -2.1802289485931396, "logits/rejected": -1.5975370407104492, "logps/chosen": -308.1313171386719, "logps/rejected": -263.97760009765625, "loss": 0.2375, "rewards/accuracies": 0.9375, "rewards/chosen": -2.720303535461426, "rewards/margins": 3.6396279335021973, "rewards/rejected": -6.359931945800781, "step": 1213 }, { "epoch": 0.25, "learning_rate": 1.4983193277310926e-05, "logits/chosen": -2.058438539505005, "logits/rejected": -1.876246690750122, "logps/chosen": -391.8819274902344, "logps/rejected": -530.6412963867188, "loss": 0.0438, "rewards/accuracies": 1.0, "rewards/chosen": -1.8544301986694336, "rewards/margins": 6.893628120422363, "rewards/rejected": -8.748058319091797, "step": 1214 }, { "epoch": 0.25, "learning_rate": 1.4978991596638657e-05, "logits/chosen": -2.1150925159454346, "logits/rejected": -2.0265910625457764, "logps/chosen": -368.80072021484375, "logps/rejected": -273.08599853515625, "loss": 0.136, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8796675205230713, "rewards/margins": 4.929985046386719, "rewards/rejected": -6.809652328491211, "step": 1215 }, { "epoch": 0.25, "learning_rate": 1.4974789915966388e-05, "logits/chosen": -2.1040945053100586, "logits/rejected": -1.812894582748413, "logps/chosen": -223.75489807128906, "logps/rejected": -294.23748779296875, "loss": 0.1245, "rewards/accuracies": 1.0, "rewards/chosen": -2.8705954551696777, "rewards/margins": 4.475533485412598, "rewards/rejected": -7.346128940582275, "step": 1216 }, { "epoch": 0.25, "learning_rate": 1.4970588235294119e-05, "logits/chosen": -2.1186511516571045, "logits/rejected": -2.0732200145721436, "logps/chosen": -196.33123779296875, "logps/rejected": -243.09954833984375, "loss": 0.3386, "rewards/accuracies": 0.9375, "rewards/chosen": -2.925447702407837, "rewards/margins": 3.0269150733947754, "rewards/rejected": -5.952362537384033, "step": 1217 }, { "epoch": 0.25, "learning_rate": 1.496638655462185e-05, "logits/chosen": -2.140348434448242, "logits/rejected": -2.054626941680908, "logps/chosen": -365.5283203125, "logps/rejected": -360.31658935546875, "loss": 0.2577, "rewards/accuracies": 0.9375, "rewards/chosen": -2.55711030960083, "rewards/margins": 7.321220397949219, "rewards/rejected": -9.87833023071289, "step": 1218 }, { "epoch": 0.26, "learning_rate": 1.496218487394958e-05, "logits/chosen": -1.9368691444396973, "logits/rejected": -2.0441527366638184, "logps/chosen": -268.2828369140625, "logps/rejected": -333.4814147949219, "loss": 0.1697, "rewards/accuracies": 0.875, "rewards/chosen": -1.8126085996627808, "rewards/margins": 5.2551655769348145, "rewards/rejected": -7.067774295806885, "step": 1219 }, { "epoch": 0.26, "learning_rate": 1.4957983193277313e-05, "logits/chosen": -1.5235059261322021, "logits/rejected": -1.9660358428955078, "logps/chosen": -340.4520263671875, "logps/rejected": -402.4558410644531, "loss": 0.1864, "rewards/accuracies": 0.875, "rewards/chosen": -2.3598809242248535, "rewards/margins": 4.044033050537109, "rewards/rejected": -6.403914451599121, "step": 1220 }, { "epoch": 0.26, "learning_rate": 1.4953781512605043e-05, "logits/chosen": -2.52192759513855, "logits/rejected": -2.013643503189087, "logps/chosen": -359.35919189453125, "logps/rejected": -305.2646789550781, "loss": 0.2536, "rewards/accuracies": 0.875, "rewards/chosen": -2.6135849952697754, "rewards/margins": 4.372774124145508, "rewards/rejected": -6.986359596252441, "step": 1221 }, { "epoch": 0.26, "learning_rate": 1.4949579831932775e-05, "logits/chosen": -2.1180217266082764, "logits/rejected": -2.2037787437438965, "logps/chosen": -379.3891296386719, "logps/rejected": -420.52386474609375, "loss": 0.3081, "rewards/accuracies": 0.8125, "rewards/chosen": -2.5315847396850586, "rewards/margins": 3.5132007598876953, "rewards/rejected": -6.044785499572754, "step": 1222 }, { "epoch": 0.26, "learning_rate": 1.4945378151260505e-05, "logits/chosen": -2.2050085067749023, "logits/rejected": -2.200244665145874, "logps/chosen": -291.6566467285156, "logps/rejected": -331.0263977050781, "loss": 0.9311, "rewards/accuracies": 0.625, "rewards/chosen": -2.7258543968200684, "rewards/margins": 2.866224765777588, "rewards/rejected": -5.592079162597656, "step": 1223 }, { "epoch": 0.26, "learning_rate": 1.4941176470588237e-05, "logits/chosen": -2.3285887241363525, "logits/rejected": -2.107588291168213, "logps/chosen": -400.912841796875, "logps/rejected": -377.38128662109375, "loss": 0.4701, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6222760677337646, "rewards/margins": 4.1373066902160645, "rewards/rejected": -6.75958251953125, "step": 1224 }, { "epoch": 0.26, "learning_rate": 1.4936974789915967e-05, "logits/chosen": -2.198303699493408, "logits/rejected": -1.980510950088501, "logps/chosen": -343.60888671875, "logps/rejected": -422.5761413574219, "loss": 0.4523, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5156986713409424, "rewards/margins": 5.014289379119873, "rewards/rejected": -7.529987812042236, "step": 1225 }, { "epoch": 0.26, "learning_rate": 1.49327731092437e-05, "logits/chosen": -2.3580985069274902, "logits/rejected": -2.328951835632324, "logps/chosen": -321.87957763671875, "logps/rejected": -307.9559326171875, "loss": 0.6262, "rewards/accuracies": 0.6875, "rewards/chosen": -3.0097098350524902, "rewards/margins": 2.486520290374756, "rewards/rejected": -5.496230125427246, "step": 1226 }, { "epoch": 0.26, "learning_rate": 1.492857142857143e-05, "logits/chosen": -2.226259469985962, "logits/rejected": -2.0274949073791504, "logps/chosen": -173.77804565429688, "logps/rejected": -253.33920288085938, "loss": 0.113, "rewards/accuracies": 1.0, "rewards/chosen": -2.7933404445648193, "rewards/margins": 5.47879695892334, "rewards/rejected": -8.272137641906738, "step": 1227 }, { "epoch": 0.26, "learning_rate": 1.4924369747899161e-05, "logits/chosen": -2.068190813064575, "logits/rejected": -1.8367664813995361, "logps/chosen": -436.0915222167969, "logps/rejected": -471.1249694824219, "loss": 0.2845, "rewards/accuracies": 0.875, "rewards/chosen": -2.7218289375305176, "rewards/margins": 3.41749906539917, "rewards/rejected": -6.139328479766846, "step": 1228 }, { "epoch": 0.26, "learning_rate": 1.4920168067226892e-05, "logits/chosen": -2.171875238418579, "logits/rejected": -2.072824716567993, "logps/chosen": -380.54364013671875, "logps/rejected": -369.61004638671875, "loss": 0.4066, "rewards/accuracies": 0.6875, "rewards/chosen": -3.0371713638305664, "rewards/margins": 2.700989246368408, "rewards/rejected": -5.738160610198975, "step": 1229 }, { "epoch": 0.26, "learning_rate": 1.4915966386554623e-05, "logits/chosen": -2.262847423553467, "logits/rejected": -2.2142810821533203, "logps/chosen": -306.16766357421875, "logps/rejected": -299.1580505371094, "loss": 0.4377, "rewards/accuracies": 0.9375, "rewards/chosen": -3.5485153198242188, "rewards/margins": 3.2537474632263184, "rewards/rejected": -6.802263259887695, "step": 1230 }, { "epoch": 0.26, "learning_rate": 1.4911764705882354e-05, "logits/chosen": -1.872605800628662, "logits/rejected": -1.5538092851638794, "logps/chosen": -310.0887451171875, "logps/rejected": -280.9057312011719, "loss": 0.2836, "rewards/accuracies": 0.9375, "rewards/chosen": -3.3451385498046875, "rewards/margins": 3.2125837802886963, "rewards/rejected": -6.557722091674805, "step": 1231 }, { "epoch": 0.26, "learning_rate": 1.4907563025210086e-05, "logits/chosen": -2.133260726928711, "logits/rejected": -2.011679172515869, "logps/chosen": -351.28204345703125, "logps/rejected": -568.9136352539062, "loss": 0.0993, "rewards/accuracies": 1.0, "rewards/chosen": -2.1848526000976562, "rewards/margins": 5.747891426086426, "rewards/rejected": -7.932744026184082, "step": 1232 }, { "epoch": 0.26, "learning_rate": 1.4903361344537817e-05, "logits/chosen": -2.139843463897705, "logits/rejected": -2.057265281677246, "logps/chosen": -316.66693115234375, "logps/rejected": -332.8795471191406, "loss": 0.3868, "rewards/accuracies": 0.8125, "rewards/chosen": -2.246325969696045, "rewards/margins": 4.390238285064697, "rewards/rejected": -6.636564254760742, "step": 1233 }, { "epoch": 0.26, "learning_rate": 1.4899159663865548e-05, "logits/chosen": -2.131394386291504, "logits/rejected": -2.0964486598968506, "logps/chosen": -300.88348388671875, "logps/rejected": -331.24310302734375, "loss": 0.3583, "rewards/accuracies": 0.875, "rewards/chosen": -3.0885329246520996, "rewards/margins": 3.7785253524780273, "rewards/rejected": -6.867058277130127, "step": 1234 }, { "epoch": 0.26, "learning_rate": 1.489495798319328e-05, "logits/chosen": -2.165121555328369, "logits/rejected": -2.1415867805480957, "logps/chosen": -395.57470703125, "logps/rejected": -383.6015625, "loss": 0.4236, "rewards/accuracies": 0.8125, "rewards/chosen": -3.561150074005127, "rewards/margins": 3.0468127727508545, "rewards/rejected": -6.607962608337402, "step": 1235 }, { "epoch": 0.26, "learning_rate": 1.489075630252101e-05, "logits/chosen": -2.044487476348877, "logits/rejected": -2.0329229831695557, "logps/chosen": -236.61151123046875, "logps/rejected": -259.8937683105469, "loss": 0.1177, "rewards/accuracies": 0.9375, "rewards/chosen": -2.607759714126587, "rewards/margins": 5.409193992614746, "rewards/rejected": -8.01695442199707, "step": 1236 }, { "epoch": 0.26, "learning_rate": 1.4886554621848742e-05, "logits/chosen": -2.4068093299865723, "logits/rejected": -2.0248050689697266, "logps/chosen": -343.762451171875, "logps/rejected": -284.1758117675781, "loss": 0.365, "rewards/accuracies": 0.75, "rewards/chosen": -3.357558250427246, "rewards/margins": 3.0251946449279785, "rewards/rejected": -6.382753372192383, "step": 1237 }, { "epoch": 0.26, "learning_rate": 1.4882352941176472e-05, "logits/chosen": -2.171760082244873, "logits/rejected": -1.9044723510742188, "logps/chosen": -325.18353271484375, "logps/rejected": -402.3902587890625, "loss": 0.3132, "rewards/accuracies": 0.875, "rewards/chosen": -3.185366630554199, "rewards/margins": 4.973031520843506, "rewards/rejected": -8.158397674560547, "step": 1238 }, { "epoch": 0.26, "learning_rate": 1.4878151260504204e-05, "logits/chosen": -2.098283529281616, "logits/rejected": -1.8983526229858398, "logps/chosen": -388.46197509765625, "logps/rejected": -354.659423828125, "loss": 0.2702, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1840741634368896, "rewards/margins": 4.17909574508667, "rewards/rejected": -6.363170146942139, "step": 1239 }, { "epoch": 0.26, "learning_rate": 1.4873949579831934e-05, "logits/chosen": -2.224400520324707, "logits/rejected": -2.192211627960205, "logps/chosen": -285.0545654296875, "logps/rejected": -337.7933654785156, "loss": 0.3727, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2407422065734863, "rewards/margins": 2.813994884490967, "rewards/rejected": -6.054736614227295, "step": 1240 }, { "epoch": 0.26, "learning_rate": 1.4869747899159666e-05, "logits/chosen": -2.2433032989501953, "logits/rejected": -1.9871091842651367, "logps/chosen": -332.70184326171875, "logps/rejected": -339.0936279296875, "loss": 0.7457, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3039932250976562, "rewards/margins": 3.8591604232788086, "rewards/rejected": -7.163153648376465, "step": 1241 }, { "epoch": 0.26, "learning_rate": 1.4865546218487396e-05, "logits/chosen": -2.037442684173584, "logits/rejected": -2.013826370239258, "logps/chosen": -386.5946350097656, "logps/rejected": -540.1484985351562, "loss": 1.0011, "rewards/accuracies": 0.6875, "rewards/chosen": -3.319838762283325, "rewards/margins": 3.8513245582580566, "rewards/rejected": -7.171163558959961, "step": 1242 }, { "epoch": 0.26, "learning_rate": 1.4861344537815128e-05, "logits/chosen": -2.255568027496338, "logits/rejected": -2.10587739944458, "logps/chosen": -363.89447021484375, "logps/rejected": -320.13983154296875, "loss": 0.1884, "rewards/accuracies": 0.875, "rewards/chosen": -1.78605055809021, "rewards/margins": 3.9073190689086914, "rewards/rejected": -5.693369388580322, "step": 1243 }, { "epoch": 0.26, "learning_rate": 1.4857142857142858e-05, "logits/chosen": -2.293961763381958, "logits/rejected": -1.8324017524719238, "logps/chosen": -414.66143798828125, "logps/rejected": -318.05426025390625, "loss": 0.5809, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3593478202819824, "rewards/margins": 3.067253589630127, "rewards/rejected": -5.426601409912109, "step": 1244 }, { "epoch": 0.26, "learning_rate": 1.485294117647059e-05, "logits/chosen": -2.19781494140625, "logits/rejected": -2.1973230838775635, "logps/chosen": -320.99908447265625, "logps/rejected": -331.1231689453125, "loss": 0.302, "rewards/accuracies": 0.875, "rewards/chosen": -3.4913461208343506, "rewards/margins": 3.0858981609344482, "rewards/rejected": -6.577244281768799, "step": 1245 }, { "epoch": 0.26, "learning_rate": 1.484873949579832e-05, "logits/chosen": -2.202479362487793, "logits/rejected": -1.8314063549041748, "logps/chosen": -324.1307678222656, "logps/rejected": -309.2308654785156, "loss": 0.372, "rewards/accuracies": 0.75, "rewards/chosen": -2.7506794929504395, "rewards/margins": 3.8087098598480225, "rewards/rejected": -6.559390068054199, "step": 1246 }, { "epoch": 0.26, "learning_rate": 1.4844537815126052e-05, "logits/chosen": -2.0784807205200195, "logits/rejected": -2.043990135192871, "logps/chosen": -391.52606201171875, "logps/rejected": -294.4495544433594, "loss": 0.4006, "rewards/accuracies": 0.875, "rewards/chosen": -2.3852477073669434, "rewards/margins": 4.829652786254883, "rewards/rejected": -7.214900970458984, "step": 1247 }, { "epoch": 0.26, "learning_rate": 1.4840336134453783e-05, "logits/chosen": -2.2601428031921387, "logits/rejected": -1.4105021953582764, "logps/chosen": -333.609375, "logps/rejected": -311.25885009765625, "loss": 0.1278, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8014662265777588, "rewards/margins": 4.8279595375061035, "rewards/rejected": -6.629426002502441, "step": 1248 }, { "epoch": 0.26, "learning_rate": 1.4836134453781515e-05, "logits/chosen": -2.1901583671569824, "logits/rejected": -1.9587454795837402, "logps/chosen": -262.41839599609375, "logps/rejected": -346.70880126953125, "loss": 0.4117, "rewards/accuracies": 0.875, "rewards/chosen": -2.328275680541992, "rewards/margins": 3.0793232917785645, "rewards/rejected": -5.407598972320557, "step": 1249 }, { "epoch": 0.26, "learning_rate": 1.4831932773109245e-05, "logits/chosen": -2.3683345317840576, "logits/rejected": -2.1028990745544434, "logps/chosen": -375.647705078125, "logps/rejected": -354.07373046875, "loss": 0.3542, "rewards/accuracies": 0.8125, "rewards/chosen": -1.460684061050415, "rewards/margins": 2.711263656616211, "rewards/rejected": -4.171947956085205, "step": 1250 }, { "epoch": 0.26, "learning_rate": 1.4827731092436977e-05, "logits/chosen": -2.0579822063446045, "logits/rejected": -2.1787192821502686, "logps/chosen": -340.0102844238281, "logps/rejected": -309.2815856933594, "loss": 0.513, "rewards/accuracies": 0.75, "rewards/chosen": -2.4277546405792236, "rewards/margins": 3.2885842323303223, "rewards/rejected": -5.716339111328125, "step": 1251 }, { "epoch": 0.26, "learning_rate": 1.4823529411764707e-05, "logits/chosen": -2.3111047744750977, "logits/rejected": -1.9673919677734375, "logps/chosen": -313.46514892578125, "logps/rejected": -382.71282958984375, "loss": 0.2811, "rewards/accuracies": 0.875, "rewards/chosen": -2.5062832832336426, "rewards/margins": 2.6285202503204346, "rewards/rejected": -5.134803771972656, "step": 1252 }, { "epoch": 0.26, "learning_rate": 1.4819327731092439e-05, "logits/chosen": -2.0615389347076416, "logits/rejected": -2.158968448638916, "logps/chosen": -430.1170654296875, "logps/rejected": -438.90911865234375, "loss": 0.2845, "rewards/accuracies": 0.9375, "rewards/chosen": -1.925978422164917, "rewards/margins": 3.712069272994995, "rewards/rejected": -5.638047695159912, "step": 1253 }, { "epoch": 0.26, "learning_rate": 1.4815126050420169e-05, "logits/chosen": -1.9804422855377197, "logits/rejected": -2.1082122325897217, "logps/chosen": -276.42486572265625, "logps/rejected": -311.48687744140625, "loss": 0.1469, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8941290378570557, "rewards/margins": 3.601116418838501, "rewards/rejected": -5.495245933532715, "step": 1254 }, { "epoch": 0.26, "learning_rate": 1.4810924369747901e-05, "logits/chosen": -2.087886333465576, "logits/rejected": -1.9520126581192017, "logps/chosen": -410.7392272949219, "logps/rejected": -337.5001525878906, "loss": 0.4123, "rewards/accuracies": 0.875, "rewards/chosen": -1.7130568027496338, "rewards/margins": 2.0264892578125, "rewards/rejected": -3.739546298980713, "step": 1255 }, { "epoch": 0.26, "learning_rate": 1.4806722689075633e-05, "logits/chosen": -2.0098466873168945, "logits/rejected": -1.8570048809051514, "logps/chosen": -233.21795654296875, "logps/rejected": -285.7274169921875, "loss": 0.563, "rewards/accuracies": 0.75, "rewards/chosen": -2.897686719894409, "rewards/margins": 1.9628833532333374, "rewards/rejected": -4.860569477081299, "step": 1256 }, { "epoch": 0.26, "learning_rate": 1.4802521008403363e-05, "logits/chosen": -2.0937600135803223, "logits/rejected": -2.1659560203552246, "logps/chosen": -307.9239501953125, "logps/rejected": -371.7438659667969, "loss": 0.2424, "rewards/accuracies": 0.9375, "rewards/chosen": -2.197117328643799, "rewards/margins": 2.7394893169403076, "rewards/rejected": -4.9366068840026855, "step": 1257 }, { "epoch": 0.26, "learning_rate": 1.4798319327731095e-05, "logits/chosen": -2.297569513320923, "logits/rejected": -1.78489089012146, "logps/chosen": -369.9482421875, "logps/rejected": -388.3670654296875, "loss": 0.1519, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9404587745666504, "rewards/margins": 4.160933017730713, "rewards/rejected": -6.101391792297363, "step": 1258 }, { "epoch": 0.26, "learning_rate": 1.4794117647058825e-05, "logits/chosen": -2.0682833194732666, "logits/rejected": -1.9511396884918213, "logps/chosen": -241.9205780029297, "logps/rejected": -362.4847412109375, "loss": 0.2041, "rewards/accuracies": 0.875, "rewards/chosen": -2.0030581951141357, "rewards/margins": 4.154212474822998, "rewards/rejected": -6.157270431518555, "step": 1259 }, { "epoch": 0.26, "learning_rate": 1.4789915966386557e-05, "logits/chosen": -2.019435405731201, "logits/rejected": -2.0918893814086914, "logps/chosen": -296.945068359375, "logps/rejected": -288.6117858886719, "loss": 0.4968, "rewards/accuracies": 0.875, "rewards/chosen": -2.8838021755218506, "rewards/margins": 2.7718918323516846, "rewards/rejected": -5.655694007873535, "step": 1260 }, { "epoch": 0.26, "learning_rate": 1.4785714285714287e-05, "logits/chosen": -2.21256160736084, "logits/rejected": -1.863285779953003, "logps/chosen": -422.43719482421875, "logps/rejected": -348.0411376953125, "loss": 0.4344, "rewards/accuracies": 0.875, "rewards/chosen": -2.2615697383880615, "rewards/margins": 2.5435805320739746, "rewards/rejected": -4.805150032043457, "step": 1261 }, { "epoch": 0.26, "learning_rate": 1.478151260504202e-05, "logits/chosen": -2.252715587615967, "logits/rejected": -2.1746504306793213, "logps/chosen": -312.0749206542969, "logps/rejected": -298.6753845214844, "loss": 0.5479, "rewards/accuracies": 0.75, "rewards/chosen": -2.2686710357666016, "rewards/margins": 3.1939423084259033, "rewards/rejected": -5.462613582611084, "step": 1262 }, { "epoch": 0.26, "learning_rate": 1.477731092436975e-05, "logits/chosen": -2.375532865524292, "logits/rejected": -1.6960687637329102, "logps/chosen": -349.6080017089844, "logps/rejected": -293.5823974609375, "loss": 0.2514, "rewards/accuracies": 0.875, "rewards/chosen": -1.8709702491760254, "rewards/margins": 3.1332614421844482, "rewards/rejected": -5.0042314529418945, "step": 1263 }, { "epoch": 0.26, "learning_rate": 1.4773109243697481e-05, "logits/chosen": -1.8829500675201416, "logits/rejected": -2.027601957321167, "logps/chosen": -220.30653381347656, "logps/rejected": -346.98382568359375, "loss": 0.4497, "rewards/accuracies": 0.75, "rewards/chosen": -2.29658842086792, "rewards/margins": 2.055175542831421, "rewards/rejected": -4.351763725280762, "step": 1264 }, { "epoch": 0.26, "learning_rate": 1.4768907563025212e-05, "logits/chosen": -2.33864688873291, "logits/rejected": -1.9569733142852783, "logps/chosen": -424.92462158203125, "logps/rejected": -360.5376892089844, "loss": 0.3562, "rewards/accuracies": 0.8125, "rewards/chosen": -1.835164189338684, "rewards/margins": 3.688948154449463, "rewards/rejected": -5.524112224578857, "step": 1265 }, { "epoch": 0.26, "learning_rate": 1.4764705882352944e-05, "logits/chosen": -2.4514994621276855, "logits/rejected": -2.103682041168213, "logps/chosen": -295.4800109863281, "logps/rejected": -256.933349609375, "loss": 0.2922, "rewards/accuracies": 0.8125, "rewards/chosen": -2.148221969604492, "rewards/margins": 3.3808376789093018, "rewards/rejected": -5.529059886932373, "step": 1266 }, { "epoch": 0.27, "learning_rate": 1.4760504201680674e-05, "logits/chosen": -2.0571556091308594, "logits/rejected": -1.548018217086792, "logps/chosen": -398.4248046875, "logps/rejected": -276.8052978515625, "loss": 0.4752, "rewards/accuracies": 0.75, "rewards/chosen": -2.9338788986206055, "rewards/margins": 2.7316012382507324, "rewards/rejected": -5.665480613708496, "step": 1267 }, { "epoch": 0.27, "learning_rate": 1.4756302521008406e-05, "logits/chosen": -2.417056083679199, "logits/rejected": -1.7876685857772827, "logps/chosen": -492.06158447265625, "logps/rejected": -354.4727783203125, "loss": 0.7385, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2213516235351562, "rewards/margins": 2.8694396018981934, "rewards/rejected": -5.09079122543335, "step": 1268 }, { "epoch": 0.27, "learning_rate": 1.4752100840336136e-05, "logits/chosen": -1.624762773513794, "logits/rejected": -1.976487398147583, "logps/chosen": -233.65724182128906, "logps/rejected": -442.7911682128906, "loss": 0.2496, "rewards/accuracies": 0.875, "rewards/chosen": -1.4648206233978271, "rewards/margins": 4.145246505737305, "rewards/rejected": -5.610067367553711, "step": 1269 }, { "epoch": 0.27, "learning_rate": 1.4747899159663868e-05, "logits/chosen": -2.186835289001465, "logits/rejected": -2.3797495365142822, "logps/chosen": -293.5005187988281, "logps/rejected": -321.178466796875, "loss": 0.1936, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6081080436706543, "rewards/margins": 3.19716739654541, "rewards/rejected": -4.8052754402160645, "step": 1270 }, { "epoch": 0.27, "learning_rate": 1.4743697478991598e-05, "logits/chosen": -2.3055331707000732, "logits/rejected": -2.0779318809509277, "logps/chosen": -233.13351440429688, "logps/rejected": -241.09288024902344, "loss": 0.3474, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4241771697998047, "rewards/margins": 3.886657476425171, "rewards/rejected": -6.310834884643555, "step": 1271 }, { "epoch": 0.27, "learning_rate": 1.473949579831933e-05, "logits/chosen": -2.095747470855713, "logits/rejected": -1.788177251815796, "logps/chosen": -359.4222106933594, "logps/rejected": -326.16900634765625, "loss": 0.1375, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9823145866394043, "rewards/margins": 5.11967658996582, "rewards/rejected": -7.101991653442383, "step": 1272 }, { "epoch": 0.27, "learning_rate": 1.473529411764706e-05, "logits/chosen": -2.210921287536621, "logits/rejected": -1.7555038928985596, "logps/chosen": -348.13568115234375, "logps/rejected": -311.5467529296875, "loss": 0.294, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7462973594665527, "rewards/margins": 3.652886390686035, "rewards/rejected": -5.399183750152588, "step": 1273 }, { "epoch": 0.27, "learning_rate": 1.4731092436974792e-05, "logits/chosen": -2.228372097015381, "logits/rejected": -1.7779014110565186, "logps/chosen": -242.40841674804688, "logps/rejected": -266.3146057128906, "loss": 0.4707, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0871236324310303, "rewards/margins": 1.6827473640441895, "rewards/rejected": -3.769871234893799, "step": 1274 }, { "epoch": 0.27, "learning_rate": 1.4726890756302522e-05, "logits/chosen": -2.227362632751465, "logits/rejected": -2.0143706798553467, "logps/chosen": -365.6995849609375, "logps/rejected": -424.6846923828125, "loss": 0.0639, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7715792655944824, "rewards/margins": 5.921769142150879, "rewards/rejected": -6.693347930908203, "step": 1275 }, { "epoch": 0.27, "learning_rate": 1.4722689075630254e-05, "logits/chosen": -2.287069797515869, "logits/rejected": -2.1423909664154053, "logps/chosen": -382.0042419433594, "logps/rejected": -445.89288330078125, "loss": 0.3104, "rewards/accuracies": 0.875, "rewards/chosen": -1.9299278259277344, "rewards/margins": 3.295947551727295, "rewards/rejected": -5.225875377655029, "step": 1276 }, { "epoch": 0.27, "learning_rate": 1.4718487394957986e-05, "logits/chosen": -2.1588196754455566, "logits/rejected": -2.1907949447631836, "logps/chosen": -332.0426025390625, "logps/rejected": -431.2523193359375, "loss": 0.2578, "rewards/accuracies": 0.8125, "rewards/chosen": -1.533712387084961, "rewards/margins": 3.361938714981079, "rewards/rejected": -4.895650863647461, "step": 1277 }, { "epoch": 0.27, "learning_rate": 1.4714285714285716e-05, "logits/chosen": -2.192356586456299, "logits/rejected": -2.303767204284668, "logps/chosen": -349.8454895019531, "logps/rejected": -368.3902893066406, "loss": 0.4539, "rewards/accuracies": 0.875, "rewards/chosen": -1.9580715894699097, "rewards/margins": 2.893841028213501, "rewards/rejected": -4.851912498474121, "step": 1278 }, { "epoch": 0.27, "learning_rate": 1.4710084033613448e-05, "logits/chosen": -2.2149999141693115, "logits/rejected": -1.8579695224761963, "logps/chosen": -246.74853515625, "logps/rejected": -243.54165649414062, "loss": 0.3374, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1781527996063232, "rewards/margins": 2.468636989593506, "rewards/rejected": -4.646790027618408, "step": 1279 }, { "epoch": 0.27, "learning_rate": 1.4705882352941179e-05, "logits/chosen": -2.1588666439056396, "logits/rejected": -1.8670859336853027, "logps/chosen": -348.0268859863281, "logps/rejected": -362.1605529785156, "loss": 0.1768, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1999893188476562, "rewards/margins": 3.294858455657959, "rewards/rejected": -5.494847774505615, "step": 1280 }, { "epoch": 0.27, "learning_rate": 1.470168067226891e-05, "logits/chosen": -2.2778701782226562, "logits/rejected": -1.9129645824432373, "logps/chosen": -324.49468994140625, "logps/rejected": -364.21978759765625, "loss": 0.2615, "rewards/accuracies": 0.875, "rewards/chosen": -2.301697015762329, "rewards/margins": 3.9454219341278076, "rewards/rejected": -6.2471184730529785, "step": 1281 }, { "epoch": 0.27, "learning_rate": 1.469747899159664e-05, "logits/chosen": -2.1504523754119873, "logits/rejected": -2.355855941772461, "logps/chosen": -310.143798828125, "logps/rejected": -343.95294189453125, "loss": 0.6833, "rewards/accuracies": 0.6875, "rewards/chosen": -2.759343147277832, "rewards/margins": 1.7168376445770264, "rewards/rejected": -4.4761810302734375, "step": 1282 }, { "epoch": 0.27, "learning_rate": 1.4693277310924373e-05, "logits/chosen": -2.0251173973083496, "logits/rejected": -1.9537947177886963, "logps/chosen": -400.30206298828125, "logps/rejected": -375.833984375, "loss": 0.3784, "rewards/accuracies": 0.75, "rewards/chosen": -1.8339409828186035, "rewards/margins": 2.0957093238830566, "rewards/rejected": -3.92965030670166, "step": 1283 }, { "epoch": 0.27, "learning_rate": 1.4689075630252103e-05, "logits/chosen": -2.236034870147705, "logits/rejected": -2.0384464263916016, "logps/chosen": -464.8644104003906, "logps/rejected": -341.2873229980469, "loss": 0.1873, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4539563655853271, "rewards/margins": 5.187774181365967, "rewards/rejected": -6.641730308532715, "step": 1284 }, { "epoch": 0.27, "learning_rate": 1.4684873949579831e-05, "logits/chosen": -2.086874485015869, "logits/rejected": -1.9425218105316162, "logps/chosen": -325.1651916503906, "logps/rejected": -437.2560729980469, "loss": 0.2233, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2628142833709717, "rewards/margins": 4.152487754821777, "rewards/rejected": -6.415302276611328, "step": 1285 }, { "epoch": 0.27, "learning_rate": 1.4680672268907563e-05, "logits/chosen": -2.2547316551208496, "logits/rejected": -1.5304312705993652, "logps/chosen": -377.79949951171875, "logps/rejected": -314.02484130859375, "loss": 0.174, "rewards/accuracies": 0.9375, "rewards/chosen": -1.972707748413086, "rewards/margins": 3.568516969680786, "rewards/rejected": -5.541224956512451, "step": 1286 }, { "epoch": 0.27, "learning_rate": 1.4676470588235294e-05, "logits/chosen": -1.993772029876709, "logits/rejected": -2.102482795715332, "logps/chosen": -369.169677734375, "logps/rejected": -431.16851806640625, "loss": 0.1246, "rewards/accuracies": 0.9375, "rewards/chosen": -1.942072868347168, "rewards/margins": 3.7872238159179688, "rewards/rejected": -5.7292962074279785, "step": 1287 }, { "epoch": 0.27, "learning_rate": 1.4672268907563025e-05, "logits/chosen": -2.1982686519622803, "logits/rejected": -1.89798104763031, "logps/chosen": -372.82952880859375, "logps/rejected": -294.7918701171875, "loss": 0.6966, "rewards/accuracies": 0.75, "rewards/chosen": -2.335519313812256, "rewards/margins": 2.2533819675445557, "rewards/rejected": -4.588900566101074, "step": 1288 }, { "epoch": 0.27, "learning_rate": 1.4668067226890756e-05, "logits/chosen": -2.4971814155578613, "logits/rejected": -2.1143112182617188, "logps/chosen": -419.9573669433594, "logps/rejected": -348.7078552246094, "loss": 0.2194, "rewards/accuracies": 0.875, "rewards/chosen": -2.1714119911193848, "rewards/margins": 3.3058905601501465, "rewards/rejected": -5.477302551269531, "step": 1289 }, { "epoch": 0.27, "learning_rate": 1.4663865546218488e-05, "logits/chosen": -2.149749279022217, "logits/rejected": -1.6361687183380127, "logps/chosen": -385.1405334472656, "logps/rejected": -299.35797119140625, "loss": 0.3123, "rewards/accuracies": 0.8125, "rewards/chosen": -2.231283187866211, "rewards/margins": 3.6453938484191895, "rewards/rejected": -5.8766770362854, "step": 1290 }, { "epoch": 0.27, "learning_rate": 1.465966386554622e-05, "logits/chosen": -2.481612205505371, "logits/rejected": -1.9378042221069336, "logps/chosen": -379.52447509765625, "logps/rejected": -298.8292541503906, "loss": 0.3319, "rewards/accuracies": 0.875, "rewards/chosen": -1.5607129335403442, "rewards/margins": 3.0255861282348633, "rewards/rejected": -4.586298942565918, "step": 1291 }, { "epoch": 0.27, "learning_rate": 1.465546218487395e-05, "logits/chosen": -2.3154311180114746, "logits/rejected": -2.3894214630126953, "logps/chosen": -388.3495178222656, "logps/rejected": -428.69390869140625, "loss": 0.5803, "rewards/accuracies": 0.6875, "rewards/chosen": -1.547250747680664, "rewards/margins": 2.6211979389190674, "rewards/rejected": -4.168448448181152, "step": 1292 }, { "epoch": 0.27, "learning_rate": 1.4651260504201682e-05, "logits/chosen": -2.0681214332580566, "logits/rejected": -2.1275618076324463, "logps/chosen": -288.99432373046875, "logps/rejected": -265.9241943359375, "loss": 1.1976, "rewards/accuracies": 0.5625, "rewards/chosen": -3.152647018432617, "rewards/margins": 0.7481802105903625, "rewards/rejected": -3.900827407836914, "step": 1293 }, { "epoch": 0.27, "learning_rate": 1.4647058823529412e-05, "logits/chosen": -2.1913821697235107, "logits/rejected": -2.0273146629333496, "logps/chosen": -326.0242919921875, "logps/rejected": -324.28851318359375, "loss": 0.2837, "rewards/accuracies": 0.875, "rewards/chosen": -2.2924721240997314, "rewards/margins": 3.1881661415100098, "rewards/rejected": -5.48063850402832, "step": 1294 }, { "epoch": 0.27, "learning_rate": 1.4642857142857144e-05, "logits/chosen": -2.4202380180358887, "logits/rejected": -1.8570458889007568, "logps/chosen": -377.66375732421875, "logps/rejected": -364.09552001953125, "loss": 0.367, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6750707626342773, "rewards/margins": 3.2902719974517822, "rewards/rejected": -5.9653425216674805, "step": 1295 }, { "epoch": 0.27, "learning_rate": 1.4638655462184874e-05, "logits/chosen": -2.4300355911254883, "logits/rejected": -2.0552690029144287, "logps/chosen": -327.7197570800781, "logps/rejected": -359.2673034667969, "loss": 0.2016, "rewards/accuracies": 0.9375, "rewards/chosen": -2.563568592071533, "rewards/margins": 3.2751879692077637, "rewards/rejected": -5.838756561279297, "step": 1296 }, { "epoch": 0.27, "learning_rate": 1.4634453781512606e-05, "logits/chosen": -1.87666916847229, "logits/rejected": -1.7819392681121826, "logps/chosen": -388.83917236328125, "logps/rejected": -364.4532470703125, "loss": 0.3303, "rewards/accuracies": 0.875, "rewards/chosen": -1.9207514524459839, "rewards/margins": 2.494077682495117, "rewards/rejected": -4.414829254150391, "step": 1297 }, { "epoch": 0.27, "learning_rate": 1.4630252100840336e-05, "logits/chosen": -2.4306719303131104, "logits/rejected": -1.7703864574432373, "logps/chosen": -455.485107421875, "logps/rejected": -354.526123046875, "loss": 0.9332, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7424826622009277, "rewards/margins": 2.436265707015991, "rewards/rejected": -5.17874813079834, "step": 1298 }, { "epoch": 0.27, "learning_rate": 1.4626050420168068e-05, "logits/chosen": -2.37957763671875, "logits/rejected": -1.4981766939163208, "logps/chosen": -322.4650573730469, "logps/rejected": -235.86851501464844, "loss": 0.5699, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1211328506469727, "rewards/margins": 3.071442127227783, "rewards/rejected": -6.192574977874756, "step": 1299 }, { "epoch": 0.27, "learning_rate": 1.4621848739495798e-05, "logits/chosen": -2.272067070007324, "logits/rejected": -2.377992630004883, "logps/chosen": -340.32928466796875, "logps/rejected": -375.86334228515625, "loss": 0.698, "rewards/accuracies": 0.625, "rewards/chosen": -1.5345244407653809, "rewards/margins": 1.9435549974441528, "rewards/rejected": -3.478079319000244, "step": 1300 }, { "epoch": 0.27, "learning_rate": 1.461764705882353e-05, "logits/chosen": -2.0112035274505615, "logits/rejected": -1.68162202835083, "logps/chosen": -326.16461181640625, "logps/rejected": -250.62173461914062, "loss": 0.2758, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8275446891784668, "rewards/margins": 3.0117392539978027, "rewards/rejected": -4.8392839431762695, "step": 1301 }, { "epoch": 0.27, "learning_rate": 1.461344537815126e-05, "logits/chosen": -2.1388676166534424, "logits/rejected": -1.4437810182571411, "logps/chosen": -277.62640380859375, "logps/rejected": -212.07789611816406, "loss": 0.4673, "rewards/accuracies": 0.6875, "rewards/chosen": -2.8518950939178467, "rewards/margins": 2.283548355102539, "rewards/rejected": -5.135443210601807, "step": 1302 }, { "epoch": 0.27, "learning_rate": 1.4609243697478992e-05, "logits/chosen": -2.077181577682495, "logits/rejected": -1.7752044200897217, "logps/chosen": -423.83868408203125, "logps/rejected": -338.0631408691406, "loss": 0.4177, "rewards/accuracies": 0.875, "rewards/chosen": -1.6175929307937622, "rewards/margins": 2.147789716720581, "rewards/rejected": -3.7653825283050537, "step": 1303 }, { "epoch": 0.27, "learning_rate": 1.4605042016806723e-05, "logits/chosen": -2.079444408416748, "logits/rejected": -1.9716310501098633, "logps/chosen": -331.0086364746094, "logps/rejected": -394.33447265625, "loss": 0.1341, "rewards/accuracies": 1.0, "rewards/chosen": -1.6591163873672485, "rewards/margins": 3.6345081329345703, "rewards/rejected": -5.2936248779296875, "step": 1304 }, { "epoch": 0.27, "learning_rate": 1.4600840336134454e-05, "logits/chosen": -2.2736055850982666, "logits/rejected": -1.9814271926879883, "logps/chosen": -369.89117431640625, "logps/rejected": -418.7023620605469, "loss": 0.2673, "rewards/accuracies": 0.875, "rewards/chosen": -2.251814603805542, "rewards/margins": 3.0796754360198975, "rewards/rejected": -5.3314900398254395, "step": 1305 }, { "epoch": 0.27, "learning_rate": 1.4596638655462185e-05, "logits/chosen": -2.195260763168335, "logits/rejected": -1.8201298713684082, "logps/chosen": -327.15350341796875, "logps/rejected": -280.43701171875, "loss": 0.2084, "rewards/accuracies": 0.875, "rewards/chosen": -1.5229108333587646, "rewards/margins": 3.891502857208252, "rewards/rejected": -5.414413928985596, "step": 1306 }, { "epoch": 0.27, "learning_rate": 1.4592436974789917e-05, "logits/chosen": -2.049452066421509, "logits/rejected": -1.7393059730529785, "logps/chosen": -297.6163635253906, "logps/rejected": -290.0152893066406, "loss": 0.0824, "rewards/accuracies": 1.0, "rewards/chosen": -2.4388139247894287, "rewards/margins": 3.801703929901123, "rewards/rejected": -6.240517616271973, "step": 1307 }, { "epoch": 0.27, "learning_rate": 1.4588235294117647e-05, "logits/chosen": -2.1753246784210205, "logits/rejected": -2.2490053176879883, "logps/chosen": -290.8746032714844, "logps/rejected": -377.81585693359375, "loss": 0.6163, "rewards/accuracies": 0.75, "rewards/chosen": -3.167635917663574, "rewards/margins": 2.7528676986694336, "rewards/rejected": -5.920504093170166, "step": 1308 }, { "epoch": 0.27, "learning_rate": 1.4584033613445379e-05, "logits/chosen": -2.2561471462249756, "logits/rejected": -1.9270517826080322, "logps/chosen": -389.7734375, "logps/rejected": -400.7041015625, "loss": 0.3211, "rewards/accuracies": 0.8125, "rewards/chosen": -1.382431983947754, "rewards/margins": 3.2687344551086426, "rewards/rejected": -4.6511664390563965, "step": 1309 }, { "epoch": 0.27, "learning_rate": 1.4579831932773109e-05, "logits/chosen": -2.1898486614227295, "logits/rejected": -1.756376028060913, "logps/chosen": -374.4280700683594, "logps/rejected": -323.3240051269531, "loss": 0.3636, "rewards/accuracies": 0.75, "rewards/chosen": -1.966078519821167, "rewards/margins": 2.720151424407959, "rewards/rejected": -4.686229705810547, "step": 1310 }, { "epoch": 0.27, "learning_rate": 1.4575630252100841e-05, "logits/chosen": -2.33266544342041, "logits/rejected": -1.8984947204589844, "logps/chosen": -522.5299072265625, "logps/rejected": -358.5198669433594, "loss": 0.4265, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7072179317474365, "rewards/margins": 3.603027820587158, "rewards/rejected": -6.310245990753174, "step": 1311 }, { "epoch": 0.27, "learning_rate": 1.4571428571428573e-05, "logits/chosen": -2.269780158996582, "logits/rejected": -1.7072854042053223, "logps/chosen": -409.7075500488281, "logps/rejected": -286.8658142089844, "loss": 0.2319, "rewards/accuracies": 0.875, "rewards/chosen": -1.6139099597930908, "rewards/margins": 4.00421667098999, "rewards/rejected": -5.61812686920166, "step": 1312 }, { "epoch": 0.27, "learning_rate": 1.4567226890756303e-05, "logits/chosen": -2.279707908630371, "logits/rejected": -1.958705186843872, "logps/chosen": -284.9219970703125, "logps/rejected": -266.9258117675781, "loss": 0.6472, "rewards/accuracies": 0.625, "rewards/chosen": -2.398683786392212, "rewards/margins": 1.7753677368164062, "rewards/rejected": -4.174051284790039, "step": 1313 }, { "epoch": 0.27, "learning_rate": 1.4563025210084035e-05, "logits/chosen": -2.028229236602783, "logits/rejected": -2.01723575592041, "logps/chosen": -225.2945556640625, "logps/rejected": -348.3422546386719, "loss": 0.5339, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0043818950653076, "rewards/margins": 2.426387310028076, "rewards/rejected": -4.430769443511963, "step": 1314 }, { "epoch": 0.28, "learning_rate": 1.4558823529411765e-05, "logits/chosen": -2.006513833999634, "logits/rejected": -1.862075924873352, "logps/chosen": -261.91943359375, "logps/rejected": -263.3758850097656, "loss": 0.4764, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2605090141296387, "rewards/margins": 3.3402509689331055, "rewards/rejected": -5.600759983062744, "step": 1315 }, { "epoch": 0.28, "learning_rate": 1.4554621848739497e-05, "logits/chosen": -2.073103189468384, "logits/rejected": -1.504417896270752, "logps/chosen": -366.36212158203125, "logps/rejected": -351.7047424316406, "loss": 0.29, "rewards/accuracies": 0.875, "rewards/chosen": -2.465597152709961, "rewards/margins": 3.9981558322906494, "rewards/rejected": -6.463752746582031, "step": 1316 }, { "epoch": 0.28, "learning_rate": 1.4550420168067227e-05, "logits/chosen": -1.9902853965759277, "logits/rejected": -1.7849637269973755, "logps/chosen": -382.65234375, "logps/rejected": -327.2167663574219, "loss": 0.2324, "rewards/accuracies": 0.9375, "rewards/chosen": -1.443115472793579, "rewards/margins": 3.523207426071167, "rewards/rejected": -4.966322898864746, "step": 1317 }, { "epoch": 0.28, "learning_rate": 1.454621848739496e-05, "logits/chosen": -2.1147031784057617, "logits/rejected": -1.4941411018371582, "logps/chosen": -310.183837890625, "logps/rejected": -373.72235107421875, "loss": 0.3102, "rewards/accuracies": 0.875, "rewards/chosen": -1.981034278869629, "rewards/margins": 3.8686423301696777, "rewards/rejected": -5.849676609039307, "step": 1318 }, { "epoch": 0.28, "learning_rate": 1.454201680672269e-05, "logits/chosen": -2.1353440284729004, "logits/rejected": -1.4690742492675781, "logps/chosen": -449.19293212890625, "logps/rejected": -315.3084716796875, "loss": 0.3037, "rewards/accuracies": 0.875, "rewards/chosen": -1.3374102115631104, "rewards/margins": 3.0057873725891113, "rewards/rejected": -4.343197822570801, "step": 1319 }, { "epoch": 0.28, "learning_rate": 1.4537815126050421e-05, "logits/chosen": -2.21055006980896, "logits/rejected": -2.2417702674865723, "logps/chosen": -322.8713684082031, "logps/rejected": -338.9656677246094, "loss": 0.2249, "rewards/accuracies": 0.875, "rewards/chosen": -1.5615465641021729, "rewards/margins": 3.093017578125, "rewards/rejected": -4.654563903808594, "step": 1320 }, { "epoch": 0.28, "learning_rate": 1.4533613445378152e-05, "logits/chosen": -2.0902719497680664, "logits/rejected": -1.757875919342041, "logps/chosen": -409.47332763671875, "logps/rejected": -330.07220458984375, "loss": 0.7013, "rewards/accuracies": 0.8125, "rewards/chosen": -2.09310245513916, "rewards/margins": 2.0264174938201904, "rewards/rejected": -4.11952018737793, "step": 1321 }, { "epoch": 0.28, "learning_rate": 1.4529411764705883e-05, "logits/chosen": -1.9497241973876953, "logits/rejected": -1.6293092966079712, "logps/chosen": -269.0858459472656, "logps/rejected": -393.73101806640625, "loss": 0.1908, "rewards/accuracies": 0.875, "rewards/chosen": -1.9988043308258057, "rewards/margins": 4.232430458068848, "rewards/rejected": -6.231234550476074, "step": 1322 }, { "epoch": 0.28, "learning_rate": 1.4525210084033614e-05, "logits/chosen": -1.8930530548095703, "logits/rejected": -1.6600115299224854, "logps/chosen": -306.607666015625, "logps/rejected": -291.586181640625, "loss": 0.1753, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6383129358291626, "rewards/margins": 4.642019271850586, "rewards/rejected": -6.280332088470459, "step": 1323 }, { "epoch": 0.28, "learning_rate": 1.4521008403361346e-05, "logits/chosen": -2.08837890625, "logits/rejected": -1.6526830196380615, "logps/chosen": -401.0961608886719, "logps/rejected": -346.9503173828125, "loss": 0.4379, "rewards/accuracies": 0.8125, "rewards/chosen": -2.025801181793213, "rewards/margins": 2.590909957885742, "rewards/rejected": -4.616711139678955, "step": 1324 }, { "epoch": 0.28, "learning_rate": 1.4516806722689076e-05, "logits/chosen": -2.074071168899536, "logits/rejected": -1.504553198814392, "logps/chosen": -343.7025146484375, "logps/rejected": -293.0475158691406, "loss": 0.3903, "rewards/accuracies": 0.75, "rewards/chosen": -2.9261579513549805, "rewards/margins": 2.914755344390869, "rewards/rejected": -5.840913772583008, "step": 1325 }, { "epoch": 0.28, "learning_rate": 1.4512605042016808e-05, "logits/chosen": -2.2069931030273438, "logits/rejected": -1.898561716079712, "logps/chosen": -250.13214111328125, "logps/rejected": -231.3361053466797, "loss": 0.1909, "rewards/accuracies": 0.9375, "rewards/chosen": -1.880393624305725, "rewards/margins": 4.386942386627197, "rewards/rejected": -6.267335891723633, "step": 1326 }, { "epoch": 0.28, "learning_rate": 1.4508403361344538e-05, "logits/chosen": -1.9026392698287964, "logits/rejected": -1.8256945610046387, "logps/chosen": -381.3522644042969, "logps/rejected": -377.548095703125, "loss": 0.3133, "rewards/accuracies": 0.875, "rewards/chosen": -2.120358943939209, "rewards/margins": 2.705876350402832, "rewards/rejected": -4.826234817504883, "step": 1327 }, { "epoch": 0.28, "learning_rate": 1.450420168067227e-05, "logits/chosen": -1.9654139280319214, "logits/rejected": -2.182123899459839, "logps/chosen": -359.7715759277344, "logps/rejected": -378.7041931152344, "loss": 0.7192, "rewards/accuracies": 0.625, "rewards/chosen": -1.767465353012085, "rewards/margins": 1.6621711254119873, "rewards/rejected": -3.4296364784240723, "step": 1328 }, { "epoch": 0.28, "learning_rate": 1.45e-05, "logits/chosen": -2.1207194328308105, "logits/rejected": -1.7894389629364014, "logps/chosen": -286.53564453125, "logps/rejected": -209.88235473632812, "loss": 0.1868, "rewards/accuracies": 0.875, "rewards/chosen": -2.2361412048339844, "rewards/margins": 3.677762985229492, "rewards/rejected": -5.913904190063477, "step": 1329 }, { "epoch": 0.28, "learning_rate": 1.4495798319327732e-05, "logits/chosen": -2.329207420349121, "logits/rejected": -1.724953055381775, "logps/chosen": -291.082275390625, "logps/rejected": -210.01910400390625, "loss": 0.502, "rewards/accuracies": 0.75, "rewards/chosen": -1.560848593711853, "rewards/margins": 1.8143994808197021, "rewards/rejected": -3.3752479553222656, "step": 1330 }, { "epoch": 0.28, "learning_rate": 1.4491596638655462e-05, "logits/chosen": -2.0099661350250244, "logits/rejected": -2.170433282852173, "logps/chosen": -372.9079284667969, "logps/rejected": -428.889404296875, "loss": 0.1353, "rewards/accuracies": 1.0, "rewards/chosen": -1.3571659326553345, "rewards/margins": 4.624889373779297, "rewards/rejected": -5.982055187225342, "step": 1331 }, { "epoch": 0.28, "learning_rate": 1.4487394957983194e-05, "logits/chosen": -1.8971928358078003, "logits/rejected": -1.4239734411239624, "logps/chosen": -375.3805847167969, "logps/rejected": -327.14764404296875, "loss": 0.2606, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8845016956329346, "rewards/margins": 3.621929883956909, "rewards/rejected": -5.506431579589844, "step": 1332 }, { "epoch": 0.28, "learning_rate": 1.4483193277310924e-05, "logits/chosen": -2.2307510375976562, "logits/rejected": -1.8042258024215698, "logps/chosen": -346.00054931640625, "logps/rejected": -318.7489013671875, "loss": 0.4428, "rewards/accuracies": 0.75, "rewards/chosen": -2.0084006786346436, "rewards/margins": 2.5001821517944336, "rewards/rejected": -4.508583068847656, "step": 1333 }, { "epoch": 0.28, "learning_rate": 1.4478991596638656e-05, "logits/chosen": -1.5064363479614258, "logits/rejected": -1.740829586982727, "logps/chosen": -323.7245788574219, "logps/rejected": -345.5672912597656, "loss": 0.2669, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2592781782150269, "rewards/margins": 3.6023240089416504, "rewards/rejected": -4.861601829528809, "step": 1334 }, { "epoch": 0.28, "learning_rate": 1.4474789915966388e-05, "logits/chosen": -2.2216014862060547, "logits/rejected": -2.0204238891601562, "logps/chosen": -311.18768310546875, "logps/rejected": -392.4696350097656, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -1.7154901027679443, "rewards/margins": 3.872194290161133, "rewards/rejected": -5.587684631347656, "step": 1335 }, { "epoch": 0.28, "learning_rate": 1.4470588235294118e-05, "logits/chosen": -1.9822871685028076, "logits/rejected": -1.7488360404968262, "logps/chosen": -280.7610168457031, "logps/rejected": -235.25970458984375, "loss": 0.321, "rewards/accuracies": 0.875, "rewards/chosen": -2.3817272186279297, "rewards/margins": 2.813412666320801, "rewards/rejected": -5.1951398849487305, "step": 1336 }, { "epoch": 0.28, "learning_rate": 1.446638655462185e-05, "logits/chosen": -2.03188419342041, "logits/rejected": -1.7353472709655762, "logps/chosen": -416.252685546875, "logps/rejected": -409.362060546875, "loss": 0.2245, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6621981859207153, "rewards/margins": 3.5806288719177246, "rewards/rejected": -4.24282693862915, "step": 1337 }, { "epoch": 0.28, "learning_rate": 1.446218487394958e-05, "logits/chosen": -2.3418874740600586, "logits/rejected": -2.1813251972198486, "logps/chosen": -511.1329650878906, "logps/rejected": -414.8935546875, "loss": 0.1129, "rewards/accuracies": 1.0, "rewards/chosen": -0.8967446088790894, "rewards/margins": 3.791290521621704, "rewards/rejected": -4.688035011291504, "step": 1338 }, { "epoch": 0.28, "learning_rate": 1.4457983193277312e-05, "logits/chosen": -2.1939597129821777, "logits/rejected": -2.1087088584899902, "logps/chosen": -262.8658752441406, "logps/rejected": -281.9622802734375, "loss": 0.5942, "rewards/accuracies": 0.75, "rewards/chosen": -1.960983157157898, "rewards/margins": 1.6198943853378296, "rewards/rejected": -3.5808775424957275, "step": 1339 }, { "epoch": 0.28, "learning_rate": 1.4453781512605043e-05, "logits/chosen": -1.946852207183838, "logits/rejected": -2.133152484893799, "logps/chosen": -316.7564697265625, "logps/rejected": -367.07684326171875, "loss": 0.5389, "rewards/accuracies": 0.75, "rewards/chosen": -2.003140926361084, "rewards/margins": 2.608355760574341, "rewards/rejected": -4.611496925354004, "step": 1340 }, { "epoch": 0.28, "learning_rate": 1.4449579831932775e-05, "logits/chosen": -2.0431888103485107, "logits/rejected": -2.161670684814453, "logps/chosen": -264.0682373046875, "logps/rejected": -371.82293701171875, "loss": 0.0378, "rewards/accuracies": 1.0, "rewards/chosen": -1.340522050857544, "rewards/margins": 4.733572483062744, "rewards/rejected": -6.074094772338867, "step": 1341 }, { "epoch": 0.28, "learning_rate": 1.4445378151260505e-05, "logits/chosen": -2.390133857727051, "logits/rejected": -2.0135855674743652, "logps/chosen": -314.830322265625, "logps/rejected": -286.2728271484375, "loss": 0.2072, "rewards/accuracies": 0.875, "rewards/chosen": -1.1324608325958252, "rewards/margins": 3.554328441619873, "rewards/rejected": -4.686789512634277, "step": 1342 }, { "epoch": 0.28, "learning_rate": 1.4441176470588237e-05, "logits/chosen": -2.2505812644958496, "logits/rejected": -1.9314416646957397, "logps/chosen": -270.4881591796875, "logps/rejected": -309.67431640625, "loss": 0.1867, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5261785984039307, "rewards/margins": 4.273470878601074, "rewards/rejected": -5.799649715423584, "step": 1343 }, { "epoch": 0.28, "learning_rate": 1.4436974789915967e-05, "logits/chosen": -2.0623788833618164, "logits/rejected": -2.1383819580078125, "logps/chosen": -438.3116455078125, "logps/rejected": -351.8424987792969, "loss": 0.6391, "rewards/accuracies": 0.6875, "rewards/chosen": -1.8496077060699463, "rewards/margins": 2.317702531814575, "rewards/rejected": -4.1673102378845215, "step": 1344 }, { "epoch": 0.28, "learning_rate": 1.4432773109243699e-05, "logits/chosen": -1.8724569082260132, "logits/rejected": -1.6876424551010132, "logps/chosen": -177.82357788085938, "logps/rejected": -213.06161499023438, "loss": 0.3606, "rewards/accuracies": 0.75, "rewards/chosen": -2.142467975616455, "rewards/margins": 2.969913959503174, "rewards/rejected": -5.112381935119629, "step": 1345 }, { "epoch": 0.28, "learning_rate": 1.4428571428571429e-05, "logits/chosen": -2.0339412689208984, "logits/rejected": -1.7853180170059204, "logps/chosen": -353.97137451171875, "logps/rejected": -302.7935791015625, "loss": 0.4354, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6244193315505981, "rewards/margins": 3.8671085834503174, "rewards/rejected": -5.491527557373047, "step": 1346 }, { "epoch": 0.28, "learning_rate": 1.4424369747899161e-05, "logits/chosen": -1.5565789937973022, "logits/rejected": -1.540840983390808, "logps/chosen": -265.2157287597656, "logps/rejected": -309.3747253417969, "loss": 0.0784, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1418890953063965, "rewards/margins": 5.682125091552734, "rewards/rejected": -7.824014663696289, "step": 1347 }, { "epoch": 0.28, "learning_rate": 1.4420168067226891e-05, "logits/chosen": -1.9758803844451904, "logits/rejected": -1.7769906520843506, "logps/chosen": -356.5367431640625, "logps/rejected": -361.5923767089844, "loss": 0.487, "rewards/accuracies": 0.75, "rewards/chosen": -2.301805257797241, "rewards/margins": 1.9082293510437012, "rewards/rejected": -4.210034370422363, "step": 1348 }, { "epoch": 0.28, "learning_rate": 1.4415966386554623e-05, "logits/chosen": -2.1623311042785645, "logits/rejected": -1.3819507360458374, "logps/chosen": -312.258056640625, "logps/rejected": -269.7995300292969, "loss": 0.2686, "rewards/accuracies": 0.875, "rewards/chosen": -2.9637415409088135, "rewards/margins": 3.338813066482544, "rewards/rejected": -6.302555084228516, "step": 1349 }, { "epoch": 0.28, "learning_rate": 1.4411764705882353e-05, "logits/chosen": -2.185626268386841, "logits/rejected": -1.970828890800476, "logps/chosen": -305.52099609375, "logps/rejected": -313.5020751953125, "loss": 0.2481, "rewards/accuracies": 0.875, "rewards/chosen": -1.7606041431427002, "rewards/margins": 3.6268043518066406, "rewards/rejected": -5.387408256530762, "step": 1350 }, { "epoch": 0.28, "learning_rate": 1.4407563025210085e-05, "logits/chosen": -2.145057201385498, "logits/rejected": -1.7470591068267822, "logps/chosen": -342.51934814453125, "logps/rejected": -304.72381591796875, "loss": 0.5061, "rewards/accuracies": 0.625, "rewards/chosen": -2.2816319465637207, "rewards/margins": 2.417202949523926, "rewards/rejected": -4.698835372924805, "step": 1351 }, { "epoch": 0.28, "learning_rate": 1.4403361344537816e-05, "logits/chosen": -2.219416379928589, "logits/rejected": -2.1211817264556885, "logps/chosen": -291.16815185546875, "logps/rejected": -315.04644775390625, "loss": 0.4846, "rewards/accuracies": 0.75, "rewards/chosen": -2.0490496158599854, "rewards/margins": 2.4418845176696777, "rewards/rejected": -4.490933895111084, "step": 1352 }, { "epoch": 0.28, "learning_rate": 1.4399159663865547e-05, "logits/chosen": -2.3597846031188965, "logits/rejected": -2.012449264526367, "logps/chosen": -328.9736022949219, "logps/rejected": -285.1427917480469, "loss": 0.6528, "rewards/accuracies": 0.5, "rewards/chosen": -2.1324307918548584, "rewards/margins": 2.0794105529785156, "rewards/rejected": -4.211841106414795, "step": 1353 }, { "epoch": 0.28, "learning_rate": 1.4394957983193278e-05, "logits/chosen": -2.3281662464141846, "logits/rejected": -1.9261395931243896, "logps/chosen": -277.23504638671875, "logps/rejected": -291.3643798828125, "loss": 0.1952, "rewards/accuracies": 0.875, "rewards/chosen": -1.5924992561340332, "rewards/margins": 3.4374356269836426, "rewards/rejected": -5.029934883117676, "step": 1354 }, { "epoch": 0.28, "learning_rate": 1.439075630252101e-05, "logits/chosen": -2.215686559677124, "logits/rejected": -1.8025391101837158, "logps/chosen": -368.61248779296875, "logps/rejected": -334.749267578125, "loss": 0.3733, "rewards/accuracies": 0.8125, "rewards/chosen": -2.251800298690796, "rewards/margins": 3.607045888900757, "rewards/rejected": -5.858846187591553, "step": 1355 }, { "epoch": 0.28, "learning_rate": 1.4386554621848741e-05, "logits/chosen": -2.04526948928833, "logits/rejected": -1.8921620845794678, "logps/chosen": -338.46295166015625, "logps/rejected": -273.7668762207031, "loss": 0.3371, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9005348682403564, "rewards/margins": 4.121726036071777, "rewards/rejected": -6.022260665893555, "step": 1356 }, { "epoch": 0.28, "learning_rate": 1.4382352941176472e-05, "logits/chosen": -2.20761775970459, "logits/rejected": -2.3307690620422363, "logps/chosen": -305.7279052734375, "logps/rejected": -340.6954040527344, "loss": 0.4888, "rewards/accuracies": 0.6875, "rewards/chosen": -2.021620035171509, "rewards/margins": 4.108015537261963, "rewards/rejected": -6.129635810852051, "step": 1357 }, { "epoch": 0.28, "learning_rate": 1.4378151260504204e-05, "logits/chosen": -2.196288585662842, "logits/rejected": -2.134573459625244, "logps/chosen": -357.29656982421875, "logps/rejected": -398.37384033203125, "loss": 0.2675, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2381525039672852, "rewards/margins": 5.813896179199219, "rewards/rejected": -7.052048683166504, "step": 1358 }, { "epoch": 0.28, "learning_rate": 1.4373949579831934e-05, "logits/chosen": -1.9696464538574219, "logits/rejected": -1.7178163528442383, "logps/chosen": -244.8395538330078, "logps/rejected": -249.77944946289062, "loss": 0.3165, "rewards/accuracies": 0.75, "rewards/chosen": -2.1890835762023926, "rewards/margins": 4.14453125, "rewards/rejected": -6.333614826202393, "step": 1359 }, { "epoch": 0.28, "learning_rate": 1.4369747899159666e-05, "logits/chosen": -2.192800998687744, "logits/rejected": -2.068331003189087, "logps/chosen": -403.55072021484375, "logps/rejected": -467.9599609375, "loss": 0.5077, "rewards/accuracies": 0.875, "rewards/chosen": -1.5516631603240967, "rewards/margins": 4.056065559387207, "rewards/rejected": -5.607728958129883, "step": 1360 }, { "epoch": 0.28, "learning_rate": 1.4365546218487396e-05, "logits/chosen": -1.9563089609146118, "logits/rejected": -1.8423224687576294, "logps/chosen": -376.00592041015625, "logps/rejected": -324.96246337890625, "loss": 0.2359, "rewards/accuracies": 0.9375, "rewards/chosen": -1.748529314994812, "rewards/margins": 2.0949015617370605, "rewards/rejected": -3.843430995941162, "step": 1361 }, { "epoch": 0.28, "learning_rate": 1.4361344537815128e-05, "logits/chosen": -2.38944149017334, "logits/rejected": -1.783832311630249, "logps/chosen": -526.784423828125, "logps/rejected": -452.2239074707031, "loss": 0.1184, "rewards/accuracies": 1.0, "rewards/chosen": -1.2716701030731201, "rewards/margins": 4.190761566162109, "rewards/rejected": -5.462431907653809, "step": 1362 }, { "epoch": 0.29, "learning_rate": 1.4357142857142858e-05, "logits/chosen": -2.2259650230407715, "logits/rejected": -1.9749398231506348, "logps/chosen": -462.24713134765625, "logps/rejected": -349.829833984375, "loss": 0.1995, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6469653844833374, "rewards/margins": 4.059388160705566, "rewards/rejected": -5.706353187561035, "step": 1363 }, { "epoch": 0.29, "learning_rate": 1.435294117647059e-05, "logits/chosen": -1.866605281829834, "logits/rejected": -1.9206607341766357, "logps/chosen": -307.0938720703125, "logps/rejected": -346.87939453125, "loss": 0.6358, "rewards/accuracies": 0.75, "rewards/chosen": -1.9438176155090332, "rewards/margins": 2.2784810066223145, "rewards/rejected": -4.222298622131348, "step": 1364 }, { "epoch": 0.29, "learning_rate": 1.434873949579832e-05, "logits/chosen": -2.322901487350464, "logits/rejected": -2.1697988510131836, "logps/chosen": -293.63720703125, "logps/rejected": -384.8137512207031, "loss": 0.0871, "rewards/accuracies": 1.0, "rewards/chosen": -1.6706867218017578, "rewards/margins": 4.994970321655273, "rewards/rejected": -6.665657043457031, "step": 1365 }, { "epoch": 0.29, "learning_rate": 1.4344537815126052e-05, "logits/chosen": -2.1275501251220703, "logits/rejected": -1.7042955160140991, "logps/chosen": -274.61004638671875, "logps/rejected": -272.31329345703125, "loss": 0.2297, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5854241847991943, "rewards/margins": 3.9313039779663086, "rewards/rejected": -5.516728401184082, "step": 1366 }, { "epoch": 0.29, "learning_rate": 1.4340336134453782e-05, "logits/chosen": -2.3549113273620605, "logits/rejected": -1.7642543315887451, "logps/chosen": -307.1114807128906, "logps/rejected": -257.6966247558594, "loss": 0.2112, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9655675888061523, "rewards/margins": 4.53652286529541, "rewards/rejected": -6.502089977264404, "step": 1367 }, { "epoch": 0.29, "learning_rate": 1.4336134453781514e-05, "logits/chosen": -1.9896125793457031, "logits/rejected": -1.9963213205337524, "logps/chosen": -331.064208984375, "logps/rejected": -333.5207214355469, "loss": 0.3627, "rewards/accuracies": 0.75, "rewards/chosen": -1.6463813781738281, "rewards/margins": 3.4043655395507812, "rewards/rejected": -5.050746917724609, "step": 1368 }, { "epoch": 0.29, "learning_rate": 1.4331932773109245e-05, "logits/chosen": -1.8610866069793701, "logits/rejected": -1.3744876384735107, "logps/chosen": -483.6385803222656, "logps/rejected": -394.76666259765625, "loss": 0.2273, "rewards/accuracies": 1.0, "rewards/chosen": -1.8994272947311401, "rewards/margins": 2.30643630027771, "rewards/rejected": -4.205862998962402, "step": 1369 }, { "epoch": 0.29, "learning_rate": 1.4327731092436976e-05, "logits/chosen": -2.0155768394470215, "logits/rejected": -1.7789020538330078, "logps/chosen": -359.98345947265625, "logps/rejected": -367.2741394042969, "loss": 0.2914, "rewards/accuracies": 0.875, "rewards/chosen": -2.6618175506591797, "rewards/margins": 5.603056907653809, "rewards/rejected": -8.264874458312988, "step": 1370 }, { "epoch": 0.29, "learning_rate": 1.4323529411764707e-05, "logits/chosen": -2.188124656677246, "logits/rejected": -2.212658405303955, "logps/chosen": -259.1091003417969, "logps/rejected": -336.0252990722656, "loss": 0.3649, "rewards/accuracies": 0.8125, "rewards/chosen": -2.294404983520508, "rewards/margins": 2.6968204975128174, "rewards/rejected": -4.991225719451904, "step": 1371 }, { "epoch": 0.29, "learning_rate": 1.4319327731092439e-05, "logits/chosen": -1.9551945924758911, "logits/rejected": -2.2238640785217285, "logps/chosen": -244.2034454345703, "logps/rejected": -278.9943542480469, "loss": 0.6181, "rewards/accuracies": 0.75, "rewards/chosen": -2.210861921310425, "rewards/margins": 2.8090715408325195, "rewards/rejected": -5.019933223724365, "step": 1372 }, { "epoch": 0.29, "learning_rate": 1.4315126050420169e-05, "logits/chosen": -2.5590403079986572, "logits/rejected": -1.902327537536621, "logps/chosen": -402.0172424316406, "logps/rejected": -352.5087890625, "loss": 0.2206, "rewards/accuracies": 0.875, "rewards/chosen": -1.3248834609985352, "rewards/margins": 3.991797685623169, "rewards/rejected": -5.316681385040283, "step": 1373 }, { "epoch": 0.29, "learning_rate": 1.43109243697479e-05, "logits/chosen": -2.2591326236724854, "logits/rejected": -2.1572072505950928, "logps/chosen": -363.91986083984375, "logps/rejected": -314.7950134277344, "loss": 0.3981, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9238401651382446, "rewards/margins": 2.5552515983581543, "rewards/rejected": -4.479092121124268, "step": 1374 }, { "epoch": 0.29, "learning_rate": 1.4306722689075631e-05, "logits/chosen": -2.0042858123779297, "logits/rejected": -1.991528034210205, "logps/chosen": -216.24986267089844, "logps/rejected": -282.8819885253906, "loss": 0.3352, "rewards/accuracies": 0.9375, "rewards/chosen": -1.637871503829956, "rewards/margins": 4.296302318572998, "rewards/rejected": -5.934174537658691, "step": 1375 }, { "epoch": 0.29, "learning_rate": 1.4302521008403363e-05, "logits/chosen": -1.7971625328063965, "logits/rejected": -1.8850367069244385, "logps/chosen": -295.9991149902344, "logps/rejected": -342.00982666015625, "loss": 0.2578, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4014968872070312, "rewards/margins": 3.755101442337036, "rewards/rejected": -6.1565985679626465, "step": 1376 }, { "epoch": 0.29, "learning_rate": 1.4298319327731093e-05, "logits/chosen": -1.9173038005828857, "logits/rejected": -2.0198090076446533, "logps/chosen": -236.44973754882812, "logps/rejected": -332.3042297363281, "loss": 0.7922, "rewards/accuracies": 0.6875, "rewards/chosen": -2.514732837677002, "rewards/margins": 3.3359315395355225, "rewards/rejected": -5.850664138793945, "step": 1377 }, { "epoch": 0.29, "learning_rate": 1.4294117647058825e-05, "logits/chosen": -1.889622688293457, "logits/rejected": -1.6587648391723633, "logps/chosen": -322.12762451171875, "logps/rejected": -361.54913330078125, "loss": 0.4025, "rewards/accuracies": 0.75, "rewards/chosen": -2.3205466270446777, "rewards/margins": 3.280498504638672, "rewards/rejected": -5.60104513168335, "step": 1378 }, { "epoch": 0.29, "learning_rate": 1.4289915966386557e-05, "logits/chosen": -1.9957934617996216, "logits/rejected": -1.8034297227859497, "logps/chosen": -385.8522033691406, "logps/rejected": -391.60791015625, "loss": 0.2685, "rewards/accuracies": 0.875, "rewards/chosen": -1.900431513786316, "rewards/margins": 4.756706237792969, "rewards/rejected": -6.657137870788574, "step": 1379 }, { "epoch": 0.29, "learning_rate": 1.4285714285714287e-05, "logits/chosen": -2.246464252471924, "logits/rejected": -2.290904998779297, "logps/chosen": -484.63482666015625, "logps/rejected": -521.557861328125, "loss": 0.2752, "rewards/accuracies": 0.875, "rewards/chosen": -1.4362725019454956, "rewards/margins": 4.5701680183410645, "rewards/rejected": -6.006440162658691, "step": 1380 }, { "epoch": 0.29, "learning_rate": 1.4281512605042019e-05, "logits/chosen": -2.1868128776550293, "logits/rejected": -2.008037567138672, "logps/chosen": -291.0711975097656, "logps/rejected": -363.279541015625, "loss": 0.7281, "rewards/accuracies": 0.625, "rewards/chosen": -2.4228601455688477, "rewards/margins": 1.9803717136383057, "rewards/rejected": -4.403232097625732, "step": 1381 }, { "epoch": 0.29, "learning_rate": 1.427731092436975e-05, "logits/chosen": -2.1113853454589844, "logits/rejected": -1.9716029167175293, "logps/chosen": -206.6416015625, "logps/rejected": -233.48175048828125, "loss": 0.2923, "rewards/accuracies": 0.8125, "rewards/chosen": -1.611916422843933, "rewards/margins": 2.6725914478302, "rewards/rejected": -4.284507751464844, "step": 1382 }, { "epoch": 0.29, "learning_rate": 1.4273109243697481e-05, "logits/chosen": -2.266472339630127, "logits/rejected": -1.6123251914978027, "logps/chosen": -259.5782165527344, "logps/rejected": -239.5970458984375, "loss": 0.1817, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0504469871520996, "rewards/margins": 3.492048740386963, "rewards/rejected": -5.5424957275390625, "step": 1383 }, { "epoch": 0.29, "learning_rate": 1.4268907563025211e-05, "logits/chosen": -2.2564308643341064, "logits/rejected": -2.074984550476074, "logps/chosen": -246.646240234375, "logps/rejected": -261.6790771484375, "loss": 0.5386, "rewards/accuracies": 0.625, "rewards/chosen": -1.7185564041137695, "rewards/margins": 2.2213246822357178, "rewards/rejected": -3.939880847930908, "step": 1384 }, { "epoch": 0.29, "learning_rate": 1.4264705882352943e-05, "logits/chosen": -2.042741060256958, "logits/rejected": -2.086050510406494, "logps/chosen": -249.09671020507812, "logps/rejected": -305.54327392578125, "loss": 0.3511, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8466858863830566, "rewards/margins": 2.257530689239502, "rewards/rejected": -5.104217052459717, "step": 1385 }, { "epoch": 0.29, "learning_rate": 1.4260504201680674e-05, "logits/chosen": -1.9240500926971436, "logits/rejected": -1.71843683719635, "logps/chosen": -378.9152526855469, "logps/rejected": -288.91363525390625, "loss": 0.3061, "rewards/accuracies": 0.875, "rewards/chosen": -1.9485299587249756, "rewards/margins": 2.8650171756744385, "rewards/rejected": -4.813547134399414, "step": 1386 }, { "epoch": 0.29, "learning_rate": 1.4256302521008405e-05, "logits/chosen": -2.0926356315612793, "logits/rejected": -1.8393793106079102, "logps/chosen": -310.0332336425781, "logps/rejected": -357.0968017578125, "loss": 0.1234, "rewards/accuracies": 1.0, "rewards/chosen": -1.5772117376327515, "rewards/margins": 4.06320333480835, "rewards/rejected": -5.640415191650391, "step": 1387 }, { "epoch": 0.29, "learning_rate": 1.4252100840336136e-05, "logits/chosen": -2.328073263168335, "logits/rejected": -1.8795721530914307, "logps/chosen": -310.5036315917969, "logps/rejected": -258.7889404296875, "loss": 0.4366, "rewards/accuracies": 0.8125, "rewards/chosen": -2.696972608566284, "rewards/margins": 2.1630706787109375, "rewards/rejected": -4.860043525695801, "step": 1388 }, { "epoch": 0.29, "learning_rate": 1.4247899159663868e-05, "logits/chosen": -2.2217347621917725, "logits/rejected": -1.9185413122177124, "logps/chosen": -410.98291015625, "logps/rejected": -374.36431884765625, "loss": 0.1633, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1255168914794922, "rewards/margins": 4.061927795410156, "rewards/rejected": -5.187444686889648, "step": 1389 }, { "epoch": 0.29, "learning_rate": 1.4243697478991598e-05, "logits/chosen": -2.434920310974121, "logits/rejected": -1.9310954809188843, "logps/chosen": -352.0817565917969, "logps/rejected": -306.16217041015625, "loss": 0.402, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3086655139923096, "rewards/margins": 2.7785186767578125, "rewards/rejected": -5.087183952331543, "step": 1390 }, { "epoch": 0.29, "learning_rate": 1.423949579831933e-05, "logits/chosen": -2.722306489944458, "logits/rejected": -1.9645229578018188, "logps/chosen": -463.1763000488281, "logps/rejected": -381.077880859375, "loss": 0.1992, "rewards/accuracies": 0.875, "rewards/chosen": -1.8835253715515137, "rewards/margins": 5.22479248046875, "rewards/rejected": -7.108318328857422, "step": 1391 }, { "epoch": 0.29, "learning_rate": 1.423529411764706e-05, "logits/chosen": -1.903465747833252, "logits/rejected": -1.8752306699752808, "logps/chosen": -294.4646911621094, "logps/rejected": -315.959716796875, "loss": 0.255, "rewards/accuracies": 0.875, "rewards/chosen": -2.365931510925293, "rewards/margins": 3.459804058074951, "rewards/rejected": -5.825736045837402, "step": 1392 }, { "epoch": 0.29, "learning_rate": 1.4231092436974792e-05, "logits/chosen": -1.9860609769821167, "logits/rejected": -1.8734023571014404, "logps/chosen": -263.76318359375, "logps/rejected": -421.1259765625, "loss": 0.584, "rewards/accuracies": 0.75, "rewards/chosen": -1.8977365493774414, "rewards/margins": 2.9429378509521484, "rewards/rejected": -4.84067440032959, "step": 1393 }, { "epoch": 0.29, "learning_rate": 1.4226890756302522e-05, "logits/chosen": -2.0194225311279297, "logits/rejected": -2.149589776992798, "logps/chosen": -299.4510498046875, "logps/rejected": -353.85516357421875, "loss": 0.116, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5131078958511353, "rewards/margins": 4.944719314575195, "rewards/rejected": -6.457827091217041, "step": 1394 }, { "epoch": 0.29, "learning_rate": 1.4222689075630254e-05, "logits/chosen": -2.270944356918335, "logits/rejected": -2.208620309829712, "logps/chosen": -356.35430908203125, "logps/rejected": -359.2975769042969, "loss": 0.4048, "rewards/accuracies": 0.75, "rewards/chosen": -1.4218827486038208, "rewards/margins": 2.709190607070923, "rewards/rejected": -4.131073474884033, "step": 1395 }, { "epoch": 0.29, "learning_rate": 1.4218487394957984e-05, "logits/chosen": -2.1151790618896484, "logits/rejected": -1.9973409175872803, "logps/chosen": -378.75018310546875, "logps/rejected": -381.0204162597656, "loss": 0.511, "rewards/accuracies": 0.75, "rewards/chosen": -1.8107185363769531, "rewards/margins": 2.4462521076202393, "rewards/rejected": -4.256970405578613, "step": 1396 }, { "epoch": 0.29, "learning_rate": 1.4214285714285716e-05, "logits/chosen": -2.3903427124023438, "logits/rejected": -1.8573503494262695, "logps/chosen": -474.4723205566406, "logps/rejected": -374.26409912109375, "loss": 0.264, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2410386800765991, "rewards/margins": 3.4629642963409424, "rewards/rejected": -4.704002857208252, "step": 1397 }, { "epoch": 0.29, "learning_rate": 1.4210084033613446e-05, "logits/chosen": -1.777158260345459, "logits/rejected": -1.8338042497634888, "logps/chosen": -291.2160949707031, "logps/rejected": -375.8805847167969, "loss": 0.2922, "rewards/accuracies": 0.8125, "rewards/chosen": -1.819044828414917, "rewards/margins": 4.440203666687012, "rewards/rejected": -6.259248733520508, "step": 1398 }, { "epoch": 0.29, "learning_rate": 1.4205882352941178e-05, "logits/chosen": -2.0171923637390137, "logits/rejected": -2.1588454246520996, "logps/chosen": -378.64129638671875, "logps/rejected": -435.9288330078125, "loss": 0.2699, "rewards/accuracies": 0.875, "rewards/chosen": -1.9165548086166382, "rewards/margins": 4.612350940704346, "rewards/rejected": -6.528904914855957, "step": 1399 }, { "epoch": 0.29, "learning_rate": 1.4201680672268908e-05, "logits/chosen": -2.226290702819824, "logits/rejected": -1.8148497343063354, "logps/chosen": -428.9877624511719, "logps/rejected": -361.9025573730469, "loss": 0.3227, "rewards/accuracies": 0.8125, "rewards/chosen": -1.497218132019043, "rewards/margins": 4.796828746795654, "rewards/rejected": -6.294047832489014, "step": 1400 }, { "epoch": 0.29, "learning_rate": 1.419747899159664e-05, "logits/chosen": -2.370039463043213, "logits/rejected": -1.8396408557891846, "logps/chosen": -376.5686340332031, "logps/rejected": -353.7760009765625, "loss": 0.4752, "rewards/accuracies": 0.75, "rewards/chosen": -2.027282953262329, "rewards/margins": 2.6561946868896484, "rewards/rejected": -4.683477401733398, "step": 1401 }, { "epoch": 0.29, "learning_rate": 1.4193277310924372e-05, "logits/chosen": -2.050440549850464, "logits/rejected": -1.3979960680007935, "logps/chosen": -429.3384094238281, "logps/rejected": -331.03118896484375, "loss": 0.1083, "rewards/accuracies": 1.0, "rewards/chosen": -1.4097628593444824, "rewards/margins": 3.863023519515991, "rewards/rejected": -5.2727861404418945, "step": 1402 }, { "epoch": 0.29, "learning_rate": 1.4189075630252103e-05, "logits/chosen": -2.343156337738037, "logits/rejected": -2.148479700088501, "logps/chosen": -340.35040283203125, "logps/rejected": -346.0924987792969, "loss": 0.3717, "rewards/accuracies": 0.9375, "rewards/chosen": -2.152132511138916, "rewards/margins": 3.4840362071990967, "rewards/rejected": -5.636168479919434, "step": 1403 }, { "epoch": 0.29, "learning_rate": 1.4184873949579834e-05, "logits/chosen": -2.289759635925293, "logits/rejected": -1.5942713022232056, "logps/chosen": -287.6488037109375, "logps/rejected": -287.570556640625, "loss": 0.2233, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8250104188919067, "rewards/margins": 3.9715144634246826, "rewards/rejected": -5.796525001525879, "step": 1404 }, { "epoch": 0.29, "learning_rate": 1.4180672268907565e-05, "logits/chosen": -2.307034492492676, "logits/rejected": -1.817615032196045, "logps/chosen": -266.3467712402344, "logps/rejected": -298.97711181640625, "loss": 0.3836, "rewards/accuracies": 0.6875, "rewards/chosen": -1.6582483053207397, "rewards/margins": 3.8191475868225098, "rewards/rejected": -5.477396011352539, "step": 1405 }, { "epoch": 0.29, "learning_rate": 1.4176470588235297e-05, "logits/chosen": -2.200026035308838, "logits/rejected": -2.2222208976745605, "logps/chosen": -372.63671875, "logps/rejected": -288.7843017578125, "loss": 0.6764, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3803563117980957, "rewards/margins": 2.51523494720459, "rewards/rejected": -4.8955912590026855, "step": 1406 }, { "epoch": 0.29, "learning_rate": 1.4172268907563027e-05, "logits/chosen": -1.7498670816421509, "logits/rejected": -1.4911487102508545, "logps/chosen": -302.2646484375, "logps/rejected": -356.66064453125, "loss": 0.1221, "rewards/accuracies": 0.9375, "rewards/chosen": -1.326611876487732, "rewards/margins": 6.048299312591553, "rewards/rejected": -7.374910354614258, "step": 1407 }, { "epoch": 0.29, "learning_rate": 1.4168067226890759e-05, "logits/chosen": -1.918386697769165, "logits/rejected": -2.01069712638855, "logps/chosen": -318.57952880859375, "logps/rejected": -327.1434631347656, "loss": 0.139, "rewards/accuracies": 0.9375, "rewards/chosen": -1.995417833328247, "rewards/margins": 4.115584373474121, "rewards/rejected": -6.111002445220947, "step": 1408 }, { "epoch": 0.29, "learning_rate": 1.4163865546218489e-05, "logits/chosen": -2.5465407371520996, "logits/rejected": -2.0716960430145264, "logps/chosen": -307.10791015625, "logps/rejected": -270.0506591796875, "loss": 0.4094, "rewards/accuracies": 0.8125, "rewards/chosen": -2.426265239715576, "rewards/margins": 3.5469722747802734, "rewards/rejected": -5.97323751449585, "step": 1409 }, { "epoch": 0.29, "learning_rate": 1.4159663865546221e-05, "logits/chosen": -2.2793445587158203, "logits/rejected": -1.9437533617019653, "logps/chosen": -354.13043212890625, "logps/rejected": -317.4933166503906, "loss": 0.3388, "rewards/accuracies": 0.8125, "rewards/chosen": -2.526595115661621, "rewards/margins": 4.063589096069336, "rewards/rejected": -6.590184211730957, "step": 1410 }, { "epoch": 0.3, "learning_rate": 1.4155462184873951e-05, "logits/chosen": -2.152052640914917, "logits/rejected": -2.047597646713257, "logps/chosen": -282.5943603515625, "logps/rejected": -351.9524230957031, "loss": 0.3019, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7080355882644653, "rewards/margins": 3.4433722496032715, "rewards/rejected": -5.151407718658447, "step": 1411 }, { "epoch": 0.3, "learning_rate": 1.4151260504201683e-05, "logits/chosen": -1.9197349548339844, "logits/rejected": -2.351728916168213, "logps/chosen": -367.78955078125, "logps/rejected": -420.56182861328125, "loss": 0.2875, "rewards/accuracies": 0.875, "rewards/chosen": -1.511461615562439, "rewards/margins": 4.561715602874756, "rewards/rejected": -6.073177337646484, "step": 1412 }, { "epoch": 0.3, "learning_rate": 1.4147058823529413e-05, "logits/chosen": -2.3096063137054443, "logits/rejected": -1.6582013368606567, "logps/chosen": -457.0124206542969, "logps/rejected": -337.5274658203125, "loss": 0.1846, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0087244510650635, "rewards/margins": 3.5492029190063477, "rewards/rejected": -5.557927131652832, "step": 1413 }, { "epoch": 0.3, "learning_rate": 1.4142857142857145e-05, "logits/chosen": -2.1169986724853516, "logits/rejected": -1.9908413887023926, "logps/chosen": -303.75579833984375, "logps/rejected": -331.300048828125, "loss": 0.4889, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7114627361297607, "rewards/margins": 3.6085968017578125, "rewards/rejected": -5.320059299468994, "step": 1414 }, { "epoch": 0.3, "learning_rate": 1.4138655462184875e-05, "logits/chosen": -1.9522316455841064, "logits/rejected": -1.7111420631408691, "logps/chosen": -309.3537902832031, "logps/rejected": -359.9637451171875, "loss": 0.5529, "rewards/accuracies": 0.75, "rewards/chosen": -2.2922170162200928, "rewards/margins": 3.2318155765533447, "rewards/rejected": -5.524031639099121, "step": 1415 }, { "epoch": 0.3, "learning_rate": 1.4134453781512607e-05, "logits/chosen": -2.042774200439453, "logits/rejected": -2.008208751678467, "logps/chosen": -307.2394104003906, "logps/rejected": -354.38079833984375, "loss": 0.2916, "rewards/accuracies": 0.75, "rewards/chosen": -1.459295392036438, "rewards/margins": 4.657646656036377, "rewards/rejected": -6.116942405700684, "step": 1416 }, { "epoch": 0.3, "learning_rate": 1.4130252100840338e-05, "logits/chosen": -2.1195902824401855, "logits/rejected": -2.215888261795044, "logps/chosen": -345.33868408203125, "logps/rejected": -351.12353515625, "loss": 0.3433, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9914941787719727, "rewards/margins": 3.133695602416992, "rewards/rejected": -5.125189781188965, "step": 1417 }, { "epoch": 0.3, "learning_rate": 1.412605042016807e-05, "logits/chosen": -2.2169976234436035, "logits/rejected": -2.1237587928771973, "logps/chosen": -332.54766845703125, "logps/rejected": -393.8397216796875, "loss": 0.4222, "rewards/accuracies": 0.75, "rewards/chosen": -2.5497066974639893, "rewards/margins": 4.237919807434082, "rewards/rejected": -6.787626266479492, "step": 1418 }, { "epoch": 0.3, "learning_rate": 1.41218487394958e-05, "logits/chosen": -2.218597650527954, "logits/rejected": -2.225863456726074, "logps/chosen": -284.6513366699219, "logps/rejected": -314.7806396484375, "loss": 0.1528, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5764003992080688, "rewards/margins": 3.9082117080688477, "rewards/rejected": -5.484612464904785, "step": 1419 }, { "epoch": 0.3, "learning_rate": 1.4117647058823532e-05, "logits/chosen": -2.3294286727905273, "logits/rejected": -1.8868989944458008, "logps/chosen": -221.39682006835938, "logps/rejected": -273.1705017089844, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -1.5244605541229248, "rewards/margins": 6.720041275024414, "rewards/rejected": -8.244502067565918, "step": 1420 }, { "epoch": 0.3, "learning_rate": 1.4113445378151262e-05, "logits/chosen": -1.9156928062438965, "logits/rejected": -2.148045063018799, "logps/chosen": -355.34710693359375, "logps/rejected": -393.51190185546875, "loss": 0.6206, "rewards/accuracies": 0.5625, "rewards/chosen": -2.2118263244628906, "rewards/margins": 1.231339454650879, "rewards/rejected": -3.4431657791137695, "step": 1421 }, { "epoch": 0.3, "learning_rate": 1.4109243697478994e-05, "logits/chosen": -1.7248703241348267, "logits/rejected": -1.6772284507751465, "logps/chosen": -322.4893798828125, "logps/rejected": -353.74322509765625, "loss": 0.3688, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3381917476654053, "rewards/margins": 3.126230478286743, "rewards/rejected": -5.464422225952148, "step": 1422 }, { "epoch": 0.3, "learning_rate": 1.4105042016806726e-05, "logits/chosen": -2.0415499210357666, "logits/rejected": -1.9954216480255127, "logps/chosen": -335.72906494140625, "logps/rejected": -324.9419250488281, "loss": 0.2606, "rewards/accuracies": 0.875, "rewards/chosen": -1.8611574172973633, "rewards/margins": 4.32957124710083, "rewards/rejected": -6.190728187561035, "step": 1423 }, { "epoch": 0.3, "learning_rate": 1.4100840336134456e-05, "logits/chosen": -2.2340097427368164, "logits/rejected": -1.9404314756393433, "logps/chosen": -409.8175048828125, "logps/rejected": -471.13421630859375, "loss": 0.2892, "rewards/accuracies": 0.875, "rewards/chosen": -1.4455287456512451, "rewards/margins": 2.9760255813598633, "rewards/rejected": -4.4215545654296875, "step": 1424 }, { "epoch": 0.3, "learning_rate": 1.4096638655462188e-05, "logits/chosen": -2.0100035667419434, "logits/rejected": -2.005782127380371, "logps/chosen": -354.4512634277344, "logps/rejected": -388.3296813964844, "loss": 0.2543, "rewards/accuracies": 0.8125, "rewards/chosen": -2.003450393676758, "rewards/margins": 4.292349338531494, "rewards/rejected": -6.295799732208252, "step": 1425 }, { "epoch": 0.3, "learning_rate": 1.4092436974789918e-05, "logits/chosen": -2.0746102333068848, "logits/rejected": -1.8971270322799683, "logps/chosen": -335.074951171875, "logps/rejected": -378.8569641113281, "loss": 0.4782, "rewards/accuracies": 0.875, "rewards/chosen": -1.54323148727417, "rewards/margins": 3.748412609100342, "rewards/rejected": -5.291645050048828, "step": 1426 }, { "epoch": 0.3, "learning_rate": 1.408823529411765e-05, "logits/chosen": -2.40716814994812, "logits/rejected": -1.339943766593933, "logps/chosen": -423.3092346191406, "logps/rejected": -227.74795532226562, "loss": 0.1333, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8600265979766846, "rewards/margins": 3.3094186782836914, "rewards/rejected": -4.169445037841797, "step": 1427 }, { "epoch": 0.3, "learning_rate": 1.408403361344538e-05, "logits/chosen": -1.6201808452606201, "logits/rejected": -1.7330325841903687, "logps/chosen": -261.90325927734375, "logps/rejected": -265.1957702636719, "loss": 0.4388, "rewards/accuracies": 0.875, "rewards/chosen": -2.4698610305786133, "rewards/margins": 2.8409595489501953, "rewards/rejected": -5.310820579528809, "step": 1428 }, { "epoch": 0.3, "learning_rate": 1.4079831932773112e-05, "logits/chosen": -2.33933162689209, "logits/rejected": -2.242175579071045, "logps/chosen": -342.8474426269531, "logps/rejected": -339.5798645019531, "loss": 0.194, "rewards/accuracies": 0.875, "rewards/chosen": -1.0780028104782104, "rewards/margins": 4.006252288818359, "rewards/rejected": -5.084254741668701, "step": 1429 }, { "epoch": 0.3, "learning_rate": 1.4075630252100842e-05, "logits/chosen": -2.217787981033325, "logits/rejected": -1.9487733840942383, "logps/chosen": -392.06915283203125, "logps/rejected": -397.7019958496094, "loss": 0.2811, "rewards/accuracies": 0.8125, "rewards/chosen": -1.412872552871704, "rewards/margins": 2.923387050628662, "rewards/rejected": -4.336259841918945, "step": 1430 }, { "epoch": 0.3, "learning_rate": 1.4071428571428574e-05, "logits/chosen": -2.429800510406494, "logits/rejected": -2.1618807315826416, "logps/chosen": -390.2091064453125, "logps/rejected": -338.1448669433594, "loss": 0.5931, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3931775093078613, "rewards/margins": 3.402618885040283, "rewards/rejected": -4.7957963943481445, "step": 1431 }, { "epoch": 0.3, "learning_rate": 1.4067226890756304e-05, "logits/chosen": -2.263923168182373, "logits/rejected": -1.8978078365325928, "logps/chosen": -312.78607177734375, "logps/rejected": -325.0115051269531, "loss": 0.1069, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5175653100013733, "rewards/margins": 5.243579864501953, "rewards/rejected": -5.761145114898682, "step": 1432 }, { "epoch": 0.3, "learning_rate": 1.4063025210084036e-05, "logits/chosen": -2.1740081310272217, "logits/rejected": -1.7976738214492798, "logps/chosen": -508.65997314453125, "logps/rejected": -427.5522155761719, "loss": 0.2798, "rewards/accuracies": 0.875, "rewards/chosen": -0.4478694200515747, "rewards/margins": 3.086674213409424, "rewards/rejected": -3.534543037414551, "step": 1433 }, { "epoch": 0.3, "learning_rate": 1.4058823529411765e-05, "logits/chosen": -1.932845950126648, "logits/rejected": -2.2601325511932373, "logps/chosen": -370.04498291015625, "logps/rejected": -397.68121337890625, "loss": 0.198, "rewards/accuracies": 1.0, "rewards/chosen": -0.0850374698638916, "rewards/margins": 2.811128616333008, "rewards/rejected": -2.8961658477783203, "step": 1434 }, { "epoch": 0.3, "learning_rate": 1.4054621848739495e-05, "logits/chosen": -2.4003355503082275, "logits/rejected": -2.3966779708862305, "logps/chosen": -366.4671630859375, "logps/rejected": -337.98388671875, "loss": 0.4612, "rewards/accuracies": 0.75, "rewards/chosen": -1.4155943393707275, "rewards/margins": 3.2621355056762695, "rewards/rejected": -4.677729606628418, "step": 1435 }, { "epoch": 0.3, "learning_rate": 1.4050420168067227e-05, "logits/chosen": -1.882743239402771, "logits/rejected": -1.546684741973877, "logps/chosen": -277.36663818359375, "logps/rejected": -239.81103515625, "loss": 0.281, "rewards/accuracies": 0.8125, "rewards/chosen": -1.165391445159912, "rewards/margins": 2.6982457637786865, "rewards/rejected": -3.8636374473571777, "step": 1436 }, { "epoch": 0.3, "learning_rate": 1.4046218487394959e-05, "logits/chosen": -1.6716489791870117, "logits/rejected": -1.711898922920227, "logps/chosen": -312.05230712890625, "logps/rejected": -363.0792236328125, "loss": 0.462, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3209638595581055, "rewards/margins": 3.0096306800842285, "rewards/rejected": -4.330594539642334, "step": 1437 }, { "epoch": 0.3, "learning_rate": 1.4042016806722689e-05, "logits/chosen": -1.9865370988845825, "logits/rejected": -1.8661975860595703, "logps/chosen": -358.96990966796875, "logps/rejected": -427.45050048828125, "loss": 0.6389, "rewards/accuracies": 0.75, "rewards/chosen": -1.6543587446212769, "rewards/margins": 1.7921056747436523, "rewards/rejected": -3.4464643001556396, "step": 1438 }, { "epoch": 0.3, "learning_rate": 1.4037815126050421e-05, "logits/chosen": -2.063514232635498, "logits/rejected": -1.9396929740905762, "logps/chosen": -303.96685791015625, "logps/rejected": -316.91351318359375, "loss": 0.4936, "rewards/accuracies": 0.75, "rewards/chosen": -1.5536534786224365, "rewards/margins": 2.088104248046875, "rewards/rejected": -3.6417577266693115, "step": 1439 }, { "epoch": 0.3, "learning_rate": 1.4033613445378151e-05, "logits/chosen": -2.2052083015441895, "logits/rejected": -2.17655348777771, "logps/chosen": -331.864013671875, "logps/rejected": -397.267333984375, "loss": 0.5044, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1957004070281982, "rewards/margins": 3.524470567703247, "rewards/rejected": -4.720170974731445, "step": 1440 }, { "epoch": 0.3, "learning_rate": 1.4029411764705883e-05, "logits/chosen": -2.3663828372955322, "logits/rejected": -2.201965093612671, "logps/chosen": -245.049560546875, "logps/rejected": -223.2589111328125, "loss": 0.2926, "rewards/accuracies": 0.875, "rewards/chosen": -0.5418893098831177, "rewards/margins": 2.591193199157715, "rewards/rejected": -3.133082628250122, "step": 1441 }, { "epoch": 0.3, "learning_rate": 1.4025210084033613e-05, "logits/chosen": -1.9682854413986206, "logits/rejected": -2.1284408569335938, "logps/chosen": -265.33026123046875, "logps/rejected": -429.75689697265625, "loss": 0.2527, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5952258110046387, "rewards/margins": 3.873617172241211, "rewards/rejected": -5.468842506408691, "step": 1442 }, { "epoch": 0.3, "learning_rate": 1.4021008403361345e-05, "logits/chosen": -2.1686043739318848, "logits/rejected": -2.0361034870147705, "logps/chosen": -249.85365295410156, "logps/rejected": -280.7278137207031, "loss": 0.0545, "rewards/accuracies": 1.0, "rewards/chosen": -1.2448259592056274, "rewards/margins": 5.711902618408203, "rewards/rejected": -6.956728458404541, "step": 1443 }, { "epoch": 0.3, "learning_rate": 1.4016806722689076e-05, "logits/chosen": -1.8979355096817017, "logits/rejected": -2.087982654571533, "logps/chosen": -300.1328430175781, "logps/rejected": -384.6341857910156, "loss": 0.2986, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9855170249938965, "rewards/margins": 3.8924458026885986, "rewards/rejected": -4.877963066101074, "step": 1444 }, { "epoch": 0.3, "learning_rate": 1.4012605042016807e-05, "logits/chosen": -2.076925277709961, "logits/rejected": -1.8943102359771729, "logps/chosen": -238.86732482910156, "logps/rejected": -249.40692138671875, "loss": 0.3663, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1582841873168945, "rewards/margins": 2.887338399887085, "rewards/rejected": -4.045622825622559, "step": 1445 }, { "epoch": 0.3, "learning_rate": 1.4008403361344538e-05, "logits/chosen": -1.8368775844573975, "logits/rejected": -1.9516080617904663, "logps/chosen": -248.00221252441406, "logps/rejected": -302.8523254394531, "loss": 0.373, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7918825149536133, "rewards/margins": 3.4885544776916504, "rewards/rejected": -5.2804365158081055, "step": 1446 }, { "epoch": 0.3, "learning_rate": 1.400420168067227e-05, "logits/chosen": -2.305478811264038, "logits/rejected": -1.9125757217407227, "logps/chosen": -379.840576171875, "logps/rejected": -350.6776123046875, "loss": 0.293, "rewards/accuracies": 0.875, "rewards/chosen": -0.6017919778823853, "rewards/margins": 3.715904712677002, "rewards/rejected": -4.317697048187256, "step": 1447 }, { "epoch": 0.3, "learning_rate": 1.4e-05, "logits/chosen": -2.017338752746582, "logits/rejected": -1.982062578201294, "logps/chosen": -262.70953369140625, "logps/rejected": -327.8833312988281, "loss": 0.45, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4727370738983154, "rewards/margins": 2.93798565864563, "rewards/rejected": -4.410722732543945, "step": 1448 }, { "epoch": 0.3, "learning_rate": 1.3995798319327732e-05, "logits/chosen": -1.9766700267791748, "logits/rejected": -1.7306606769561768, "logps/chosen": -381.85467529296875, "logps/rejected": -277.19549560546875, "loss": 0.1462, "rewards/accuracies": 1.0, "rewards/chosen": -1.7957472801208496, "rewards/margins": 3.728302478790283, "rewards/rejected": -5.524049758911133, "step": 1449 }, { "epoch": 0.3, "learning_rate": 1.3991596638655462e-05, "logits/chosen": -2.0307886600494385, "logits/rejected": -1.9838840961456299, "logps/chosen": -207.32003784179688, "logps/rejected": -225.19398498535156, "loss": 0.5563, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3031766414642334, "rewards/margins": 2.3507626056671143, "rewards/rejected": -4.653939247131348, "step": 1450 }, { "epoch": 0.3, "learning_rate": 1.3987394957983194e-05, "logits/chosen": -2.244724988937378, "logits/rejected": -1.6441290378570557, "logps/chosen": -309.74267578125, "logps/rejected": -258.3617858886719, "loss": 0.4104, "rewards/accuracies": 0.75, "rewards/chosen": -1.564531922340393, "rewards/margins": 1.9238146543502808, "rewards/rejected": -3.488346576690674, "step": 1451 }, { "epoch": 0.3, "learning_rate": 1.3983193277310924e-05, "logits/chosen": -2.0285205841064453, "logits/rejected": -2.0154714584350586, "logps/chosen": -313.3587951660156, "logps/rejected": -338.3735046386719, "loss": 0.4524, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2947394847869873, "rewards/margins": 2.834505558013916, "rewards/rejected": -5.129245758056641, "step": 1452 }, { "epoch": 0.3, "learning_rate": 1.3978991596638656e-05, "logits/chosen": -2.079664707183838, "logits/rejected": -1.7986183166503906, "logps/chosen": -342.4872741699219, "logps/rejected": -332.5512390136719, "loss": 0.3734, "rewards/accuracies": 0.8125, "rewards/chosen": -1.215742588043213, "rewards/margins": 3.3589580059051514, "rewards/rejected": -4.574700355529785, "step": 1453 }, { "epoch": 0.3, "learning_rate": 1.3974789915966386e-05, "logits/chosen": -2.3691060543060303, "logits/rejected": -2.168433904647827, "logps/chosen": -300.65838623046875, "logps/rejected": -248.2224884033203, "loss": 0.5816, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3956382274627686, "rewards/margins": 1.9216790199279785, "rewards/rejected": -3.317317247390747, "step": 1454 }, { "epoch": 0.3, "learning_rate": 1.3970588235294118e-05, "logits/chosen": -1.884229063987732, "logits/rejected": -2.1718380451202393, "logps/chosen": -224.0662841796875, "logps/rejected": -341.57086181640625, "loss": 0.1135, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7332942485809326, "rewards/margins": 4.621888160705566, "rewards/rejected": -6.355182647705078, "step": 1455 }, { "epoch": 0.3, "learning_rate": 1.3966386554621848e-05, "logits/chosen": -2.1268794536590576, "logits/rejected": -1.4593603610992432, "logps/chosen": -386.80291748046875, "logps/rejected": -360.8091735839844, "loss": 0.2729, "rewards/accuracies": 0.875, "rewards/chosen": -1.0995765924453735, "rewards/margins": 3.890530586242676, "rewards/rejected": -4.99010705947876, "step": 1456 }, { "epoch": 0.3, "learning_rate": 1.396218487394958e-05, "logits/chosen": -2.7517549991607666, "logits/rejected": -2.046962261199951, "logps/chosen": -509.4554748535156, "logps/rejected": -346.8223571777344, "loss": 0.1636, "rewards/accuracies": 0.9375, "rewards/chosen": -1.186597466468811, "rewards/margins": 3.055948257446289, "rewards/rejected": -4.2425456047058105, "step": 1457 }, { "epoch": 0.31, "learning_rate": 1.3957983193277312e-05, "logits/chosen": -1.9073152542114258, "logits/rejected": -1.9612417221069336, "logps/chosen": -346.36553955078125, "logps/rejected": -308.7762145996094, "loss": 0.3268, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3581430912017822, "rewards/margins": 2.0800538063049316, "rewards/rejected": -3.4381966590881348, "step": 1458 }, { "epoch": 0.31, "learning_rate": 1.3953781512605042e-05, "logits/chosen": -2.021149158477783, "logits/rejected": -1.6026772260665894, "logps/chosen": -340.4598693847656, "logps/rejected": -320.02813720703125, "loss": 0.4163, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2530276775360107, "rewards/margins": 3.745903491973877, "rewards/rejected": -5.998931407928467, "step": 1459 }, { "epoch": 0.31, "learning_rate": 1.3949579831932774e-05, "logits/chosen": -2.28706955909729, "logits/rejected": -1.9779248237609863, "logps/chosen": -378.193359375, "logps/rejected": -329.69671630859375, "loss": 0.2713, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9672653675079346, "rewards/margins": 3.409691333770752, "rewards/rejected": -5.376956462860107, "step": 1460 }, { "epoch": 0.31, "learning_rate": 1.3945378151260505e-05, "logits/chosen": -2.3210740089416504, "logits/rejected": -1.943753957748413, "logps/chosen": -360.6578674316406, "logps/rejected": -307.4670715332031, "loss": 0.2442, "rewards/accuracies": 0.875, "rewards/chosen": -1.347554087638855, "rewards/margins": 4.432538986206055, "rewards/rejected": -5.780092716217041, "step": 1461 }, { "epoch": 0.31, "learning_rate": 1.3941176470588236e-05, "logits/chosen": -2.2012858390808105, "logits/rejected": -1.7777936458587646, "logps/chosen": -335.41705322265625, "logps/rejected": -332.2799987792969, "loss": 0.4697, "rewards/accuracies": 0.875, "rewards/chosen": -1.9669578075408936, "rewards/margins": 2.368565559387207, "rewards/rejected": -4.33552360534668, "step": 1462 }, { "epoch": 0.31, "learning_rate": 1.3936974789915967e-05, "logits/chosen": -2.1187491416931152, "logits/rejected": -1.7113761901855469, "logps/chosen": -281.1143798828125, "logps/rejected": -240.6015167236328, "loss": 0.4022, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9238791465759277, "rewards/margins": 3.1040847301483154, "rewards/rejected": -5.027963638305664, "step": 1463 }, { "epoch": 0.31, "learning_rate": 1.3932773109243699e-05, "logits/chosen": -2.2906455993652344, "logits/rejected": -1.875886082649231, "logps/chosen": -316.2896728515625, "logps/rejected": -359.71014404296875, "loss": 0.0806, "rewards/accuracies": 1.0, "rewards/chosen": -1.2699439525604248, "rewards/margins": 5.328958988189697, "rewards/rejected": -6.598902702331543, "step": 1464 }, { "epoch": 0.31, "learning_rate": 1.3928571428571429e-05, "logits/chosen": -2.2704389095306396, "logits/rejected": -2.1847217082977295, "logps/chosen": -323.0194091796875, "logps/rejected": -363.40191650390625, "loss": 0.4094, "rewards/accuracies": 0.75, "rewards/chosen": -2.188535451889038, "rewards/margins": 2.950287342071533, "rewards/rejected": -5.13882303237915, "step": 1465 }, { "epoch": 0.31, "learning_rate": 1.392436974789916e-05, "logits/chosen": -2.203469753265381, "logits/rejected": -2.120384931564331, "logps/chosen": -336.221435546875, "logps/rejected": -396.68536376953125, "loss": 0.2712, "rewards/accuracies": 0.875, "rewards/chosen": -1.1328576803207397, "rewards/margins": 5.192601203918457, "rewards/rejected": -6.325458526611328, "step": 1466 }, { "epoch": 0.31, "learning_rate": 1.3920168067226891e-05, "logits/chosen": -2.3901824951171875, "logits/rejected": -1.7001736164093018, "logps/chosen": -418.62738037109375, "logps/rejected": -267.2537841796875, "loss": 0.2328, "rewards/accuracies": 0.875, "rewards/chosen": -1.376701831817627, "rewards/margins": 3.787548065185547, "rewards/rejected": -5.164250373840332, "step": 1467 }, { "epoch": 0.31, "learning_rate": 1.3915966386554623e-05, "logits/chosen": -2.334265947341919, "logits/rejected": -1.885516881942749, "logps/chosen": -327.3313293457031, "logps/rejected": -397.9049987792969, "loss": 0.2187, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5809022188186646, "rewards/margins": 4.74301290512085, "rewards/rejected": -6.323915481567383, "step": 1468 }, { "epoch": 0.31, "learning_rate": 1.3911764705882353e-05, "logits/chosen": -2.3096418380737305, "logits/rejected": -1.8101575374603271, "logps/chosen": -403.4140625, "logps/rejected": -348.06353759765625, "loss": 0.1577, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0830509662628174, "rewards/margins": 4.245474338531494, "rewards/rejected": -6.328524589538574, "step": 1469 }, { "epoch": 0.31, "learning_rate": 1.3907563025210085e-05, "logits/chosen": -1.9424877166748047, "logits/rejected": -2.0393130779266357, "logps/chosen": -393.0169677734375, "logps/rejected": -432.44482421875, "loss": 0.4121, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3869253396987915, "rewards/margins": 3.0890190601348877, "rewards/rejected": -4.475944519042969, "step": 1470 }, { "epoch": 0.31, "learning_rate": 1.3903361344537815e-05, "logits/chosen": -2.292696714401245, "logits/rejected": -2.1200037002563477, "logps/chosen": -347.6493225097656, "logps/rejected": -364.51190185546875, "loss": 0.2717, "rewards/accuracies": 0.875, "rewards/chosen": -2.0046565532684326, "rewards/margins": 3.614495277404785, "rewards/rejected": -5.619152069091797, "step": 1471 }, { "epoch": 0.31, "learning_rate": 1.3899159663865547e-05, "logits/chosen": -2.054111957550049, "logits/rejected": -1.5909301042556763, "logps/chosen": -373.861572265625, "logps/rejected": -281.73895263671875, "loss": 0.2596, "rewards/accuracies": 0.8125, "rewards/chosen": -1.579409122467041, "rewards/margins": 3.710562229156494, "rewards/rejected": -5.289971351623535, "step": 1472 }, { "epoch": 0.31, "learning_rate": 1.3894957983193277e-05, "logits/chosen": -1.830489993095398, "logits/rejected": -2.200249671936035, "logps/chosen": -243.86029052734375, "logps/rejected": -269.4947204589844, "loss": 0.1208, "rewards/accuracies": 1.0, "rewards/chosen": -1.7853641510009766, "rewards/margins": 4.046734809875488, "rewards/rejected": -5.832098960876465, "step": 1473 }, { "epoch": 0.31, "learning_rate": 1.389075630252101e-05, "logits/chosen": -2.157987594604492, "logits/rejected": -2.1717755794525146, "logps/chosen": -422.9354248046875, "logps/rejected": -461.49676513671875, "loss": 0.7948, "rewards/accuracies": 0.75, "rewards/chosen": -2.138658046722412, "rewards/margins": 1.8930866718292236, "rewards/rejected": -4.031744956970215, "step": 1474 }, { "epoch": 0.31, "learning_rate": 1.388655462184874e-05, "logits/chosen": -2.288348913192749, "logits/rejected": -1.7758862972259521, "logps/chosen": -348.1274719238281, "logps/rejected": -287.52044677734375, "loss": 0.3611, "rewards/accuracies": 0.8125, "rewards/chosen": -1.988929033279419, "rewards/margins": 3.304992198944092, "rewards/rejected": -5.293920993804932, "step": 1475 }, { "epoch": 0.31, "learning_rate": 1.3882352941176471e-05, "logits/chosen": -1.9256844520568848, "logits/rejected": -1.8006024360656738, "logps/chosen": -293.7676086425781, "logps/rejected": -327.9270935058594, "loss": 0.4368, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7139800786972046, "rewards/margins": 2.328761100769043, "rewards/rejected": -4.042741298675537, "step": 1476 }, { "epoch": 0.31, "learning_rate": 1.3878151260504202e-05, "logits/chosen": -1.7193092107772827, "logits/rejected": -1.9379358291625977, "logps/chosen": -381.2054443359375, "logps/rejected": -343.2018127441406, "loss": 0.3519, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5428836345672607, "rewards/margins": 3.2864980697631836, "rewards/rejected": -5.829381942749023, "step": 1477 }, { "epoch": 0.31, "learning_rate": 1.3873949579831934e-05, "logits/chosen": -2.099285125732422, "logits/rejected": -1.6909632682800293, "logps/chosen": -403.2125244140625, "logps/rejected": -325.96710205078125, "loss": 0.4383, "rewards/accuracies": 0.875, "rewards/chosen": -2.6311864852905273, "rewards/margins": 3.1062440872192383, "rewards/rejected": -5.737430572509766, "step": 1478 }, { "epoch": 0.31, "learning_rate": 1.3869747899159664e-05, "logits/chosen": -2.607786178588867, "logits/rejected": -2.2463717460632324, "logps/chosen": -529.0430297851562, "logps/rejected": -443.3426208496094, "loss": 0.405, "rewards/accuracies": 0.875, "rewards/chosen": -0.17839226126670837, "rewards/margins": 3.6817080974578857, "rewards/rejected": -3.860100746154785, "step": 1479 }, { "epoch": 0.31, "learning_rate": 1.3865546218487396e-05, "logits/chosen": -1.884071946144104, "logits/rejected": -1.9651480913162231, "logps/chosen": -279.3707275390625, "logps/rejected": -312.4449768066406, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -2.823885917663574, "rewards/margins": 3.289954900741577, "rewards/rejected": -6.113840579986572, "step": 1480 }, { "epoch": 0.31, "learning_rate": 1.3861344537815128e-05, "logits/chosen": -1.9239745140075684, "logits/rejected": -1.473430871963501, "logps/chosen": -325.9935302734375, "logps/rejected": -245.03480529785156, "loss": 0.1864, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1942408084869385, "rewards/margins": 3.5219736099243164, "rewards/rejected": -5.716214179992676, "step": 1481 }, { "epoch": 0.31, "learning_rate": 1.3857142857142858e-05, "logits/chosen": -2.4463226795196533, "logits/rejected": -1.8636772632598877, "logps/chosen": -322.02203369140625, "logps/rejected": -347.62799072265625, "loss": 0.2507, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1330406665802, "rewards/margins": 2.6586828231811523, "rewards/rejected": -4.791723251342773, "step": 1482 }, { "epoch": 0.31, "learning_rate": 1.385294117647059e-05, "logits/chosen": -2.1206326484680176, "logits/rejected": -1.9888421297073364, "logps/chosen": -243.22801208496094, "logps/rejected": -316.9754333496094, "loss": 0.3883, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1763663291931152, "rewards/margins": 2.4952845573425293, "rewards/rejected": -4.6716508865356445, "step": 1483 }, { "epoch": 0.31, "learning_rate": 1.384873949579832e-05, "logits/chosen": -1.9638179540634155, "logits/rejected": -1.9315054416656494, "logps/chosen": -282.06280517578125, "logps/rejected": -330.5403137207031, "loss": 0.2487, "rewards/accuracies": 0.875, "rewards/chosen": -1.2130484580993652, "rewards/margins": 3.475529193878174, "rewards/rejected": -4.688577651977539, "step": 1484 }, { "epoch": 0.31, "learning_rate": 1.3844537815126052e-05, "logits/chosen": -2.2887396812438965, "logits/rejected": -1.9831876754760742, "logps/chosen": -306.62750244140625, "logps/rejected": -406.44354248046875, "loss": 0.1397, "rewards/accuracies": 1.0, "rewards/chosen": -1.7298030853271484, "rewards/margins": 3.4616518020629883, "rewards/rejected": -5.1914544105529785, "step": 1485 }, { "epoch": 0.31, "learning_rate": 1.3840336134453782e-05, "logits/chosen": -2.236088752746582, "logits/rejected": -1.503354549407959, "logps/chosen": -408.9189453125, "logps/rejected": -398.43756103515625, "loss": 0.1694, "rewards/accuracies": 1.0, "rewards/chosen": -1.9077842235565186, "rewards/margins": 4.053970813751221, "rewards/rejected": -5.96175479888916, "step": 1486 }, { "epoch": 0.31, "learning_rate": 1.3836134453781514e-05, "logits/chosen": -2.0901851654052734, "logits/rejected": -1.8386940956115723, "logps/chosen": -309.65673828125, "logps/rejected": -276.49676513671875, "loss": 0.3927, "rewards/accuracies": 0.75, "rewards/chosen": -1.4351847171783447, "rewards/margins": 3.065126657485962, "rewards/rejected": -4.500311374664307, "step": 1487 }, { "epoch": 0.31, "learning_rate": 1.3831932773109244e-05, "logits/chosen": -2.3285272121429443, "logits/rejected": -1.8131104707717896, "logps/chosen": -369.39202880859375, "logps/rejected": -257.6651306152344, "loss": 0.2556, "rewards/accuracies": 0.875, "rewards/chosen": -2.2563161849975586, "rewards/margins": 3.7034010887145996, "rewards/rejected": -5.959717750549316, "step": 1488 }, { "epoch": 0.31, "learning_rate": 1.3827731092436976e-05, "logits/chosen": -2.1904854774475098, "logits/rejected": -1.9328203201293945, "logps/chosen": -497.05462646484375, "logps/rejected": -491.21868896484375, "loss": 0.7401, "rewards/accuracies": 0.625, "rewards/chosen": -2.7987942695617676, "rewards/margins": 2.3589625358581543, "rewards/rejected": -5.157756328582764, "step": 1489 }, { "epoch": 0.31, "learning_rate": 1.3823529411764706e-05, "logits/chosen": -2.2366514205932617, "logits/rejected": -1.8316683769226074, "logps/chosen": -524.6250610351562, "logps/rejected": -513.5143432617188, "loss": 0.6225, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2956128120422363, "rewards/margins": 4.453464508056641, "rewards/rejected": -5.749077320098877, "step": 1490 }, { "epoch": 0.31, "learning_rate": 1.3819327731092438e-05, "logits/chosen": -2.22904634475708, "logits/rejected": -2.2269725799560547, "logps/chosen": -403.003173828125, "logps/rejected": -416.63543701171875, "loss": 0.3929, "rewards/accuracies": 0.875, "rewards/chosen": -1.9268664121627808, "rewards/margins": 4.095552921295166, "rewards/rejected": -6.022418975830078, "step": 1491 }, { "epoch": 0.31, "learning_rate": 1.3815126050420169e-05, "logits/chosen": -2.5502898693084717, "logits/rejected": -2.4671010971069336, "logps/chosen": -358.4950256347656, "logps/rejected": -432.85040283203125, "loss": 0.1763, "rewards/accuracies": 0.9375, "rewards/chosen": -1.804923176765442, "rewards/margins": 4.578037738800049, "rewards/rejected": -6.382961273193359, "step": 1492 }, { "epoch": 0.31, "learning_rate": 1.38109243697479e-05, "logits/chosen": -2.1150963306427, "logits/rejected": -1.798793077468872, "logps/chosen": -375.71893310546875, "logps/rejected": -373.99774169921875, "loss": 0.6347, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3594367504119873, "rewards/margins": 2.245410442352295, "rewards/rejected": -3.6048471927642822, "step": 1493 }, { "epoch": 0.31, "learning_rate": 1.380672268907563e-05, "logits/chosen": -1.9354262351989746, "logits/rejected": -1.7193901538848877, "logps/chosen": -314.1204833984375, "logps/rejected": -388.4920959472656, "loss": 0.3218, "rewards/accuracies": 0.9375, "rewards/chosen": -2.189091205596924, "rewards/margins": 3.745532989501953, "rewards/rejected": -5.934624671936035, "step": 1494 }, { "epoch": 0.31, "learning_rate": 1.3802521008403363e-05, "logits/chosen": -2.2270185947418213, "logits/rejected": -1.7530972957611084, "logps/chosen": -293.441650390625, "logps/rejected": -275.045654296875, "loss": 0.3753, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4976768493652344, "rewards/margins": 3.213649272918701, "rewards/rejected": -5.711325645446777, "step": 1495 }, { "epoch": 0.31, "learning_rate": 1.3798319327731093e-05, "logits/chosen": -2.329249143600464, "logits/rejected": -1.483070969581604, "logps/chosen": -409.2109375, "logps/rejected": -339.3017578125, "loss": 0.1405, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8761192560195923, "rewards/margins": 4.959670543670654, "rewards/rejected": -6.835789680480957, "step": 1496 }, { "epoch": 0.31, "learning_rate": 1.3794117647058825e-05, "logits/chosen": -2.076392889022827, "logits/rejected": -1.6949396133422852, "logps/chosen": -357.1734619140625, "logps/rejected": -416.26593017578125, "loss": 0.2775, "rewards/accuracies": 0.875, "rewards/chosen": -1.605079174041748, "rewards/margins": 4.900635242462158, "rewards/rejected": -6.505714416503906, "step": 1497 }, { "epoch": 0.31, "learning_rate": 1.3789915966386555e-05, "logits/chosen": -1.8389716148376465, "logits/rejected": -2.081244468688965, "logps/chosen": -291.5338439941406, "logps/rejected": -351.85089111328125, "loss": 0.4227, "rewards/accuracies": 0.625, "rewards/chosen": -1.177229404449463, "rewards/margins": 2.7249724864959717, "rewards/rejected": -3.9022021293640137, "step": 1498 }, { "epoch": 0.31, "learning_rate": 1.3785714285714287e-05, "logits/chosen": -2.1917097568511963, "logits/rejected": -2.056948661804199, "logps/chosen": -463.5747985839844, "logps/rejected": -365.2350769042969, "loss": 0.3869, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6626700162887573, "rewards/margins": 3.252378463745117, "rewards/rejected": -4.915048599243164, "step": 1499 }, { "epoch": 0.31, "learning_rate": 1.3781512605042017e-05, "logits/chosen": -2.14890456199646, "logits/rejected": -1.7595752477645874, "logps/chosen": -389.83038330078125, "logps/rejected": -335.79974365234375, "loss": 0.4572, "rewards/accuracies": 0.75, "rewards/chosen": -1.3210645914077759, "rewards/margins": 2.0165557861328125, "rewards/rejected": -3.337620496749878, "step": 1500 }, { "epoch": 0.31, "learning_rate": 1.3777310924369749e-05, "logits/chosen": -1.8883776664733887, "logits/rejected": -1.7146973609924316, "logps/chosen": -359.68695068359375, "logps/rejected": -418.39569091796875, "loss": 0.3067, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3695051670074463, "rewards/margins": 2.8430089950561523, "rewards/rejected": -5.2125139236450195, "step": 1501 }, { "epoch": 0.31, "learning_rate": 1.3773109243697481e-05, "logits/chosen": -2.3197641372680664, "logits/rejected": -2.1042861938476562, "logps/chosen": -257.4062805175781, "logps/rejected": -292.05487060546875, "loss": 0.2399, "rewards/accuracies": 0.875, "rewards/chosen": -2.731605291366577, "rewards/margins": 3.0217349529266357, "rewards/rejected": -5.753340721130371, "step": 1502 }, { "epoch": 0.31, "learning_rate": 1.3768907563025211e-05, "logits/chosen": -2.204181671142578, "logits/rejected": -1.837156057357788, "logps/chosen": -374.1438293457031, "logps/rejected": -398.418212890625, "loss": 0.3051, "rewards/accuracies": 0.75, "rewards/chosen": -1.359133243560791, "rewards/margins": 4.114659309387207, "rewards/rejected": -5.473792552947998, "step": 1503 }, { "epoch": 0.31, "learning_rate": 1.3764705882352943e-05, "logits/chosen": -1.883932113647461, "logits/rejected": -1.8983148336410522, "logps/chosen": -353.15582275390625, "logps/rejected": -368.32672119140625, "loss": 0.2818, "rewards/accuracies": 0.9375, "rewards/chosen": -2.108870506286621, "rewards/margins": 3.661158323287964, "rewards/rejected": -5.770028591156006, "step": 1504 }, { "epoch": 0.31, "learning_rate": 1.3760504201680673e-05, "logits/chosen": -2.2103703022003174, "logits/rejected": -2.035720109939575, "logps/chosen": -302.2536926269531, "logps/rejected": -324.2633361816406, "loss": 0.193, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0097177028656006, "rewards/margins": 3.105665683746338, "rewards/rejected": -5.115383625030518, "step": 1505 }, { "epoch": 0.32, "learning_rate": 1.3756302521008405e-05, "logits/chosen": -2.266658067703247, "logits/rejected": -2.1572327613830566, "logps/chosen": -287.13897705078125, "logps/rejected": -312.1755065917969, "loss": 0.2548, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0984175205230713, "rewards/margins": 3.4841713905334473, "rewards/rejected": -5.582589149475098, "step": 1506 }, { "epoch": 0.32, "learning_rate": 1.3752100840336135e-05, "logits/chosen": -2.4099137783050537, "logits/rejected": -2.189911127090454, "logps/chosen": -393.07073974609375, "logps/rejected": -372.637451171875, "loss": 0.3239, "rewards/accuracies": 0.875, "rewards/chosen": -1.9389022588729858, "rewards/margins": 3.230184316635132, "rewards/rejected": -5.169086456298828, "step": 1507 }, { "epoch": 0.32, "learning_rate": 1.3747899159663867e-05, "logits/chosen": -2.5213332176208496, "logits/rejected": -1.6966464519500732, "logps/chosen": -520.781982421875, "logps/rejected": -334.50439453125, "loss": 0.3161, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5399690866470337, "rewards/margins": 3.962235450744629, "rewards/rejected": -5.502203941345215, "step": 1508 }, { "epoch": 0.32, "learning_rate": 1.3743697478991598e-05, "logits/chosen": -2.213902235031128, "logits/rejected": -2.2826530933380127, "logps/chosen": -550.6090087890625, "logps/rejected": -494.15533447265625, "loss": 0.4333, "rewards/accuracies": 0.75, "rewards/chosen": -2.0108642578125, "rewards/margins": 4.040138244628906, "rewards/rejected": -6.051002502441406, "step": 1509 }, { "epoch": 0.32, "learning_rate": 1.373949579831933e-05, "logits/chosen": -2.0629067420959473, "logits/rejected": -1.8256256580352783, "logps/chosen": -363.9134826660156, "logps/rejected": -384.60546875, "loss": 0.1812, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8285789489746094, "rewards/margins": 4.576192855834961, "rewards/rejected": -5.4047722816467285, "step": 1510 }, { "epoch": 0.32, "learning_rate": 1.373529411764706e-05, "logits/chosen": -2.224418878555298, "logits/rejected": -1.8851299285888672, "logps/chosen": -380.7570495605469, "logps/rejected": -293.6314392089844, "loss": 0.4504, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3901138305664062, "rewards/margins": 3.098491907119751, "rewards/rejected": -5.488605499267578, "step": 1511 }, { "epoch": 0.32, "learning_rate": 1.3731092436974792e-05, "logits/chosen": -2.150125026702881, "logits/rejected": -1.5734336376190186, "logps/chosen": -321.7039794921875, "logps/rejected": -261.9718017578125, "loss": 0.426, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6680748462677, "rewards/margins": 2.8966400623321533, "rewards/rejected": -5.564715385437012, "step": 1512 }, { "epoch": 0.32, "learning_rate": 1.3726890756302522e-05, "logits/chosen": -1.7281129360198975, "logits/rejected": -2.0702805519104004, "logps/chosen": -293.47711181640625, "logps/rejected": -375.4337158203125, "loss": 0.1276, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3544812202453613, "rewards/margins": 4.82112455368042, "rewards/rejected": -7.175605773925781, "step": 1513 }, { "epoch": 0.32, "learning_rate": 1.3722689075630254e-05, "logits/chosen": -2.35453200340271, "logits/rejected": -1.9226280450820923, "logps/chosen": -429.4017333984375, "logps/rejected": -427.2686462402344, "loss": 0.3681, "rewards/accuracies": 0.8125, "rewards/chosen": -1.928022861480713, "rewards/margins": 3.355212926864624, "rewards/rejected": -5.283236503601074, "step": 1514 }, { "epoch": 0.32, "learning_rate": 1.3718487394957984e-05, "logits/chosen": -2.3008413314819336, "logits/rejected": -2.0070688724517822, "logps/chosen": -327.9501953125, "logps/rejected": -258.4668884277344, "loss": 0.2023, "rewards/accuracies": 0.875, "rewards/chosen": -1.7532017230987549, "rewards/margins": 3.323464870452881, "rewards/rejected": -5.076666831970215, "step": 1515 }, { "epoch": 0.32, "learning_rate": 1.3714285714285716e-05, "logits/chosen": -2.2839841842651367, "logits/rejected": -2.0053277015686035, "logps/chosen": -294.37078857421875, "logps/rejected": -305.677978515625, "loss": 0.1584, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8836138248443604, "rewards/margins": 4.847850322723389, "rewards/rejected": -6.731464385986328, "step": 1516 }, { "epoch": 0.32, "learning_rate": 1.3710084033613446e-05, "logits/chosen": -1.8805383443832397, "logits/rejected": -1.9337414503097534, "logps/chosen": -314.4667663574219, "logps/rejected": -362.4813232421875, "loss": 0.7267, "rewards/accuracies": 0.8125, "rewards/chosen": -3.0425734519958496, "rewards/margins": 3.1742258071899414, "rewards/rejected": -6.216799259185791, "step": 1517 }, { "epoch": 0.32, "learning_rate": 1.3705882352941178e-05, "logits/chosen": -2.324641466140747, "logits/rejected": -2.1905229091644287, "logps/chosen": -354.46856689453125, "logps/rejected": -381.2355041503906, "loss": 1.1062, "rewards/accuracies": 0.625, "rewards/chosen": -2.7712717056274414, "rewards/margins": 1.989945411682129, "rewards/rejected": -4.76121711730957, "step": 1518 }, { "epoch": 0.32, "learning_rate": 1.3701680672268908e-05, "logits/chosen": -1.9229813814163208, "logits/rejected": -1.4142259359359741, "logps/chosen": -364.59454345703125, "logps/rejected": -324.77886962890625, "loss": 0.4455, "rewards/accuracies": 0.875, "rewards/chosen": -2.35444712638855, "rewards/margins": 4.673713684082031, "rewards/rejected": -7.028160572052002, "step": 1519 }, { "epoch": 0.32, "learning_rate": 1.369747899159664e-05, "logits/chosen": -1.9923980236053467, "logits/rejected": -2.0252585411071777, "logps/chosen": -297.9746398925781, "logps/rejected": -328.2971496582031, "loss": 0.2642, "rewards/accuracies": 0.875, "rewards/chosen": -2.5249955654144287, "rewards/margins": 2.997795343399048, "rewards/rejected": -5.522790908813477, "step": 1520 }, { "epoch": 0.32, "learning_rate": 1.369327731092437e-05, "logits/chosen": -2.101191282272339, "logits/rejected": -2.0765464305877686, "logps/chosen": -302.0186767578125, "logps/rejected": -423.00286865234375, "loss": 0.6472, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7222628593444824, "rewards/margins": 4.560105323791504, "rewards/rejected": -7.282368183135986, "step": 1521 }, { "epoch": 0.32, "learning_rate": 1.3689075630252102e-05, "logits/chosen": -2.2064096927642822, "logits/rejected": -2.107010841369629, "logps/chosen": -303.9696044921875, "logps/rejected": -335.8317565917969, "loss": 0.142, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5918830633163452, "rewards/margins": 5.0224833488464355, "rewards/rejected": -6.61436653137207, "step": 1522 }, { "epoch": 0.32, "learning_rate": 1.3684873949579832e-05, "logits/chosen": -2.2449867725372314, "logits/rejected": -1.964526891708374, "logps/chosen": -372.4798278808594, "logps/rejected": -383.7760009765625, "loss": 0.4904, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1155154705047607, "rewards/margins": 3.672675132751465, "rewards/rejected": -5.788190841674805, "step": 1523 }, { "epoch": 0.32, "learning_rate": 1.3680672268907564e-05, "logits/chosen": -2.2364954948425293, "logits/rejected": -1.6362565755844116, "logps/chosen": -376.10650634765625, "logps/rejected": -370.7212829589844, "loss": 0.1605, "rewards/accuracies": 0.875, "rewards/chosen": -2.3037221431732178, "rewards/margins": 5.390576362609863, "rewards/rejected": -7.69429874420166, "step": 1524 }, { "epoch": 0.32, "learning_rate": 1.3676470588235296e-05, "logits/chosen": -2.167163372039795, "logits/rejected": -2.133495330810547, "logps/chosen": -331.7828063964844, "logps/rejected": -350.56689453125, "loss": 0.2241, "rewards/accuracies": 0.9375, "rewards/chosen": -2.061875104904175, "rewards/margins": 3.3810033798217773, "rewards/rejected": -5.442878723144531, "step": 1525 }, { "epoch": 0.32, "learning_rate": 1.3672268907563027e-05, "logits/chosen": -2.3961386680603027, "logits/rejected": -2.074831008911133, "logps/chosen": -406.5638122558594, "logps/rejected": -332.0755920410156, "loss": 0.2613, "rewards/accuracies": 0.875, "rewards/chosen": -1.5826833248138428, "rewards/margins": 3.226261615753174, "rewards/rejected": -4.808945178985596, "step": 1526 }, { "epoch": 0.32, "learning_rate": 1.3668067226890758e-05, "logits/chosen": -1.8209517002105713, "logits/rejected": -1.583056092262268, "logps/chosen": -396.4560546875, "logps/rejected": -459.8486328125, "loss": 0.155, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5934081077575684, "rewards/margins": 4.904259204864502, "rewards/rejected": -6.497666835784912, "step": 1527 }, { "epoch": 0.32, "learning_rate": 1.3663865546218489e-05, "logits/chosen": -2.3694851398468018, "logits/rejected": -2.150752305984497, "logps/chosen": -301.579833984375, "logps/rejected": -278.3642578125, "loss": 0.2743, "rewards/accuracies": 0.875, "rewards/chosen": -1.8127012252807617, "rewards/margins": 3.071471929550171, "rewards/rejected": -4.884173393249512, "step": 1528 }, { "epoch": 0.32, "learning_rate": 1.365966386554622e-05, "logits/chosen": -2.119192123413086, "logits/rejected": -1.8645139932632446, "logps/chosen": -317.47686767578125, "logps/rejected": -351.76104736328125, "loss": 0.127, "rewards/accuracies": 0.9375, "rewards/chosen": -1.608067512512207, "rewards/margins": 4.637997627258301, "rewards/rejected": -6.246065616607666, "step": 1529 }, { "epoch": 0.32, "learning_rate": 1.365546218487395e-05, "logits/chosen": -2.2033910751342773, "logits/rejected": -2.280977249145508, "logps/chosen": -371.7592468261719, "logps/rejected": -423.4562072753906, "loss": 0.0931, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8018500804901123, "rewards/margins": 5.040394306182861, "rewards/rejected": -6.842245101928711, "step": 1530 }, { "epoch": 0.32, "learning_rate": 1.3651260504201683e-05, "logits/chosen": -2.2729990482330322, "logits/rejected": -1.9864797592163086, "logps/chosen": -314.2420349121094, "logps/rejected": -250.8751678466797, "loss": 0.3364, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0307931900024414, "rewards/margins": 3.4755325317382812, "rewards/rejected": -5.506325721740723, "step": 1531 }, { "epoch": 0.32, "learning_rate": 1.3647058823529413e-05, "logits/chosen": -2.1584627628326416, "logits/rejected": -2.263535261154175, "logps/chosen": -268.9049987792969, "logps/rejected": -347.4558410644531, "loss": 0.2831, "rewards/accuracies": 0.875, "rewards/chosen": -1.2791728973388672, "rewards/margins": 3.707303762435913, "rewards/rejected": -4.986476898193359, "step": 1532 }, { "epoch": 0.32, "learning_rate": 1.3642857142857145e-05, "logits/chosen": -2.0850987434387207, "logits/rejected": -2.1552116870880127, "logps/chosen": -342.29730224609375, "logps/rejected": -416.7658386230469, "loss": 0.5831, "rewards/accuracies": 0.8125, "rewards/chosen": -2.244588613510132, "rewards/margins": 3.867178201675415, "rewards/rejected": -6.111766815185547, "step": 1533 }, { "epoch": 0.32, "learning_rate": 1.3638655462184875e-05, "logits/chosen": -1.9951586723327637, "logits/rejected": -1.5541911125183105, "logps/chosen": -322.2633056640625, "logps/rejected": -295.8976745605469, "loss": 0.2129, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8857260942459106, "rewards/margins": 3.7069284915924072, "rewards/rejected": -5.592655181884766, "step": 1534 }, { "epoch": 0.32, "learning_rate": 1.3634453781512607e-05, "logits/chosen": -2.1387205123901367, "logits/rejected": -2.0219874382019043, "logps/chosen": -248.5108642578125, "logps/rejected": -242.661376953125, "loss": 0.311, "rewards/accuracies": 0.875, "rewards/chosen": -2.3165674209594727, "rewards/margins": 3.790159225463867, "rewards/rejected": -6.10672664642334, "step": 1535 }, { "epoch": 0.32, "learning_rate": 1.3630252100840337e-05, "logits/chosen": -2.195915699005127, "logits/rejected": -1.7216646671295166, "logps/chosen": -230.2270050048828, "logps/rejected": -247.8502960205078, "loss": 0.3246, "rewards/accuracies": 0.75, "rewards/chosen": -1.8406018018722534, "rewards/margins": 3.7172141075134277, "rewards/rejected": -5.557816505432129, "step": 1536 }, { "epoch": 0.32, "learning_rate": 1.3626050420168069e-05, "logits/chosen": -1.831616759300232, "logits/rejected": -1.6921265125274658, "logps/chosen": -416.4903869628906, "logps/rejected": -335.02154541015625, "loss": 0.3942, "rewards/accuracies": 0.875, "rewards/chosen": -1.5379319190979004, "rewards/margins": 3.0702579021453857, "rewards/rejected": -4.608189582824707, "step": 1537 }, { "epoch": 0.32, "learning_rate": 1.36218487394958e-05, "logits/chosen": -1.9427571296691895, "logits/rejected": -1.903017520904541, "logps/chosen": -269.3683776855469, "logps/rejected": -294.51824951171875, "loss": 0.3237, "rewards/accuracies": 0.75, "rewards/chosen": -1.74898099899292, "rewards/margins": 3.7656946182250977, "rewards/rejected": -5.514675617218018, "step": 1538 }, { "epoch": 0.32, "learning_rate": 1.3617647058823531e-05, "logits/chosen": -2.1402130126953125, "logits/rejected": -2.1775643825531006, "logps/chosen": -326.2735595703125, "logps/rejected": -396.31488037109375, "loss": 0.4029, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9723838567733765, "rewards/margins": 3.529466152191162, "rewards/rejected": -5.501850128173828, "step": 1539 }, { "epoch": 0.32, "learning_rate": 1.3613445378151261e-05, "logits/chosen": -2.238013744354248, "logits/rejected": -2.1054842472076416, "logps/chosen": -286.1468811035156, "logps/rejected": -289.3240966796875, "loss": 0.3241, "rewards/accuracies": 0.6875, "rewards/chosen": -1.3906116485595703, "rewards/margins": 3.4860012531280518, "rewards/rejected": -4.876612663269043, "step": 1540 }, { "epoch": 0.32, "learning_rate": 1.3609243697478993e-05, "logits/chosen": -2.4581689834594727, "logits/rejected": -2.0619163513183594, "logps/chosen": -325.19097900390625, "logps/rejected": -281.8824462890625, "loss": 0.1773, "rewards/accuracies": 0.9375, "rewards/chosen": -2.159588575363159, "rewards/margins": 4.178308963775635, "rewards/rejected": -6.337897300720215, "step": 1541 }, { "epoch": 0.32, "learning_rate": 1.3605042016806724e-05, "logits/chosen": -2.159458875656128, "logits/rejected": -1.886964201927185, "logps/chosen": -404.42779541015625, "logps/rejected": -361.19317626953125, "loss": 0.6505, "rewards/accuracies": 0.75, "rewards/chosen": -2.0231292247772217, "rewards/margins": 2.7935128211975098, "rewards/rejected": -4.8166422843933105, "step": 1542 }, { "epoch": 0.32, "learning_rate": 1.3600840336134456e-05, "logits/chosen": -2.320007801055908, "logits/rejected": -1.4519001245498657, "logps/chosen": -353.6142578125, "logps/rejected": -286.37030029296875, "loss": 0.091, "rewards/accuracies": 1.0, "rewards/chosen": -2.0375373363494873, "rewards/margins": 4.341235160827637, "rewards/rejected": -6.378772258758545, "step": 1543 }, { "epoch": 0.32, "learning_rate": 1.3596638655462186e-05, "logits/chosen": -2.3219215869903564, "logits/rejected": -2.1535050868988037, "logps/chosen": -387.14959716796875, "logps/rejected": -345.3487854003906, "loss": 0.1528, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2154974937438965, "rewards/margins": 3.955014944076538, "rewards/rejected": -6.170513153076172, "step": 1544 }, { "epoch": 0.32, "learning_rate": 1.3592436974789918e-05, "logits/chosen": -2.2055633068084717, "logits/rejected": -1.9450434446334839, "logps/chosen": -406.12213134765625, "logps/rejected": -436.635986328125, "loss": 0.2705, "rewards/accuracies": 0.875, "rewards/chosen": -1.9468222856521606, "rewards/margins": 4.452834129333496, "rewards/rejected": -6.399656295776367, "step": 1545 }, { "epoch": 0.32, "learning_rate": 1.3588235294117648e-05, "logits/chosen": -2.125680685043335, "logits/rejected": -2.161830186843872, "logps/chosen": -265.9605712890625, "logps/rejected": -342.02362060546875, "loss": 0.8159, "rewards/accuracies": 0.6875, "rewards/chosen": -1.7846896648406982, "rewards/margins": 1.8813824653625488, "rewards/rejected": -3.666072368621826, "step": 1546 }, { "epoch": 0.32, "learning_rate": 1.358403361344538e-05, "logits/chosen": -2.17728853225708, "logits/rejected": -1.5438613891601562, "logps/chosen": -429.7187805175781, "logps/rejected": -414.5464172363281, "loss": 0.2032, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5351991653442383, "rewards/margins": 3.728818893432617, "rewards/rejected": -6.264018535614014, "step": 1547 }, { "epoch": 0.32, "learning_rate": 1.3579831932773112e-05, "logits/chosen": -2.4130184650421143, "logits/rejected": -2.1025772094726562, "logps/chosen": -413.2592468261719, "logps/rejected": -372.26910400390625, "loss": 0.3896, "rewards/accuracies": 0.8125, "rewards/chosen": -1.985079288482666, "rewards/margins": 2.734118938446045, "rewards/rejected": -4.719197750091553, "step": 1548 }, { "epoch": 0.32, "learning_rate": 1.3575630252100842e-05, "logits/chosen": -2.1688692569732666, "logits/rejected": -2.0466580390930176, "logps/chosen": -322.7155456542969, "logps/rejected": -362.5126037597656, "loss": 0.3998, "rewards/accuracies": 0.75, "rewards/chosen": -2.2712650299072266, "rewards/margins": 2.960850238800049, "rewards/rejected": -5.232114791870117, "step": 1549 }, { "epoch": 0.32, "learning_rate": 1.3571428571428574e-05, "logits/chosen": -1.9294326305389404, "logits/rejected": -2.00925874710083, "logps/chosen": -264.7958068847656, "logps/rejected": -318.3216857910156, "loss": 0.278, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6849164962768555, "rewards/margins": 4.047970771789551, "rewards/rejected": -5.732887268066406, "step": 1550 }, { "epoch": 0.32, "learning_rate": 1.3567226890756304e-05, "logits/chosen": -2.2406880855560303, "logits/rejected": -1.7296757698059082, "logps/chosen": -420.7481994628906, "logps/rejected": -354.55828857421875, "loss": 0.2845, "rewards/accuracies": 0.875, "rewards/chosen": -2.687650442123413, "rewards/margins": 3.1254658699035645, "rewards/rejected": -5.813116073608398, "step": 1551 }, { "epoch": 0.32, "learning_rate": 1.3563025210084036e-05, "logits/chosen": -2.248481512069702, "logits/rejected": -2.3428001403808594, "logps/chosen": -234.35072326660156, "logps/rejected": -266.1012268066406, "loss": 0.2467, "rewards/accuracies": 0.9375, "rewards/chosen": -2.225402593612671, "rewards/margins": 2.3584494590759277, "rewards/rejected": -4.583852291107178, "step": 1552 }, { "epoch": 0.32, "learning_rate": 1.3558823529411766e-05, "logits/chosen": -1.8489086627960205, "logits/rejected": -1.7926712036132812, "logps/chosen": -300.8004150390625, "logps/rejected": -226.19790649414062, "loss": 0.4611, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7169501781463623, "rewards/margins": 2.567204713821411, "rewards/rejected": -4.284154891967773, "step": 1553 }, { "epoch": 0.33, "learning_rate": 1.3554621848739498e-05, "logits/chosen": -1.6713650226593018, "logits/rejected": -1.644329309463501, "logps/chosen": -350.7186584472656, "logps/rejected": -419.7698669433594, "loss": 0.2679, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1753358840942383, "rewards/margins": 2.6158359050750732, "rewards/rejected": -4.791172027587891, "step": 1554 }, { "epoch": 0.33, "learning_rate": 1.3550420168067228e-05, "logits/chosen": -2.1645591259002686, "logits/rejected": -1.9457404613494873, "logps/chosen": -378.60113525390625, "logps/rejected": -404.9617614746094, "loss": 0.3379, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9129743576049805, "rewards/margins": 4.23444128036499, "rewards/rejected": -6.1474151611328125, "step": 1555 }, { "epoch": 0.33, "learning_rate": 1.354621848739496e-05, "logits/chosen": -2.1021556854248047, "logits/rejected": -1.9415308237075806, "logps/chosen": -338.2377624511719, "logps/rejected": -343.495849609375, "loss": 0.4451, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0029456615448, "rewards/margins": 4.4245123863220215, "rewards/rejected": -6.427458763122559, "step": 1556 }, { "epoch": 0.33, "learning_rate": 1.354201680672269e-05, "logits/chosen": -2.2972755432128906, "logits/rejected": -2.0061802864074707, "logps/chosen": -292.63671875, "logps/rejected": -244.11346435546875, "loss": 0.3559, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2151107788085938, "rewards/margins": 3.3020784854888916, "rewards/rejected": -4.5171895027160645, "step": 1557 }, { "epoch": 0.33, "learning_rate": 1.3537815126050422e-05, "logits/chosen": -2.1243882179260254, "logits/rejected": -2.206490993499756, "logps/chosen": -317.6093444824219, "logps/rejected": -319.8685607910156, "loss": 0.5334, "rewards/accuracies": 0.8125, "rewards/chosen": -2.480548858642578, "rewards/margins": 1.9016865491867065, "rewards/rejected": -4.382235527038574, "step": 1558 }, { "epoch": 0.33, "learning_rate": 1.3533613445378153e-05, "logits/chosen": -2.2083888053894043, "logits/rejected": -1.625854730606079, "logps/chosen": -295.3682861328125, "logps/rejected": -243.1657257080078, "loss": 0.1252, "rewards/accuracies": 1.0, "rewards/chosen": -1.3686109781265259, "rewards/margins": 4.547513484954834, "rewards/rejected": -5.91612434387207, "step": 1559 }, { "epoch": 0.33, "learning_rate": 1.3529411764705885e-05, "logits/chosen": -2.3533897399902344, "logits/rejected": -2.2308616638183594, "logps/chosen": -264.24200439453125, "logps/rejected": -262.6607360839844, "loss": 0.2788, "rewards/accuracies": 0.875, "rewards/chosen": -2.8086416721343994, "rewards/margins": 4.327005386352539, "rewards/rejected": -7.135647296905518, "step": 1560 }, { "epoch": 0.33, "learning_rate": 1.3525210084033615e-05, "logits/chosen": -2.256122589111328, "logits/rejected": -1.7468054294586182, "logps/chosen": -362.7600402832031, "logps/rejected": -300.8109130859375, "loss": 0.1618, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7323319911956787, "rewards/margins": 4.372570037841797, "rewards/rejected": -7.104902267456055, "step": 1561 }, { "epoch": 0.33, "learning_rate": 1.3521008403361347e-05, "logits/chosen": -2.0059309005737305, "logits/rejected": -1.9676084518432617, "logps/chosen": -218.7572784423828, "logps/rejected": -259.714599609375, "loss": 0.3778, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1661858558654785, "rewards/margins": 2.790099620819092, "rewards/rejected": -4.95628547668457, "step": 1562 }, { "epoch": 0.33, "learning_rate": 1.3516806722689077e-05, "logits/chosen": -1.9147124290466309, "logits/rejected": -2.0551693439483643, "logps/chosen": -235.26498413085938, "logps/rejected": -268.75628662109375, "loss": 0.2156, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4063061475753784, "rewards/margins": 3.8116002082824707, "rewards/rejected": -5.2179059982299805, "step": 1563 }, { "epoch": 0.33, "learning_rate": 1.3512605042016809e-05, "logits/chosen": -2.004727363586426, "logits/rejected": -1.9379262924194336, "logps/chosen": -251.23013305664062, "logps/rejected": -284.62158203125, "loss": 0.2364, "rewards/accuracies": 0.875, "rewards/chosen": -1.857201099395752, "rewards/margins": 3.7103936672210693, "rewards/rejected": -5.5675950050354, "step": 1564 }, { "epoch": 0.33, "learning_rate": 1.3508403361344539e-05, "logits/chosen": -2.0529003143310547, "logits/rejected": -1.6386899948120117, "logps/chosen": -348.4374694824219, "logps/rejected": -318.3455810546875, "loss": 0.1693, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1770364046096802, "rewards/margins": 3.762047052383423, "rewards/rejected": -4.939083576202393, "step": 1565 }, { "epoch": 0.33, "learning_rate": 1.3504201680672271e-05, "logits/chosen": -1.9407405853271484, "logits/rejected": -1.967835545539856, "logps/chosen": -297.806884765625, "logps/rejected": -417.64300537109375, "loss": 0.5938, "rewards/accuracies": 0.6875, "rewards/chosen": -2.647566556930542, "rewards/margins": 2.837312698364258, "rewards/rejected": -5.484879016876221, "step": 1566 }, { "epoch": 0.33, "learning_rate": 1.3500000000000001e-05, "logits/chosen": -2.065253257751465, "logits/rejected": -1.7523162364959717, "logps/chosen": -411.849853515625, "logps/rejected": -347.3288269042969, "loss": 0.3419, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7883737087249756, "rewards/margins": 3.5711944103240967, "rewards/rejected": -5.359567642211914, "step": 1567 }, { "epoch": 0.33, "learning_rate": 1.3495798319327733e-05, "logits/chosen": -1.73996102809906, "logits/rejected": -2.1035900115966797, "logps/chosen": -267.34564208984375, "logps/rejected": -309.44073486328125, "loss": 0.0564, "rewards/accuracies": 1.0, "rewards/chosen": -1.5502264499664307, "rewards/margins": 4.979929447174072, "rewards/rejected": -6.530155658721924, "step": 1568 }, { "epoch": 0.33, "learning_rate": 1.3491596638655465e-05, "logits/chosen": -2.1515371799468994, "logits/rejected": -1.896789312362671, "logps/chosen": -306.22601318359375, "logps/rejected": -347.0390930175781, "loss": 0.3711, "rewards/accuracies": 0.875, "rewards/chosen": -1.8402398824691772, "rewards/margins": 4.423365592956543, "rewards/rejected": -6.26360559463501, "step": 1569 }, { "epoch": 0.33, "learning_rate": 1.3487394957983195e-05, "logits/chosen": -1.6260254383087158, "logits/rejected": -1.356723666191101, "logps/chosen": -284.29986572265625, "logps/rejected": -299.8603820800781, "loss": 0.2447, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8083734512329102, "rewards/margins": 4.358606338500977, "rewards/rejected": -6.166979789733887, "step": 1570 }, { "epoch": 0.33, "learning_rate": 1.3483193277310927e-05, "logits/chosen": -2.114750385284424, "logits/rejected": -2.10658597946167, "logps/chosen": -271.921875, "logps/rejected": -352.1289978027344, "loss": 0.0758, "rewards/accuracies": 1.0, "rewards/chosen": -2.115615129470825, "rewards/margins": 4.238959312438965, "rewards/rejected": -6.354574680328369, "step": 1571 }, { "epoch": 0.33, "learning_rate": 1.3478991596638657e-05, "logits/chosen": -2.02907133102417, "logits/rejected": -1.9975831508636475, "logps/chosen": -378.42486572265625, "logps/rejected": -327.30615234375, "loss": 0.5258, "rewards/accuracies": 0.625, "rewards/chosen": -2.0004987716674805, "rewards/margins": 4.245112419128418, "rewards/rejected": -6.245611190795898, "step": 1572 }, { "epoch": 0.33, "learning_rate": 1.347478991596639e-05, "logits/chosen": -2.024139642715454, "logits/rejected": -1.9395785331726074, "logps/chosen": -337.4092102050781, "logps/rejected": -343.1611022949219, "loss": 0.352, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1575865745544434, "rewards/margins": 4.801280975341797, "rewards/rejected": -7.958868026733398, "step": 1573 }, { "epoch": 0.33, "learning_rate": 1.347058823529412e-05, "logits/chosen": -2.7350261211395264, "logits/rejected": -2.4601964950561523, "logps/chosen": -372.3827209472656, "logps/rejected": -355.8847351074219, "loss": 0.1385, "rewards/accuracies": 0.875, "rewards/chosen": -1.3894000053405762, "rewards/margins": 5.207067012786865, "rewards/rejected": -6.596466064453125, "step": 1574 }, { "epoch": 0.33, "learning_rate": 1.3466386554621851e-05, "logits/chosen": -2.297157049179077, "logits/rejected": -2.0700793266296387, "logps/chosen": -461.0620422363281, "logps/rejected": -433.7490539550781, "loss": 0.8345, "rewards/accuracies": 0.625, "rewards/chosen": -1.1593585014343262, "rewards/margins": 1.5880155563354492, "rewards/rejected": -2.7473740577697754, "step": 1575 }, { "epoch": 0.33, "learning_rate": 1.3462184873949582e-05, "logits/chosen": -2.4267542362213135, "logits/rejected": -2.0636322498321533, "logps/chosen": -466.5465393066406, "logps/rejected": -398.1875915527344, "loss": 0.0363, "rewards/accuracies": 1.0, "rewards/chosen": -1.1418555974960327, "rewards/margins": 5.091866493225098, "rewards/rejected": -6.233721733093262, "step": 1576 }, { "epoch": 0.33, "learning_rate": 1.3457983193277314e-05, "logits/chosen": -2.1529932022094727, "logits/rejected": -2.2697815895080566, "logps/chosen": -293.11346435546875, "logps/rejected": -287.35882568359375, "loss": 0.2801, "rewards/accuracies": 0.875, "rewards/chosen": -2.4375574588775635, "rewards/margins": 3.544675827026367, "rewards/rejected": -5.982232570648193, "step": 1577 }, { "epoch": 0.33, "learning_rate": 1.3453781512605044e-05, "logits/chosen": -2.2474112510681152, "logits/rejected": -1.8983111381530762, "logps/chosen": -318.96844482421875, "logps/rejected": -273.52911376953125, "loss": 0.6269, "rewards/accuracies": 0.8125, "rewards/chosen": -2.342780351638794, "rewards/margins": 2.3649682998657227, "rewards/rejected": -4.7077484130859375, "step": 1578 }, { "epoch": 0.33, "learning_rate": 1.3449579831932776e-05, "logits/chosen": -2.4370815753936768, "logits/rejected": -1.7357474565505981, "logps/chosen": -281.50775146484375, "logps/rejected": -270.159912109375, "loss": 0.33, "rewards/accuracies": 0.875, "rewards/chosen": -3.201629638671875, "rewards/margins": 3.859788179397583, "rewards/rejected": -7.061417579650879, "step": 1579 }, { "epoch": 0.33, "learning_rate": 1.3445378151260506e-05, "logits/chosen": -2.2266438007354736, "logits/rejected": -1.5917251110076904, "logps/chosen": -461.8985290527344, "logps/rejected": -317.9659423828125, "loss": 0.2462, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3814454078674316, "rewards/margins": 4.184391975402832, "rewards/rejected": -6.565837860107422, "step": 1580 }, { "epoch": 0.33, "learning_rate": 1.3441176470588238e-05, "logits/chosen": -1.9953975677490234, "logits/rejected": -1.895160436630249, "logps/chosen": -355.9602966308594, "logps/rejected": -389.32464599609375, "loss": 0.3802, "rewards/accuracies": 0.875, "rewards/chosen": -2.3407206535339355, "rewards/margins": 3.418330430984497, "rewards/rejected": -5.759051322937012, "step": 1581 }, { "epoch": 0.33, "learning_rate": 1.3436974789915966e-05, "logits/chosen": -2.139695405960083, "logits/rejected": -2.2296462059020996, "logps/chosen": -266.51953125, "logps/rejected": -270.2725524902344, "loss": 0.5884, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8951754570007324, "rewards/margins": 1.4570629596710205, "rewards/rejected": -4.352237701416016, "step": 1582 }, { "epoch": 0.33, "learning_rate": 1.3432773109243698e-05, "logits/chosen": -2.320556163787842, "logits/rejected": -1.923557162284851, "logps/chosen": -382.3410949707031, "logps/rejected": -337.8780212402344, "loss": 0.2495, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8711174726486206, "rewards/margins": 3.854853391647339, "rewards/rejected": -5.72597074508667, "step": 1583 }, { "epoch": 0.33, "learning_rate": 1.3428571428571429e-05, "logits/chosen": -2.2081713676452637, "logits/rejected": -2.0048463344573975, "logps/chosen": -356.354736328125, "logps/rejected": -312.5806884765625, "loss": 0.1059, "rewards/accuracies": 1.0, "rewards/chosen": -1.9250987768173218, "rewards/margins": 4.980665683746338, "rewards/rejected": -6.905764579772949, "step": 1584 }, { "epoch": 0.33, "learning_rate": 1.342436974789916e-05, "logits/chosen": -2.0496139526367188, "logits/rejected": -2.1422438621520996, "logps/chosen": -326.78680419921875, "logps/rejected": -350.79803466796875, "loss": 0.384, "rewards/accuracies": 0.75, "rewards/chosen": -3.362083911895752, "rewards/margins": 2.575965404510498, "rewards/rejected": -5.93804931640625, "step": 1585 }, { "epoch": 0.33, "learning_rate": 1.342016806722689e-05, "logits/chosen": -2.0161404609680176, "logits/rejected": -1.8577167987823486, "logps/chosen": -273.4517517089844, "logps/rejected": -384.1920166015625, "loss": 0.1485, "rewards/accuracies": 0.9375, "rewards/chosen": -2.296023368835449, "rewards/margins": 4.978274822235107, "rewards/rejected": -7.274298667907715, "step": 1586 }, { "epoch": 0.33, "learning_rate": 1.3415966386554623e-05, "logits/chosen": -2.0820136070251465, "logits/rejected": -1.3261100053787231, "logps/chosen": -288.47271728515625, "logps/rejected": -273.7978210449219, "loss": 0.4761, "rewards/accuracies": 0.75, "rewards/chosen": -3.16377592086792, "rewards/margins": 2.335578441619873, "rewards/rejected": -5.499354362487793, "step": 1587 }, { "epoch": 0.33, "learning_rate": 1.3411764705882353e-05, "logits/chosen": -2.101217746734619, "logits/rejected": -1.9143552780151367, "logps/chosen": -262.65216064453125, "logps/rejected": -306.89154052734375, "loss": 0.325, "rewards/accuracies": 0.875, "rewards/chosen": -2.4857940673828125, "rewards/margins": 4.514498710632324, "rewards/rejected": -7.000293254852295, "step": 1588 }, { "epoch": 0.33, "learning_rate": 1.3407563025210085e-05, "logits/chosen": -1.752553939819336, "logits/rejected": -1.8540618419647217, "logps/chosen": -254.5794677734375, "logps/rejected": -311.7756652832031, "loss": 0.1714, "rewards/accuracies": 1.0, "rewards/chosen": -2.510009288787842, "rewards/margins": 3.2181200981140137, "rewards/rejected": -5.728129863739014, "step": 1589 }, { "epoch": 0.33, "learning_rate": 1.3403361344537815e-05, "logits/chosen": -1.9992791414260864, "logits/rejected": -1.8400518894195557, "logps/chosen": -334.0417175292969, "logps/rejected": -314.65203857421875, "loss": 0.3031, "rewards/accuracies": 0.875, "rewards/chosen": -2.1570544242858887, "rewards/margins": 2.4814817905426025, "rewards/rejected": -4.63853645324707, "step": 1590 }, { "epoch": 0.33, "learning_rate": 1.3399159663865547e-05, "logits/chosen": -2.3284952640533447, "logits/rejected": -1.8115315437316895, "logps/chosen": -456.9496154785156, "logps/rejected": -394.57647705078125, "loss": 0.2951, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4232962131500244, "rewards/margins": 3.6977291107177734, "rewards/rejected": -6.121025085449219, "step": 1591 }, { "epoch": 0.33, "learning_rate": 1.3394957983193277e-05, "logits/chosen": -2.32025146484375, "logits/rejected": -1.61851167678833, "logps/chosen": -358.76959228515625, "logps/rejected": -292.3694152832031, "loss": 0.3308, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7484536170959473, "rewards/margins": 4.471254348754883, "rewards/rejected": -7.219708442687988, "step": 1592 }, { "epoch": 0.33, "learning_rate": 1.3390756302521009e-05, "logits/chosen": -1.9726165533065796, "logits/rejected": -1.9987127780914307, "logps/chosen": -339.5418395996094, "logps/rejected": -375.7403259277344, "loss": 0.2997, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1259608268737793, "rewards/margins": 4.168889045715332, "rewards/rejected": -6.294849872589111, "step": 1593 }, { "epoch": 0.33, "learning_rate": 1.338655462184874e-05, "logits/chosen": -1.9736922979354858, "logits/rejected": -2.131272315979004, "logps/chosen": -250.42123413085938, "logps/rejected": -293.5691833496094, "loss": 0.1682, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9538503885269165, "rewards/margins": 4.706883907318115, "rewards/rejected": -6.660734176635742, "step": 1594 }, { "epoch": 0.33, "learning_rate": 1.3382352941176471e-05, "logits/chosen": -1.9635119438171387, "logits/rejected": -1.9307979345321655, "logps/chosen": -361.7359313964844, "logps/rejected": -383.010986328125, "loss": 0.4816, "rewards/accuracies": 0.75, "rewards/chosen": -3.316967487335205, "rewards/margins": 3.3284764289855957, "rewards/rejected": -6.645443916320801, "step": 1595 }, { "epoch": 0.33, "learning_rate": 1.3378151260504201e-05, "logits/chosen": -1.9345074892044067, "logits/rejected": -1.4651312828063965, "logps/chosen": -299.09796142578125, "logps/rejected": -350.7173156738281, "loss": 0.2306, "rewards/accuracies": 0.875, "rewards/chosen": -3.1147069931030273, "rewards/margins": 5.237401962280273, "rewards/rejected": -8.352108001708984, "step": 1596 }, { "epoch": 0.33, "learning_rate": 1.3373949579831933e-05, "logits/chosen": -1.8867934942245483, "logits/rejected": -2.0758984088897705, "logps/chosen": -397.86029052734375, "logps/rejected": -445.2793273925781, "loss": 0.4863, "rewards/accuracies": 0.8125, "rewards/chosen": -3.097174644470215, "rewards/margins": 5.038077354431152, "rewards/rejected": -8.135251998901367, "step": 1597 }, { "epoch": 0.33, "learning_rate": 1.3369747899159663e-05, "logits/chosen": -1.8559684753417969, "logits/rejected": -1.8518497943878174, "logps/chosen": -257.2762451171875, "logps/rejected": -309.48760986328125, "loss": 0.5987, "rewards/accuracies": 0.75, "rewards/chosen": -2.4215240478515625, "rewards/margins": 3.0474164485931396, "rewards/rejected": -5.468940258026123, "step": 1598 }, { "epoch": 0.33, "learning_rate": 1.3365546218487395e-05, "logits/chosen": -2.084303617477417, "logits/rejected": -1.8956245183944702, "logps/chosen": -282.5265197753906, "logps/rejected": -314.05340576171875, "loss": 0.2218, "rewards/accuracies": 0.8125, "rewards/chosen": -2.216721534729004, "rewards/margins": 3.907532215118408, "rewards/rejected": -6.124253749847412, "step": 1599 }, { "epoch": 0.33, "learning_rate": 1.3361344537815126e-05, "logits/chosen": -2.170419216156006, "logits/rejected": -1.6748894453048706, "logps/chosen": -311.447021484375, "logps/rejected": -375.8804931640625, "loss": 0.1649, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9995734691619873, "rewards/margins": 6.639828681945801, "rewards/rejected": -8.639402389526367, "step": 1600 }, { "epoch": 0.33, "learning_rate": 1.3357142857142858e-05, "logits/chosen": -2.301140785217285, "logits/rejected": -2.0822901725769043, "logps/chosen": -398.6423645019531, "logps/rejected": -423.72247314453125, "loss": 0.2527, "rewards/accuracies": 0.8125, "rewards/chosen": -2.662114143371582, "rewards/margins": 4.394876956939697, "rewards/rejected": -7.0569915771484375, "step": 1601 }, { "epoch": 0.34, "learning_rate": 1.3352941176470588e-05, "logits/chosen": -2.2291455268859863, "logits/rejected": -1.717329978942871, "logps/chosen": -428.1994934082031, "logps/rejected": -380.1053771972656, "loss": 0.0603, "rewards/accuracies": 1.0, "rewards/chosen": -2.5141818523406982, "rewards/margins": 4.748443603515625, "rewards/rejected": -7.262625694274902, "step": 1602 }, { "epoch": 0.34, "learning_rate": 1.334873949579832e-05, "logits/chosen": -1.7454628944396973, "logits/rejected": -1.5623927116394043, "logps/chosen": -356.7296142578125, "logps/rejected": -285.88934326171875, "loss": 0.4121, "rewards/accuracies": 0.75, "rewards/chosen": -3.248469829559326, "rewards/margins": 3.267338991165161, "rewards/rejected": -6.515809059143066, "step": 1603 }, { "epoch": 0.34, "learning_rate": 1.3344537815126052e-05, "logits/chosen": -1.997895359992981, "logits/rejected": -2.042762041091919, "logps/chosen": -325.62799072265625, "logps/rejected": -427.9535217285156, "loss": 0.5784, "rewards/accuracies": 0.6875, "rewards/chosen": -3.2319893836975098, "rewards/margins": 2.6737725734710693, "rewards/rejected": -5.90576171875, "step": 1604 }, { "epoch": 0.34, "learning_rate": 1.3340336134453782e-05, "logits/chosen": -2.14200496673584, "logits/rejected": -1.776376485824585, "logps/chosen": -338.14031982421875, "logps/rejected": -341.0394287109375, "loss": 0.2672, "rewards/accuracies": 0.9375, "rewards/chosen": -2.81601619720459, "rewards/margins": 4.751769065856934, "rewards/rejected": -7.567784786224365, "step": 1605 }, { "epoch": 0.34, "learning_rate": 1.3336134453781514e-05, "logits/chosen": -2.154301166534424, "logits/rejected": -2.2191789150238037, "logps/chosen": -245.09988403320312, "logps/rejected": -270.04296875, "loss": 0.4949, "rewards/accuracies": 0.6875, "rewards/chosen": -2.518545150756836, "rewards/margins": 3.6310489177703857, "rewards/rejected": -6.149594306945801, "step": 1606 }, { "epoch": 0.34, "learning_rate": 1.3331932773109244e-05, "logits/chosen": -2.1032347679138184, "logits/rejected": -1.9936023950576782, "logps/chosen": -279.7778015136719, "logps/rejected": -286.44573974609375, "loss": 0.5778, "rewards/accuracies": 0.875, "rewards/chosen": -2.5201468467712402, "rewards/margins": 4.267714977264404, "rewards/rejected": -6.7878618240356445, "step": 1607 }, { "epoch": 0.34, "learning_rate": 1.3327731092436976e-05, "logits/chosen": -2.084012269973755, "logits/rejected": -2.074237585067749, "logps/chosen": -279.6593017578125, "logps/rejected": -316.08209228515625, "loss": 0.203, "rewards/accuracies": 0.875, "rewards/chosen": -2.870898962020874, "rewards/margins": 4.5558671951293945, "rewards/rejected": -7.426766395568848, "step": 1608 }, { "epoch": 0.34, "learning_rate": 1.3323529411764706e-05, "logits/chosen": -1.9728329181671143, "logits/rejected": -2.1290392875671387, "logps/chosen": -295.37481689453125, "logps/rejected": -329.74774169921875, "loss": 0.6767, "rewards/accuracies": 0.6875, "rewards/chosen": -3.4919238090515137, "rewards/margins": 2.4638986587524414, "rewards/rejected": -5.955822467803955, "step": 1609 }, { "epoch": 0.34, "learning_rate": 1.3319327731092438e-05, "logits/chosen": -2.03446102142334, "logits/rejected": -2.120352029800415, "logps/chosen": -320.86419677734375, "logps/rejected": -396.77557373046875, "loss": 0.444, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7813310623168945, "rewards/margins": 4.058514595031738, "rewards/rejected": -6.839846134185791, "step": 1610 }, { "epoch": 0.34, "learning_rate": 1.3315126050420168e-05, "logits/chosen": -2.34096097946167, "logits/rejected": -2.0522515773773193, "logps/chosen": -285.84429931640625, "logps/rejected": -268.8486633300781, "loss": 0.7912, "rewards/accuracies": 0.75, "rewards/chosen": -3.580796480178833, "rewards/margins": 2.9453368186950684, "rewards/rejected": -6.5261335372924805, "step": 1611 }, { "epoch": 0.34, "learning_rate": 1.33109243697479e-05, "logits/chosen": -2.0140230655670166, "logits/rejected": -2.0360476970672607, "logps/chosen": -332.80133056640625, "logps/rejected": -365.2991943359375, "loss": 0.1464, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9450170993804932, "rewards/margins": 3.3869357109069824, "rewards/rejected": -5.331953048706055, "step": 1612 }, { "epoch": 0.34, "learning_rate": 1.330672268907563e-05, "logits/chosen": -2.324519634246826, "logits/rejected": -2.0606119632720947, "logps/chosen": -318.5538635253906, "logps/rejected": -316.2134094238281, "loss": 0.2262, "rewards/accuracies": 0.875, "rewards/chosen": -2.42474102973938, "rewards/margins": 3.705200433731079, "rewards/rejected": -6.129941463470459, "step": 1613 }, { "epoch": 0.34, "learning_rate": 1.3302521008403362e-05, "logits/chosen": -1.748196005821228, "logits/rejected": -1.6406477689743042, "logps/chosen": -226.12464904785156, "logps/rejected": -332.0560607910156, "loss": 0.0837, "rewards/accuracies": 1.0, "rewards/chosen": -2.4759981632232666, "rewards/margins": 4.606532096862793, "rewards/rejected": -7.0825300216674805, "step": 1614 }, { "epoch": 0.34, "learning_rate": 1.3298319327731092e-05, "logits/chosen": -1.9413701295852661, "logits/rejected": -1.6713558435440063, "logps/chosen": -279.9207763671875, "logps/rejected": -290.0434875488281, "loss": 0.4252, "rewards/accuracies": 0.875, "rewards/chosen": -4.070911884307861, "rewards/margins": 3.3115811347961426, "rewards/rejected": -7.382493019104004, "step": 1615 }, { "epoch": 0.34, "learning_rate": 1.3294117647058824e-05, "logits/chosen": -1.7115473747253418, "logits/rejected": -1.4939265251159668, "logps/chosen": -304.6336975097656, "logps/rejected": -362.10821533203125, "loss": 0.086, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4965295791625977, "rewards/margins": 4.550251007080078, "rewards/rejected": -6.046780109405518, "step": 1616 }, { "epoch": 0.34, "learning_rate": 1.3289915966386555e-05, "logits/chosen": -2.1417582035064697, "logits/rejected": -1.6566880941390991, "logps/chosen": -325.8069763183594, "logps/rejected": -319.3828125, "loss": 0.1858, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1280019283294678, "rewards/margins": 4.70501184463501, "rewards/rejected": -7.833014011383057, "step": 1617 }, { "epoch": 0.34, "learning_rate": 1.3285714285714287e-05, "logits/chosen": -2.29227876663208, "logits/rejected": -1.990504503250122, "logps/chosen": -370.6865539550781, "logps/rejected": -328.5603332519531, "loss": 0.3645, "rewards/accuracies": 0.875, "rewards/chosen": -1.699510931968689, "rewards/margins": 3.1582775115966797, "rewards/rejected": -4.857788562774658, "step": 1618 }, { "epoch": 0.34, "learning_rate": 1.3281512605042017e-05, "logits/chosen": -2.13417649269104, "logits/rejected": -1.991564393043518, "logps/chosen": -353.3240661621094, "logps/rejected": -299.34173583984375, "loss": 0.5271, "rewards/accuracies": 0.625, "rewards/chosen": -2.6974196434020996, "rewards/margins": 2.587613105773926, "rewards/rejected": -5.285032749176025, "step": 1619 }, { "epoch": 0.34, "learning_rate": 1.3277310924369749e-05, "logits/chosen": -2.203852415084839, "logits/rejected": -2.0532944202423096, "logps/chosen": -406.81585693359375, "logps/rejected": -375.972412109375, "loss": 0.3639, "rewards/accuracies": 0.8125, "rewards/chosen": -2.869723081588745, "rewards/margins": 3.372016668319702, "rewards/rejected": -6.241739749908447, "step": 1620 }, { "epoch": 0.34, "learning_rate": 1.3273109243697479e-05, "logits/chosen": -2.300136089324951, "logits/rejected": -2.291567325592041, "logps/chosen": -359.29046630859375, "logps/rejected": -418.3973388671875, "loss": 0.3779, "rewards/accuracies": 0.875, "rewards/chosen": -3.1010377407073975, "rewards/margins": 2.3206593990325928, "rewards/rejected": -5.42169713973999, "step": 1621 }, { "epoch": 0.34, "learning_rate": 1.326890756302521e-05, "logits/chosen": -2.215416669845581, "logits/rejected": -2.0284652709960938, "logps/chosen": -443.34149169921875, "logps/rejected": -392.05841064453125, "loss": 0.7605, "rewards/accuracies": 0.6875, "rewards/chosen": -2.803683042526245, "rewards/margins": 2.662907123565674, "rewards/rejected": -5.466590881347656, "step": 1622 }, { "epoch": 0.34, "learning_rate": 1.3264705882352941e-05, "logits/chosen": -1.876626968383789, "logits/rejected": -1.7013816833496094, "logps/chosen": -291.8648681640625, "logps/rejected": -270.8343811035156, "loss": 0.334, "rewards/accuracies": 0.875, "rewards/chosen": -2.566002368927002, "rewards/margins": 3.1309456825256348, "rewards/rejected": -5.696948051452637, "step": 1623 }, { "epoch": 0.34, "learning_rate": 1.3260504201680673e-05, "logits/chosen": -2.1808621883392334, "logits/rejected": -1.827493667602539, "logps/chosen": -394.2679138183594, "logps/rejected": -330.83563232421875, "loss": 0.2369, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4910149574279785, "rewards/margins": 3.987213373184204, "rewards/rejected": -6.478228569030762, "step": 1624 }, { "epoch": 0.34, "learning_rate": 1.3256302521008403e-05, "logits/chosen": -2.3626720905303955, "logits/rejected": -1.9575340747833252, "logps/chosen": -369.42230224609375, "logps/rejected": -374.7626037597656, "loss": 0.0749, "rewards/accuracies": 1.0, "rewards/chosen": -2.5884933471679688, "rewards/margins": 5.056520462036133, "rewards/rejected": -7.645013809204102, "step": 1625 }, { "epoch": 0.34, "learning_rate": 1.3252100840336135e-05, "logits/chosen": -1.9236102104187012, "logits/rejected": -1.849417805671692, "logps/chosen": -352.7290954589844, "logps/rejected": -333.08489990234375, "loss": 0.2424, "rewards/accuracies": 0.75, "rewards/chosen": -2.1833720207214355, "rewards/margins": 4.010716915130615, "rewards/rejected": -6.194088459014893, "step": 1626 }, { "epoch": 0.34, "learning_rate": 1.3247899159663867e-05, "logits/chosen": -2.1266355514526367, "logits/rejected": -2.061347484588623, "logps/chosen": -421.3648681640625, "logps/rejected": -310.83734130859375, "loss": 0.4052, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0288095474243164, "rewards/margins": 4.6182332038879395, "rewards/rejected": -6.647042274475098, "step": 1627 }, { "epoch": 0.34, "learning_rate": 1.3243697478991597e-05, "logits/chosen": -1.8946964740753174, "logits/rejected": -1.628699779510498, "logps/chosen": -332.45196533203125, "logps/rejected": -315.9475402832031, "loss": 0.1712, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3637945652008057, "rewards/margins": 5.166635513305664, "rewards/rejected": -7.530430316925049, "step": 1628 }, { "epoch": 0.34, "learning_rate": 1.3239495798319329e-05, "logits/chosen": -2.267000675201416, "logits/rejected": -2.2757129669189453, "logps/chosen": -337.1736145019531, "logps/rejected": -299.029541015625, "loss": 0.4137, "rewards/accuracies": 0.6875, "rewards/chosen": -3.241429328918457, "rewards/margins": 2.8422224521636963, "rewards/rejected": -6.083651542663574, "step": 1629 }, { "epoch": 0.34, "learning_rate": 1.323529411764706e-05, "logits/chosen": -2.2052745819091797, "logits/rejected": -2.096834897994995, "logps/chosen": -361.2476806640625, "logps/rejected": -363.7479553222656, "loss": 0.3502, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6909987926483154, "rewards/margins": 3.670335054397583, "rewards/rejected": -5.361333847045898, "step": 1630 }, { "epoch": 0.34, "learning_rate": 1.3231092436974791e-05, "logits/chosen": -1.9855976104736328, "logits/rejected": -2.1909592151641846, "logps/chosen": -375.8970642089844, "logps/rejected": -350.7624206542969, "loss": 0.4673, "rewards/accuracies": 0.75, "rewards/chosen": -1.4242157936096191, "rewards/margins": 3.23439884185791, "rewards/rejected": -4.658614635467529, "step": 1631 }, { "epoch": 0.34, "learning_rate": 1.3226890756302521e-05, "logits/chosen": -2.0138039588928223, "logits/rejected": -2.0690550804138184, "logps/chosen": -201.61300659179688, "logps/rejected": -325.20428466796875, "loss": 0.2314, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3579530715942383, "rewards/margins": 5.00855827331543, "rewards/rejected": -7.366511344909668, "step": 1632 }, { "epoch": 0.34, "learning_rate": 1.3222689075630253e-05, "logits/chosen": -2.2799272537231445, "logits/rejected": -1.7658333778381348, "logps/chosen": -409.2884826660156, "logps/rejected": -312.7856750488281, "loss": 0.4545, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5605814456939697, "rewards/margins": 2.6428394317626953, "rewards/rejected": -4.203420639038086, "step": 1633 }, { "epoch": 0.34, "learning_rate": 1.3218487394957984e-05, "logits/chosen": -2.151341438293457, "logits/rejected": -1.793632984161377, "logps/chosen": -322.8631591796875, "logps/rejected": -288.47479248046875, "loss": 0.1409, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0815314054489136, "rewards/margins": 4.677453517913818, "rewards/rejected": -5.7589850425720215, "step": 1634 }, { "epoch": 0.34, "learning_rate": 1.3214285714285716e-05, "logits/chosen": -2.164707899093628, "logits/rejected": -1.5787895917892456, "logps/chosen": -325.22772216796875, "logps/rejected": -293.02630615234375, "loss": 0.2226, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0245189666748047, "rewards/margins": 2.891779899597168, "rewards/rejected": -4.916299343109131, "step": 1635 }, { "epoch": 0.34, "learning_rate": 1.3210084033613446e-05, "logits/chosen": -2.020193576812744, "logits/rejected": -1.8130548000335693, "logps/chosen": -280.83221435546875, "logps/rejected": -333.7970886230469, "loss": 0.2689, "rewards/accuracies": 0.875, "rewards/chosen": -1.9360840320587158, "rewards/margins": 2.9368505477905273, "rewards/rejected": -4.872934818267822, "step": 1636 }, { "epoch": 0.34, "learning_rate": 1.3205882352941178e-05, "logits/chosen": -2.1149120330810547, "logits/rejected": -1.5744532346725464, "logps/chosen": -383.81488037109375, "logps/rejected": -387.3052978515625, "loss": 0.1363, "rewards/accuracies": 0.9375, "rewards/chosen": -0.796541154384613, "rewards/margins": 4.915252208709717, "rewards/rejected": -5.711793899536133, "step": 1637 }, { "epoch": 0.34, "learning_rate": 1.3201680672268908e-05, "logits/chosen": -1.8643388748168945, "logits/rejected": -2.0202724933624268, "logps/chosen": -333.83203125, "logps/rejected": -293.6918029785156, "loss": 0.4123, "rewards/accuracies": 0.75, "rewards/chosen": -2.1033411026000977, "rewards/margins": 1.726037859916687, "rewards/rejected": -3.829378843307495, "step": 1638 }, { "epoch": 0.34, "learning_rate": 1.319747899159664e-05, "logits/chosen": -2.349349021911621, "logits/rejected": -1.67177152633667, "logps/chosen": -328.877197265625, "logps/rejected": -352.9701232910156, "loss": 0.3002, "rewards/accuracies": 0.75, "rewards/chosen": -1.7484947443008423, "rewards/margins": 4.518703460693359, "rewards/rejected": -6.26719856262207, "step": 1639 }, { "epoch": 0.34, "learning_rate": 1.319327731092437e-05, "logits/chosen": -2.196012496948242, "logits/rejected": -1.8797670602798462, "logps/chosen": -371.23480224609375, "logps/rejected": -340.79345703125, "loss": 0.2041, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7328266501426697, "rewards/margins": 3.8800745010375977, "rewards/rejected": -4.612901210784912, "step": 1640 }, { "epoch": 0.34, "learning_rate": 1.3189075630252102e-05, "logits/chosen": -2.105725049972534, "logits/rejected": -2.1755247116088867, "logps/chosen": -318.401123046875, "logps/rejected": -389.2422790527344, "loss": 0.6833, "rewards/accuracies": 0.75, "rewards/chosen": -2.0785398483276367, "rewards/margins": 2.153319835662842, "rewards/rejected": -4.23185920715332, "step": 1641 }, { "epoch": 0.34, "learning_rate": 1.3184873949579832e-05, "logits/chosen": -1.8373757600784302, "logits/rejected": -1.813422441482544, "logps/chosen": -252.28561401367188, "logps/rejected": -243.92356872558594, "loss": 0.5042, "rewards/accuracies": 0.75, "rewards/chosen": -1.691725254058838, "rewards/margins": 2.1929168701171875, "rewards/rejected": -3.8846421241760254, "step": 1642 }, { "epoch": 0.34, "learning_rate": 1.3180672268907564e-05, "logits/chosen": -2.041912317276001, "logits/rejected": -1.4242440462112427, "logps/chosen": -298.460693359375, "logps/rejected": -293.2347106933594, "loss": 0.1318, "rewards/accuracies": 1.0, "rewards/chosen": -1.1325955390930176, "rewards/margins": 2.9155337810516357, "rewards/rejected": -4.048129081726074, "step": 1643 }, { "epoch": 0.34, "learning_rate": 1.3176470588235294e-05, "logits/chosen": -1.719459056854248, "logits/rejected": -1.5744354724884033, "logps/chosen": -363.67266845703125, "logps/rejected": -313.31292724609375, "loss": 0.2477, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8975053429603577, "rewards/margins": 4.022075653076172, "rewards/rejected": -4.919580936431885, "step": 1644 }, { "epoch": 0.34, "learning_rate": 1.3172268907563026e-05, "logits/chosen": -2.153738498687744, "logits/rejected": -1.793424129486084, "logps/chosen": -334.5963134765625, "logps/rejected": -268.81036376953125, "loss": 0.1327, "rewards/accuracies": 0.875, "rewards/chosen": -0.6021678447723389, "rewards/margins": 4.574833393096924, "rewards/rejected": -5.177000999450684, "step": 1645 }, { "epoch": 0.34, "learning_rate": 1.3168067226890756e-05, "logits/chosen": -2.1177711486816406, "logits/rejected": -2.199002742767334, "logps/chosen": -380.64990234375, "logps/rejected": -387.0482177734375, "loss": 0.406, "rewards/accuracies": 0.875, "rewards/chosen": 0.27392637729644775, "rewards/margins": 3.4210567474365234, "rewards/rejected": -3.1471304893493652, "step": 1646 }, { "epoch": 0.34, "learning_rate": 1.3163865546218488e-05, "logits/chosen": -2.1755337715148926, "logits/rejected": -1.8445426225662231, "logps/chosen": -306.31964111328125, "logps/rejected": -261.7821960449219, "loss": 0.4009, "rewards/accuracies": 0.875, "rewards/chosen": -1.680124282836914, "rewards/margins": 1.8594114780426025, "rewards/rejected": -3.5395355224609375, "step": 1647 }, { "epoch": 0.34, "learning_rate": 1.315966386554622e-05, "logits/chosen": -2.325439929962158, "logits/rejected": -1.9233131408691406, "logps/chosen": -283.2689208984375, "logps/rejected": -243.05316162109375, "loss": 0.4014, "rewards/accuracies": 0.8125, "rewards/chosen": -0.9223891496658325, "rewards/margins": 2.5972461700439453, "rewards/rejected": -3.5196352005004883, "step": 1648 }, { "epoch": 0.34, "learning_rate": 1.315546218487395e-05, "logits/chosen": -1.7490565776824951, "logits/rejected": -1.658849835395813, "logps/chosen": -238.8974609375, "logps/rejected": -272.6612548828125, "loss": 0.2375, "rewards/accuracies": 0.8125, "rewards/chosen": -1.3518321514129639, "rewards/margins": 3.057460069656372, "rewards/rejected": -4.409292221069336, "step": 1649 }, { "epoch": 0.35, "learning_rate": 1.3151260504201682e-05, "logits/chosen": -2.298921823501587, "logits/rejected": -2.1890370845794678, "logps/chosen": -340.6194152832031, "logps/rejected": -415.7105407714844, "loss": 0.5048, "rewards/accuracies": 0.75, "rewards/chosen": -1.0801947116851807, "rewards/margins": 2.944852352142334, "rewards/rejected": -4.0250468254089355, "step": 1650 }, { "epoch": 0.35, "learning_rate": 1.3147058823529413e-05, "logits/chosen": -2.0056240558624268, "logits/rejected": -2.009559154510498, "logps/chosen": -328.42962646484375, "logps/rejected": -442.249267578125, "loss": 0.0666, "rewards/accuracies": 0.9375, "rewards/chosen": -0.7312971949577332, "rewards/margins": 5.398517608642578, "rewards/rejected": -6.129814624786377, "step": 1651 }, { "epoch": 0.35, "learning_rate": 1.3142857142857145e-05, "logits/chosen": -2.2275142669677734, "logits/rejected": -1.7594361305236816, "logps/chosen": -312.7616271972656, "logps/rejected": -246.13156127929688, "loss": 0.2979, "rewards/accuracies": 0.8125, "rewards/chosen": -0.8992899060249329, "rewards/margins": 3.912074089050293, "rewards/rejected": -4.81136417388916, "step": 1652 }, { "epoch": 0.35, "learning_rate": 1.3138655462184875e-05, "logits/chosen": -2.0658066272735596, "logits/rejected": -1.7544759511947632, "logps/chosen": -440.46502685546875, "logps/rejected": -368.6729431152344, "loss": 0.1422, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9311587810516357, "rewards/margins": 5.469476222991943, "rewards/rejected": -6.400634765625, "step": 1653 }, { "epoch": 0.35, "learning_rate": 1.3134453781512607e-05, "logits/chosen": -2.035953998565674, "logits/rejected": -2.1727447509765625, "logps/chosen": -276.2041015625, "logps/rejected": -363.9278564453125, "loss": 0.3343, "rewards/accuracies": 0.75, "rewards/chosen": -1.0082136392593384, "rewards/margins": 3.557870626449585, "rewards/rejected": -4.566083908081055, "step": 1654 }, { "epoch": 0.35, "learning_rate": 1.3130252100840337e-05, "logits/chosen": -2.1151387691497803, "logits/rejected": -1.5189028978347778, "logps/chosen": -315.59197998046875, "logps/rejected": -293.00732421875, "loss": 0.2988, "rewards/accuracies": 0.8125, "rewards/chosen": -1.333604097366333, "rewards/margins": 2.8727025985717773, "rewards/rejected": -4.206306457519531, "step": 1655 }, { "epoch": 0.35, "learning_rate": 1.3126050420168069e-05, "logits/chosen": -2.190833568572998, "logits/rejected": -1.8973686695098877, "logps/chosen": -326.2862548828125, "logps/rejected": -229.34274291992188, "loss": 0.2783, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0256329774856567, "rewards/margins": 2.987743854522705, "rewards/rejected": -4.013376712799072, "step": 1656 }, { "epoch": 0.35, "learning_rate": 1.3121848739495799e-05, "logits/chosen": -2.1583807468414307, "logits/rejected": -1.9810147285461426, "logps/chosen": -289.7817687988281, "logps/rejected": -330.85675048828125, "loss": 0.104, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2317479848861694, "rewards/margins": 5.332660675048828, "rewards/rejected": -6.564408779144287, "step": 1657 }, { "epoch": 0.35, "learning_rate": 1.3117647058823531e-05, "logits/chosen": -1.7619329690933228, "logits/rejected": -1.868417501449585, "logps/chosen": -224.9616241455078, "logps/rejected": -288.0164794921875, "loss": 0.1409, "rewards/accuracies": 0.875, "rewards/chosen": -2.1625595092773438, "rewards/margins": 4.720888614654541, "rewards/rejected": -6.883448600769043, "step": 1658 }, { "epoch": 0.35, "learning_rate": 1.3113445378151261e-05, "logits/chosen": -2.2975878715515137, "logits/rejected": -1.8441202640533447, "logps/chosen": -465.9472351074219, "logps/rejected": -357.5421142578125, "loss": 0.1762, "rewards/accuracies": 0.875, "rewards/chosen": -0.5699071884155273, "rewards/margins": 4.306576728820801, "rewards/rejected": -4.876484394073486, "step": 1659 }, { "epoch": 0.35, "learning_rate": 1.3109243697478993e-05, "logits/chosen": -2.096003532409668, "logits/rejected": -1.9058789014816284, "logps/chosen": -388.0535888671875, "logps/rejected": -395.0906066894531, "loss": 0.5501, "rewards/accuracies": 0.75, "rewards/chosen": -1.2597575187683105, "rewards/margins": 2.4487240314483643, "rewards/rejected": -3.708481550216675, "step": 1660 }, { "epoch": 0.35, "learning_rate": 1.3105042016806723e-05, "logits/chosen": -2.369222402572632, "logits/rejected": -2.5502641201019287, "logps/chosen": -394.6534729003906, "logps/rejected": -359.2616271972656, "loss": 0.4286, "rewards/accuracies": 0.8125, "rewards/chosen": -1.1266310214996338, "rewards/margins": 2.82256817817688, "rewards/rejected": -3.9491991996765137, "step": 1661 }, { "epoch": 0.35, "learning_rate": 1.3100840336134455e-05, "logits/chosen": -1.8997111320495605, "logits/rejected": -2.1685891151428223, "logps/chosen": -400.38848876953125, "logps/rejected": -414.1773681640625, "loss": 0.0796, "rewards/accuracies": 1.0, "rewards/chosen": -0.8077974319458008, "rewards/margins": 4.451169967651367, "rewards/rejected": -5.258967399597168, "step": 1662 }, { "epoch": 0.35, "learning_rate": 1.3096638655462185e-05, "logits/chosen": -2.1664371490478516, "logits/rejected": -1.9259426593780518, "logps/chosen": -316.7283935546875, "logps/rejected": -343.28485107421875, "loss": 0.1062, "rewards/accuracies": 1.0, "rewards/chosen": -1.8430044651031494, "rewards/margins": 4.996984481811523, "rewards/rejected": -6.839988708496094, "step": 1663 }, { "epoch": 0.35, "learning_rate": 1.3092436974789917e-05, "logits/chosen": -2.1360716819763184, "logits/rejected": -1.9247126579284668, "logps/chosen": -354.1589050292969, "logps/rejected": -270.7493896484375, "loss": 0.2434, "rewards/accuracies": 0.875, "rewards/chosen": -1.0694624185562134, "rewards/margins": 3.043073892593384, "rewards/rejected": -4.1125359535217285, "step": 1664 }, { "epoch": 0.35, "learning_rate": 1.3088235294117648e-05, "logits/chosen": -2.1518170833587646, "logits/rejected": -1.7931030988693237, "logps/chosen": -371.633544921875, "logps/rejected": -380.6823425292969, "loss": 0.4844, "rewards/accuracies": 0.8125, "rewards/chosen": -0.833035409450531, "rewards/margins": 4.394563674926758, "rewards/rejected": -5.227599143981934, "step": 1665 }, { "epoch": 0.35, "learning_rate": 1.308403361344538e-05, "logits/chosen": -2.020862102508545, "logits/rejected": -1.6553512811660767, "logps/chosen": -469.6575927734375, "logps/rejected": -349.978515625, "loss": 0.1766, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8553345203399658, "rewards/margins": 4.1483259201049805, "rewards/rejected": -5.003661155700684, "step": 1666 }, { "epoch": 0.35, "learning_rate": 1.307983193277311e-05, "logits/chosen": -1.8492913246154785, "logits/rejected": -1.68524169921875, "logps/chosen": -261.5853271484375, "logps/rejected": -290.50189208984375, "loss": 0.5328, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9632350206375122, "rewards/margins": 2.1344101428985596, "rewards/rejected": -4.097645282745361, "step": 1667 }, { "epoch": 0.35, "learning_rate": 1.3075630252100842e-05, "logits/chosen": -2.036646842956543, "logits/rejected": -1.932445764541626, "logps/chosen": -334.4857482910156, "logps/rejected": -478.5420227050781, "loss": 0.3821, "rewards/accuracies": 0.875, "rewards/chosen": -1.255039930343628, "rewards/margins": 4.666387557983398, "rewards/rejected": -5.9214277267456055, "step": 1668 }, { "epoch": 0.35, "learning_rate": 1.3071428571428572e-05, "logits/chosen": -2.1748783588409424, "logits/rejected": -2.013597011566162, "logps/chosen": -269.46929931640625, "logps/rejected": -242.2169189453125, "loss": 0.3967, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2207040786743164, "rewards/margins": 3.145719289779663, "rewards/rejected": -4.3664231300354, "step": 1669 }, { "epoch": 0.35, "learning_rate": 1.3067226890756304e-05, "logits/chosen": -1.9189457893371582, "logits/rejected": -1.7714476585388184, "logps/chosen": -447.6824951171875, "logps/rejected": -468.66583251953125, "loss": 0.2026, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8013466000556946, "rewards/margins": 4.112392425537109, "rewards/rejected": -4.913739204406738, "step": 1670 }, { "epoch": 0.35, "learning_rate": 1.3063025210084036e-05, "logits/chosen": -1.7721881866455078, "logits/rejected": -1.9631613492965698, "logps/chosen": -361.6551513671875, "logps/rejected": -399.54364013671875, "loss": 0.4022, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5698039531707764, "rewards/margins": 3.922056198120117, "rewards/rejected": -5.491860389709473, "step": 1671 }, { "epoch": 0.35, "learning_rate": 1.3058823529411766e-05, "logits/chosen": -2.0548341274261475, "logits/rejected": -1.5207706689834595, "logps/chosen": -327.8741149902344, "logps/rejected": -278.2254638671875, "loss": 0.4888, "rewards/accuracies": 0.875, "rewards/chosen": -1.5704176425933838, "rewards/margins": 3.4351437091827393, "rewards/rejected": -5.005561828613281, "step": 1672 }, { "epoch": 0.35, "learning_rate": 1.3054621848739498e-05, "logits/chosen": -1.7771590948104858, "logits/rejected": -2.0469908714294434, "logps/chosen": -230.1295166015625, "logps/rejected": -401.906005859375, "loss": 0.1166, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2550369501113892, "rewards/margins": 4.642829895019531, "rewards/rejected": -5.897866725921631, "step": 1673 }, { "epoch": 0.35, "learning_rate": 1.3050420168067228e-05, "logits/chosen": -2.1374335289001465, "logits/rejected": -1.9265004396438599, "logps/chosen": -384.01922607421875, "logps/rejected": -340.26812744140625, "loss": 0.2386, "rewards/accuracies": 0.9375, "rewards/chosen": -1.002604365348816, "rewards/margins": 3.3432116508483887, "rewards/rejected": -4.345816135406494, "step": 1674 }, { "epoch": 0.35, "learning_rate": 1.304621848739496e-05, "logits/chosen": -1.818610429763794, "logits/rejected": -1.6062915325164795, "logps/chosen": -314.78363037109375, "logps/rejected": -473.2773132324219, "loss": 0.1387, "rewards/accuracies": 0.9375, "rewards/chosen": -0.6892642974853516, "rewards/margins": 4.108543872833252, "rewards/rejected": -4.7978081703186035, "step": 1675 }, { "epoch": 0.35, "learning_rate": 1.304201680672269e-05, "logits/chosen": -1.9655473232269287, "logits/rejected": -1.8932485580444336, "logps/chosen": -329.175048828125, "logps/rejected": -301.1618347167969, "loss": 0.5714, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2502942085266113, "rewards/margins": 1.7439815998077393, "rewards/rejected": -3.9942760467529297, "step": 1676 }, { "epoch": 0.35, "learning_rate": 1.3037815126050422e-05, "logits/chosen": -2.179201602935791, "logits/rejected": -1.374028205871582, "logps/chosen": -383.7030334472656, "logps/rejected": -332.54937744140625, "loss": 0.3797, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0847861766815186, "rewards/margins": 3.713181734085083, "rewards/rejected": -4.79796838760376, "step": 1677 }, { "epoch": 0.35, "learning_rate": 1.3033613445378152e-05, "logits/chosen": -2.0480589866638184, "logits/rejected": -1.5211020708084106, "logps/chosen": -469.7742004394531, "logps/rejected": -297.0853271484375, "loss": 0.0797, "rewards/accuracies": 1.0, "rewards/chosen": -0.6429076194763184, "rewards/margins": 4.332094192504883, "rewards/rejected": -4.975001335144043, "step": 1678 }, { "epoch": 0.35, "learning_rate": 1.3029411764705884e-05, "logits/chosen": -2.1266839504241943, "logits/rejected": -1.9897475242614746, "logps/chosen": -259.80059814453125, "logps/rejected": -320.8403015136719, "loss": 0.2212, "rewards/accuracies": 0.875, "rewards/chosen": -1.4387110471725464, "rewards/margins": 3.5361859798431396, "rewards/rejected": -4.9748969078063965, "step": 1679 }, { "epoch": 0.35, "learning_rate": 1.3025210084033614e-05, "logits/chosen": -2.203198194503784, "logits/rejected": -1.957892656326294, "logps/chosen": -353.9150085449219, "logps/rejected": -285.92095947265625, "loss": 0.2757, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6760694980621338, "rewards/margins": 2.700798988342285, "rewards/rejected": -4.376868724822998, "step": 1680 }, { "epoch": 0.35, "learning_rate": 1.3021008403361346e-05, "logits/chosen": -2.254056692123413, "logits/rejected": -1.9210858345031738, "logps/chosen": -306.054931640625, "logps/rejected": -313.51678466796875, "loss": 0.1053, "rewards/accuracies": 1.0, "rewards/chosen": -1.317150592803955, "rewards/margins": 4.63206672668457, "rewards/rejected": -5.949216842651367, "step": 1681 }, { "epoch": 0.35, "learning_rate": 1.3016806722689077e-05, "logits/chosen": -2.178771495819092, "logits/rejected": -1.9883192777633667, "logps/chosen": -380.27874755859375, "logps/rejected": -404.1409912109375, "loss": 0.1743, "rewards/accuracies": 0.875, "rewards/chosen": -0.469873309135437, "rewards/margins": 5.4599432945251465, "rewards/rejected": -5.929816722869873, "step": 1682 }, { "epoch": 0.35, "learning_rate": 1.3012605042016809e-05, "logits/chosen": -2.228998899459839, "logits/rejected": -1.970962643623352, "logps/chosen": -261.6079406738281, "logps/rejected": -294.96038818359375, "loss": 0.3451, "rewards/accuracies": 0.75, "rewards/chosen": -1.3882781267166138, "rewards/margins": 3.478013753890991, "rewards/rejected": -4.8662919998168945, "step": 1683 }, { "epoch": 0.35, "learning_rate": 1.3008403361344539e-05, "logits/chosen": -2.1628856658935547, "logits/rejected": -1.9326066970825195, "logps/chosen": -355.4634094238281, "logps/rejected": -398.8327331542969, "loss": 0.4217, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5831526517868042, "rewards/margins": 3.4703168869018555, "rewards/rejected": -5.053469657897949, "step": 1684 }, { "epoch": 0.35, "learning_rate": 1.300420168067227e-05, "logits/chosen": -1.8383350372314453, "logits/rejected": -1.75159752368927, "logps/chosen": -292.02459716796875, "logps/rejected": -351.68597412109375, "loss": 0.5695, "rewards/accuracies": 0.75, "rewards/chosen": -2.371532917022705, "rewards/margins": 2.804790496826172, "rewards/rejected": -5.176322937011719, "step": 1685 }, { "epoch": 0.35, "learning_rate": 1.3000000000000001e-05, "logits/chosen": -1.9471732378005981, "logits/rejected": -1.7567026615142822, "logps/chosen": -310.8711853027344, "logps/rejected": -369.2457275390625, "loss": 0.2648, "rewards/accuracies": 0.875, "rewards/chosen": -1.2930207252502441, "rewards/margins": 4.7606048583984375, "rewards/rejected": -6.053625583648682, "step": 1686 }, { "epoch": 0.35, "learning_rate": 1.2995798319327733e-05, "logits/chosen": -2.4826221466064453, "logits/rejected": -1.883579134941101, "logps/chosen": -353.9053039550781, "logps/rejected": -299.3062744140625, "loss": 0.2734, "rewards/accuracies": 0.875, "rewards/chosen": -1.2804255485534668, "rewards/margins": 3.731477737426758, "rewards/rejected": -5.011903285980225, "step": 1687 }, { "epoch": 0.35, "learning_rate": 1.2991596638655463e-05, "logits/chosen": -1.949670433998108, "logits/rejected": -2.1552414894104004, "logps/chosen": -287.7423400878906, "logps/rejected": -339.22406005859375, "loss": 0.2297, "rewards/accuracies": 0.9375, "rewards/chosen": -1.794440507888794, "rewards/margins": 3.315462350845337, "rewards/rejected": -5.109902858734131, "step": 1688 }, { "epoch": 0.35, "learning_rate": 1.2987394957983195e-05, "logits/chosen": -2.0938405990600586, "logits/rejected": -1.6132357120513916, "logps/chosen": -408.9571533203125, "logps/rejected": -335.2628479003906, "loss": 0.1852, "rewards/accuracies": 0.8125, "rewards/chosen": -1.367767095565796, "rewards/margins": 5.658818244934082, "rewards/rejected": -7.026585578918457, "step": 1689 }, { "epoch": 0.35, "learning_rate": 1.2983193277310925e-05, "logits/chosen": -2.1019012928009033, "logits/rejected": -2.130849599838257, "logps/chosen": -369.7879638671875, "logps/rejected": -396.8240966796875, "loss": 0.5204, "rewards/accuracies": 0.875, "rewards/chosen": -0.4950433373451233, "rewards/margins": 3.2884771823883057, "rewards/rejected": -3.783520221710205, "step": 1690 }, { "epoch": 0.35, "learning_rate": 1.2978991596638657e-05, "logits/chosen": -2.2379186153411865, "logits/rejected": -2.2304952144622803, "logps/chosen": -280.2316589355469, "logps/rejected": -361.62396240234375, "loss": 0.4174, "rewards/accuracies": 0.75, "rewards/chosen": -1.9434300661087036, "rewards/margins": 3.52789306640625, "rewards/rejected": -5.471323013305664, "step": 1691 }, { "epoch": 0.35, "learning_rate": 1.2974789915966387e-05, "logits/chosen": -2.225423574447632, "logits/rejected": -1.4466331005096436, "logps/chosen": -308.32159423828125, "logps/rejected": -233.5181884765625, "loss": 0.6001, "rewards/accuracies": 0.625, "rewards/chosen": -2.222628116607666, "rewards/margins": 1.9431620836257935, "rewards/rejected": -4.16579008102417, "step": 1692 }, { "epoch": 0.35, "learning_rate": 1.297058823529412e-05, "logits/chosen": -1.9872664213180542, "logits/rejected": -1.6573081016540527, "logps/chosen": -271.5209655761719, "logps/rejected": -313.69921875, "loss": 0.4143, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1143531799316406, "rewards/margins": 2.284224510192871, "rewards/rejected": -4.398577690124512, "step": 1693 }, { "epoch": 0.35, "learning_rate": 1.2966386554621851e-05, "logits/chosen": -2.2252771854400635, "logits/rejected": -2.257521390914917, "logps/chosen": -238.01895141601562, "logps/rejected": -240.06521606445312, "loss": 0.2024, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4267501831054688, "rewards/margins": 2.910149097442627, "rewards/rejected": -5.3368988037109375, "step": 1694 }, { "epoch": 0.35, "learning_rate": 1.2962184873949581e-05, "logits/chosen": -2.2101621627807617, "logits/rejected": -1.762978196144104, "logps/chosen": -242.93405151367188, "logps/rejected": -236.29269409179688, "loss": 0.3158, "rewards/accuracies": 0.875, "rewards/chosen": -1.816237449645996, "rewards/margins": 2.754617214202881, "rewards/rejected": -4.570854663848877, "step": 1695 }, { "epoch": 0.35, "learning_rate": 1.2957983193277313e-05, "logits/chosen": -2.1546010971069336, "logits/rejected": -1.8238812685012817, "logps/chosen": -285.2514953613281, "logps/rejected": -349.20550537109375, "loss": 0.1674, "rewards/accuracies": 0.875, "rewards/chosen": -1.3295536041259766, "rewards/margins": 3.6256442070007324, "rewards/rejected": -4.955197811126709, "step": 1696 }, { "epoch": 0.36, "learning_rate": 1.2953781512605043e-05, "logits/chosen": -2.1779394149780273, "logits/rejected": -1.8502485752105713, "logps/chosen": -292.621826171875, "logps/rejected": -299.5543212890625, "loss": 0.317, "rewards/accuracies": 0.875, "rewards/chosen": -1.6964635848999023, "rewards/margins": 2.1967406272888184, "rewards/rejected": -3.8932042121887207, "step": 1697 }, { "epoch": 0.36, "learning_rate": 1.2949579831932775e-05, "logits/chosen": -2.0696017742156982, "logits/rejected": -1.9356679916381836, "logps/chosen": -313.1915283203125, "logps/rejected": -340.41754150390625, "loss": 0.2216, "rewards/accuracies": 0.875, "rewards/chosen": -1.732241153717041, "rewards/margins": 4.313253879547119, "rewards/rejected": -6.045494556427002, "step": 1698 }, { "epoch": 0.36, "learning_rate": 1.2945378151260506e-05, "logits/chosen": -2.3744168281555176, "logits/rejected": -1.8174413442611694, "logps/chosen": -354.8280944824219, "logps/rejected": -304.28192138671875, "loss": 0.3596, "rewards/accuracies": 0.8125, "rewards/chosen": -0.7457529306411743, "rewards/margins": 3.110261917114258, "rewards/rejected": -3.856015205383301, "step": 1699 }, { "epoch": 0.36, "learning_rate": 1.2941176470588238e-05, "logits/chosen": -2.397106170654297, "logits/rejected": -2.2451157569885254, "logps/chosen": -337.36480712890625, "logps/rejected": -287.72027587890625, "loss": 0.4298, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0775704383850098, "rewards/margins": 2.816797971725464, "rewards/rejected": -4.894368648529053, "step": 1700 }, { "epoch": 0.36, "learning_rate": 1.2936974789915968e-05, "logits/chosen": -1.84895920753479, "logits/rejected": -1.7207789421081543, "logps/chosen": -374.7227478027344, "logps/rejected": -318.84088134765625, "loss": 0.3819, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4286924600601196, "rewards/margins": 2.8925764560699463, "rewards/rejected": -4.3212690353393555, "step": 1701 }, { "epoch": 0.36, "learning_rate": 1.29327731092437e-05, "logits/chosen": -2.4556751251220703, "logits/rejected": -1.8722951412200928, "logps/chosen": -341.0693664550781, "logps/rejected": -348.319091796875, "loss": 0.2113, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9394290447235107, "rewards/margins": 3.7422666549682617, "rewards/rejected": -5.681694984436035, "step": 1702 }, { "epoch": 0.36, "learning_rate": 1.292857142857143e-05, "logits/chosen": -2.3573899269104004, "logits/rejected": -2.2098684310913086, "logps/chosen": -405.17132568359375, "logps/rejected": -351.2940673828125, "loss": 0.1851, "rewards/accuracies": 1.0, "rewards/chosen": -1.8742220401763916, "rewards/margins": 3.484382152557373, "rewards/rejected": -5.3586039543151855, "step": 1703 }, { "epoch": 0.36, "learning_rate": 1.2924369747899162e-05, "logits/chosen": -2.083980083465576, "logits/rejected": -2.036158323287964, "logps/chosen": -341.2261962890625, "logps/rejected": -345.0203857421875, "loss": 0.237, "rewards/accuracies": 0.9375, "rewards/chosen": -1.434563159942627, "rewards/margins": 3.8078739643096924, "rewards/rejected": -5.242437362670898, "step": 1704 }, { "epoch": 0.36, "learning_rate": 1.2920168067226892e-05, "logits/chosen": -2.2365522384643555, "logits/rejected": -1.835016131401062, "logps/chosen": -326.5932312011719, "logps/rejected": -302.18243408203125, "loss": 0.2322, "rewards/accuracies": 0.875, "rewards/chosen": -1.9178954362869263, "rewards/margins": 3.2546377182006836, "rewards/rejected": -5.17253303527832, "step": 1705 }, { "epoch": 0.36, "learning_rate": 1.2915966386554624e-05, "logits/chosen": -1.9193065166473389, "logits/rejected": -1.8553701639175415, "logps/chosen": -295.0126953125, "logps/rejected": -276.7940979003906, "loss": 0.5254, "rewards/accuracies": 0.6875, "rewards/chosen": -2.5003440380096436, "rewards/margins": 3.9395179748535156, "rewards/rejected": -6.43986177444458, "step": 1706 }, { "epoch": 0.36, "learning_rate": 1.2911764705882354e-05, "logits/chosen": -2.281768798828125, "logits/rejected": -1.9560492038726807, "logps/chosen": -360.1552734375, "logps/rejected": -312.75018310546875, "loss": 0.2035, "rewards/accuracies": 0.875, "rewards/chosen": -1.6499032974243164, "rewards/margins": 3.5419821739196777, "rewards/rejected": -5.191885948181152, "step": 1707 }, { "epoch": 0.36, "learning_rate": 1.2907563025210086e-05, "logits/chosen": -2.2163238525390625, "logits/rejected": -2.014315128326416, "logps/chosen": -292.4384765625, "logps/rejected": -328.5286560058594, "loss": 0.2725, "rewards/accuracies": 0.875, "rewards/chosen": -1.8796565532684326, "rewards/margins": 3.0182971954345703, "rewards/rejected": -4.897953510284424, "step": 1708 }, { "epoch": 0.36, "learning_rate": 1.2903361344537816e-05, "logits/chosen": -2.0062756538391113, "logits/rejected": -1.6757999658584595, "logps/chosen": -375.4515686035156, "logps/rejected": -343.0420837402344, "loss": 0.3978, "rewards/accuracies": 0.75, "rewards/chosen": -2.3917741775512695, "rewards/margins": 3.356097459793091, "rewards/rejected": -5.747871398925781, "step": 1709 }, { "epoch": 0.36, "learning_rate": 1.2899159663865548e-05, "logits/chosen": -1.9768619537353516, "logits/rejected": -2.1316959857940674, "logps/chosen": -263.69525146484375, "logps/rejected": -283.38055419921875, "loss": 0.1474, "rewards/accuracies": 0.875, "rewards/chosen": -1.441432237625122, "rewards/margins": 3.5668373107910156, "rewards/rejected": -5.008269309997559, "step": 1710 }, { "epoch": 0.36, "learning_rate": 1.2894957983193278e-05, "logits/chosen": -2.216149091720581, "logits/rejected": -1.718430995941162, "logps/chosen": -354.5867004394531, "logps/rejected": -282.2593078613281, "loss": 0.165, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6971702575683594, "rewards/margins": 4.483598232269287, "rewards/rejected": -6.180768013000488, "step": 1711 }, { "epoch": 0.36, "learning_rate": 1.289075630252101e-05, "logits/chosen": -1.678892731666565, "logits/rejected": -1.6397696733474731, "logps/chosen": -243.97219848632812, "logps/rejected": -267.4880065917969, "loss": 0.3449, "rewards/accuracies": 0.875, "rewards/chosen": -1.9729036092758179, "rewards/margins": 3.4702534675598145, "rewards/rejected": -5.443157196044922, "step": 1712 }, { "epoch": 0.36, "learning_rate": 1.288655462184874e-05, "logits/chosen": -2.331613302230835, "logits/rejected": -2.3062655925750732, "logps/chosen": -310.0999755859375, "logps/rejected": -326.1746520996094, "loss": 0.4395, "rewards/accuracies": 0.9375, "rewards/chosen": -2.202488660812378, "rewards/margins": 2.093700408935547, "rewards/rejected": -4.296189308166504, "step": 1713 }, { "epoch": 0.36, "learning_rate": 1.2882352941176473e-05, "logits/chosen": -2.1066927909851074, "logits/rejected": -1.738647222518921, "logps/chosen": -486.341796875, "logps/rejected": -322.0362243652344, "loss": 0.3909, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0142428874969482, "rewards/margins": 4.968484878540039, "rewards/rejected": -5.982728004455566, "step": 1714 }, { "epoch": 0.36, "learning_rate": 1.2878151260504204e-05, "logits/chosen": -2.419487237930298, "logits/rejected": -2.159360408782959, "logps/chosen": -434.7996826171875, "logps/rejected": -349.596923828125, "loss": 0.4208, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8335366249084473, "rewards/margins": 3.2302937507629395, "rewards/rejected": -5.063830375671387, "step": 1715 }, { "epoch": 0.36, "learning_rate": 1.2873949579831935e-05, "logits/chosen": -2.220304489135742, "logits/rejected": -1.7708113193511963, "logps/chosen": -379.8236083984375, "logps/rejected": -368.7391357421875, "loss": 0.2752, "rewards/accuracies": 0.875, "rewards/chosen": -2.0264580249786377, "rewards/margins": 4.4852471351623535, "rewards/rejected": -6.51170539855957, "step": 1716 }, { "epoch": 0.36, "learning_rate": 1.2869747899159667e-05, "logits/chosen": -2.1748082637786865, "logits/rejected": -1.5972838401794434, "logps/chosen": -407.4959716796875, "logps/rejected": -305.56988525390625, "loss": 0.1021, "rewards/accuracies": 0.9375, "rewards/chosen": -1.893852949142456, "rewards/margins": 4.107840538024902, "rewards/rejected": -6.0016937255859375, "step": 1717 }, { "epoch": 0.36, "learning_rate": 1.2865546218487397e-05, "logits/chosen": -2.144066333770752, "logits/rejected": -1.5571010112762451, "logps/chosen": -267.53857421875, "logps/rejected": -250.59426879882812, "loss": 0.2017, "rewards/accuracies": 0.875, "rewards/chosen": -2.165712833404541, "rewards/margins": 4.660351753234863, "rewards/rejected": -6.826064586639404, "step": 1718 }, { "epoch": 0.36, "learning_rate": 1.2861344537815129e-05, "logits/chosen": -2.232433795928955, "logits/rejected": -1.9537347555160522, "logps/chosen": -430.7542724609375, "logps/rejected": -387.421630859375, "loss": 0.1598, "rewards/accuracies": 0.9375, "rewards/chosen": -1.114269733428955, "rewards/margins": 4.251499176025391, "rewards/rejected": -5.365768909454346, "step": 1719 }, { "epoch": 0.36, "learning_rate": 1.2857142857142859e-05, "logits/chosen": -1.731231927871704, "logits/rejected": -1.8613042831420898, "logps/chosen": -224.86611938476562, "logps/rejected": -280.61004638671875, "loss": 0.4473, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4507578611373901, "rewards/margins": 3.0061612129211426, "rewards/rejected": -4.456918716430664, "step": 1720 }, { "epoch": 0.36, "learning_rate": 1.285294117647059e-05, "logits/chosen": -2.3639960289001465, "logits/rejected": -1.8054587841033936, "logps/chosen": -391.4491271972656, "logps/rejected": -310.6698913574219, "loss": 0.1761, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1012992858886719, "rewards/margins": 3.7864742279052734, "rewards/rejected": -4.887773513793945, "step": 1721 }, { "epoch": 0.36, "learning_rate": 1.2848739495798321e-05, "logits/chosen": -2.1803512573242188, "logits/rejected": -2.035006046295166, "logps/chosen": -230.33721923828125, "logps/rejected": -293.82275390625, "loss": 0.2387, "rewards/accuracies": 0.9375, "rewards/chosen": -1.971229076385498, "rewards/margins": 2.6798977851867676, "rewards/rejected": -4.651126861572266, "step": 1722 }, { "epoch": 0.36, "learning_rate": 1.2844537815126053e-05, "logits/chosen": -1.9941805601119995, "logits/rejected": -1.542644739151001, "logps/chosen": -399.8520202636719, "logps/rejected": -358.58721923828125, "loss": 0.1589, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1562397480010986, "rewards/margins": 3.811459541320801, "rewards/rejected": -5.96769905090332, "step": 1723 }, { "epoch": 0.36, "learning_rate": 1.2840336134453783e-05, "logits/chosen": -1.867579460144043, "logits/rejected": -2.0439634323120117, "logps/chosen": -263.67864990234375, "logps/rejected": -341.67333984375, "loss": 0.5047, "rewards/accuracies": 0.875, "rewards/chosen": -2.1695754528045654, "rewards/margins": 3.127701997756958, "rewards/rejected": -5.297277450561523, "step": 1724 }, { "epoch": 0.36, "learning_rate": 1.2836134453781515e-05, "logits/chosen": -2.253267765045166, "logits/rejected": -2.07066011428833, "logps/chosen": -425.99786376953125, "logps/rejected": -325.1829833984375, "loss": 0.7645, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5230449438095093, "rewards/margins": 3.2469072341918945, "rewards/rejected": -4.769951820373535, "step": 1725 }, { "epoch": 0.36, "learning_rate": 1.2831932773109245e-05, "logits/chosen": -2.3167994022369385, "logits/rejected": -2.1905324459075928, "logps/chosen": -373.3262634277344, "logps/rejected": -357.4512939453125, "loss": 0.1458, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1414622068405151, "rewards/margins": 4.395660400390625, "rewards/rejected": -5.53712272644043, "step": 1726 }, { "epoch": 0.36, "learning_rate": 1.2827731092436977e-05, "logits/chosen": -2.2212750911712646, "logits/rejected": -1.9495553970336914, "logps/chosen": -336.9986877441406, "logps/rejected": -290.710693359375, "loss": 0.3887, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9888758659362793, "rewards/margins": 4.125742435455322, "rewards/rejected": -6.11461877822876, "step": 1727 }, { "epoch": 0.36, "learning_rate": 1.2823529411764707e-05, "logits/chosen": -1.922631025314331, "logits/rejected": -1.7247275114059448, "logps/chosen": -405.75860595703125, "logps/rejected": -395.06939697265625, "loss": 0.4779, "rewards/accuracies": 0.75, "rewards/chosen": -3.4472742080688477, "rewards/margins": 3.2764339447021484, "rewards/rejected": -6.723708152770996, "step": 1728 }, { "epoch": 0.36, "learning_rate": 1.281932773109244e-05, "logits/chosen": -1.857358455657959, "logits/rejected": -2.1009011268615723, "logps/chosen": -262.1487731933594, "logps/rejected": -258.7678527832031, "loss": 0.2587, "rewards/accuracies": 0.9375, "rewards/chosen": -2.333517074584961, "rewards/margins": 2.741753101348877, "rewards/rejected": -5.07526969909668, "step": 1729 }, { "epoch": 0.36, "learning_rate": 1.281512605042017e-05, "logits/chosen": -1.9830610752105713, "logits/rejected": -1.7838393449783325, "logps/chosen": -349.3528137207031, "logps/rejected": -388.6442565917969, "loss": 0.6238, "rewards/accuracies": 0.75, "rewards/chosen": -1.6089346408843994, "rewards/margins": 4.052203178405762, "rewards/rejected": -5.661137580871582, "step": 1730 }, { "epoch": 0.36, "learning_rate": 1.28109243697479e-05, "logits/chosen": -2.016331672668457, "logits/rejected": -1.0829658508300781, "logps/chosen": -321.3553466796875, "logps/rejected": -256.7331848144531, "loss": 0.101, "rewards/accuracies": 1.0, "rewards/chosen": -2.072201728820801, "rewards/margins": 4.869070529937744, "rewards/rejected": -6.941271781921387, "step": 1731 }, { "epoch": 0.36, "learning_rate": 1.280672268907563e-05, "logits/chosen": -1.9750113487243652, "logits/rejected": -1.803163766860962, "logps/chosen": -400.969482421875, "logps/rejected": -338.1429443359375, "loss": 0.1438, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5870110988616943, "rewards/margins": 5.043107986450195, "rewards/rejected": -6.630119323730469, "step": 1732 }, { "epoch": 0.36, "learning_rate": 1.2802521008403362e-05, "logits/chosen": -1.8410203456878662, "logits/rejected": -1.9059548377990723, "logps/chosen": -324.8757629394531, "logps/rejected": -383.896728515625, "loss": 0.7075, "rewards/accuracies": 0.625, "rewards/chosen": -2.1457061767578125, "rewards/margins": 2.7147622108459473, "rewards/rejected": -4.860467910766602, "step": 1733 }, { "epoch": 0.36, "learning_rate": 1.2798319327731092e-05, "logits/chosen": -1.937516689300537, "logits/rejected": -1.4987492561340332, "logps/chosen": -252.4307098388672, "logps/rejected": -281.6831359863281, "loss": 0.2898, "rewards/accuracies": 0.875, "rewards/chosen": -1.8815139532089233, "rewards/margins": 3.419353723526001, "rewards/rejected": -5.300867557525635, "step": 1734 }, { "epoch": 0.36, "learning_rate": 1.2794117647058824e-05, "logits/chosen": -2.0901570320129395, "logits/rejected": -1.9519517421722412, "logps/chosen": -375.2912902832031, "logps/rejected": -329.3047790527344, "loss": 0.2603, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5332554578781128, "rewards/margins": 3.8134186267852783, "rewards/rejected": -5.346673965454102, "step": 1735 }, { "epoch": 0.36, "learning_rate": 1.2789915966386554e-05, "logits/chosen": -1.781891942024231, "logits/rejected": -1.8139662742614746, "logps/chosen": -300.4434814453125, "logps/rejected": -304.4271240234375, "loss": 0.4837, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9548029899597168, "rewards/margins": 3.0653021335601807, "rewards/rejected": -5.020104885101318, "step": 1736 }, { "epoch": 0.36, "learning_rate": 1.2785714285714286e-05, "logits/chosen": -2.093195676803589, "logits/rejected": -1.839459776878357, "logps/chosen": -292.4847106933594, "logps/rejected": -354.5055236816406, "loss": 0.2414, "rewards/accuracies": 0.875, "rewards/chosen": -1.5953119993209839, "rewards/margins": 3.8514628410339355, "rewards/rejected": -5.446774959564209, "step": 1737 }, { "epoch": 0.36, "learning_rate": 1.2781512605042016e-05, "logits/chosen": -1.785282850265503, "logits/rejected": -1.9226555824279785, "logps/chosen": -287.1199951171875, "logps/rejected": -390.8167419433594, "loss": 0.2232, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9651846885681152, "rewards/margins": 4.778981685638428, "rewards/rejected": -6.744166374206543, "step": 1738 }, { "epoch": 0.36, "learning_rate": 1.2777310924369748e-05, "logits/chosen": -1.881270408630371, "logits/rejected": -2.089820146560669, "logps/chosen": -214.3849334716797, "logps/rejected": -250.7349853515625, "loss": 0.3226, "rewards/accuracies": 0.9375, "rewards/chosen": -1.861527919769287, "rewards/margins": 3.774306297302246, "rewards/rejected": -5.635834217071533, "step": 1739 }, { "epoch": 0.36, "learning_rate": 1.2773109243697479e-05, "logits/chosen": -2.109286308288574, "logits/rejected": -1.6026495695114136, "logps/chosen": -299.84893798828125, "logps/rejected": -299.3256530761719, "loss": 0.575, "rewards/accuracies": 0.8125, "rewards/chosen": -1.815997838973999, "rewards/margins": 2.7296788692474365, "rewards/rejected": -4.5456767082214355, "step": 1740 }, { "epoch": 0.36, "learning_rate": 1.276890756302521e-05, "logits/chosen": -1.9613996744155884, "logits/rejected": -1.8531324863433838, "logps/chosen": -271.5999755859375, "logps/rejected": -299.2306213378906, "loss": 0.4834, "rewards/accuracies": 0.875, "rewards/chosen": -1.6028615236282349, "rewards/margins": 3.492419958114624, "rewards/rejected": -5.095281600952148, "step": 1741 }, { "epoch": 0.36, "learning_rate": 1.276470588235294e-05, "logits/chosen": -1.491971731185913, "logits/rejected": -1.623412847518921, "logps/chosen": -319.4019775390625, "logps/rejected": -435.154052734375, "loss": 0.0974, "rewards/accuracies": 1.0, "rewards/chosen": -1.7717623710632324, "rewards/margins": 4.269248962402344, "rewards/rejected": -6.041011333465576, "step": 1742 }, { "epoch": 0.36, "learning_rate": 1.2760504201680673e-05, "logits/chosen": -2.0967891216278076, "logits/rejected": -1.8779772520065308, "logps/chosen": -376.2422180175781, "logps/rejected": -333.02972412109375, "loss": 0.6872, "rewards/accuracies": 0.6875, "rewards/chosen": -2.602220058441162, "rewards/margins": 2.32466459274292, "rewards/rejected": -4.926884651184082, "step": 1743 }, { "epoch": 0.36, "learning_rate": 1.2756302521008403e-05, "logits/chosen": -1.8823425769805908, "logits/rejected": -1.8060266971588135, "logps/chosen": -405.7274475097656, "logps/rejected": -409.785400390625, "loss": 0.2944, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4691033363342285, "rewards/margins": 5.218448638916016, "rewards/rejected": -7.687552452087402, "step": 1744 }, { "epoch": 0.37, "learning_rate": 1.2752100840336135e-05, "logits/chosen": -2.1131157875061035, "logits/rejected": -1.805212140083313, "logps/chosen": -228.3209686279297, "logps/rejected": -298.92724609375, "loss": 0.1408, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1740851402282715, "rewards/margins": 4.319099426269531, "rewards/rejected": -6.4931840896606445, "step": 1745 }, { "epoch": 0.37, "learning_rate": 1.2747899159663865e-05, "logits/chosen": -1.852203369140625, "logits/rejected": -2.099804639816284, "logps/chosen": -266.0347595214844, "logps/rejected": -323.8255310058594, "loss": 0.6344, "rewards/accuracies": 0.6875, "rewards/chosen": -2.4451680183410645, "rewards/margins": 3.8588805198669434, "rewards/rejected": -6.304048538208008, "step": 1746 }, { "epoch": 0.37, "learning_rate": 1.2743697478991597e-05, "logits/chosen": -2.2081375122070312, "logits/rejected": -1.62109375, "logps/chosen": -323.3291931152344, "logps/rejected": -277.005615234375, "loss": 0.1506, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2035279273986816, "rewards/margins": 4.323655128479004, "rewards/rejected": -6.527183532714844, "step": 1747 }, { "epoch": 0.37, "learning_rate": 1.2739495798319327e-05, "logits/chosen": -2.1493332386016846, "logits/rejected": -1.4306750297546387, "logps/chosen": -322.77001953125, "logps/rejected": -262.59930419921875, "loss": 0.1992, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7850747108459473, "rewards/margins": 2.7682077884674072, "rewards/rejected": -5.553282260894775, "step": 1748 }, { "epoch": 0.37, "learning_rate": 1.2735294117647059e-05, "logits/chosen": -1.9452846050262451, "logits/rejected": -1.942493200302124, "logps/chosen": -250.05810546875, "logps/rejected": -329.5069274902344, "loss": 0.1991, "rewards/accuracies": 0.9375, "rewards/chosen": -2.417715549468994, "rewards/margins": 3.924509048461914, "rewards/rejected": -6.342224597930908, "step": 1749 }, { "epoch": 0.37, "learning_rate": 1.2731092436974791e-05, "logits/chosen": -2.0381953716278076, "logits/rejected": -1.6223368644714355, "logps/chosen": -346.24871826171875, "logps/rejected": -302.119384765625, "loss": 0.4739, "rewards/accuracies": 0.8125, "rewards/chosen": -2.098102331161499, "rewards/margins": 3.149576187133789, "rewards/rejected": -5.247678756713867, "step": 1750 }, { "epoch": 0.37, "learning_rate": 1.2726890756302521e-05, "logits/chosen": -2.0168075561523438, "logits/rejected": -2.028434991836548, "logps/chosen": -261.8702697753906, "logps/rejected": -310.6669006347656, "loss": 0.367, "rewards/accuracies": 0.8125, "rewards/chosen": -2.033168315887451, "rewards/margins": 2.440186023712158, "rewards/rejected": -4.473354339599609, "step": 1751 }, { "epoch": 0.37, "learning_rate": 1.2722689075630253e-05, "logits/chosen": -2.227324962615967, "logits/rejected": -1.8990821838378906, "logps/chosen": -516.052978515625, "logps/rejected": -438.0694580078125, "loss": 0.5383, "rewards/accuracies": 0.8125, "rewards/chosen": -2.112281084060669, "rewards/margins": 3.0466365814208984, "rewards/rejected": -5.1589179039001465, "step": 1752 }, { "epoch": 0.37, "learning_rate": 1.2718487394957983e-05, "logits/chosen": -1.8136128187179565, "logits/rejected": -2.124232769012451, "logps/chosen": -258.5289306640625, "logps/rejected": -339.23126220703125, "loss": 0.2653, "rewards/accuracies": 0.9375, "rewards/chosen": -1.013992190361023, "rewards/margins": 2.954085350036621, "rewards/rejected": -3.9680776596069336, "step": 1753 }, { "epoch": 0.37, "learning_rate": 1.2714285714285715e-05, "logits/chosen": -2.2477266788482666, "logits/rejected": -1.9282069206237793, "logps/chosen": -310.39239501953125, "logps/rejected": -351.3646545410156, "loss": 0.306, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4616888761520386, "rewards/margins": 4.0910491943359375, "rewards/rejected": -5.552738189697266, "step": 1754 }, { "epoch": 0.37, "learning_rate": 1.2710084033613445e-05, "logits/chosen": -1.712146282196045, "logits/rejected": -1.875839114189148, "logps/chosen": -206.52664184570312, "logps/rejected": -245.27178955078125, "loss": 0.1806, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6540236473083496, "rewards/margins": 4.343644618988037, "rewards/rejected": -5.997668266296387, "step": 1755 }, { "epoch": 0.37, "learning_rate": 1.2705882352941177e-05, "logits/chosen": -2.070586919784546, "logits/rejected": -1.6574022769927979, "logps/chosen": -286.0633544921875, "logps/rejected": -268.3511962890625, "loss": 0.4091, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6145892143249512, "rewards/margins": 3.4328179359436035, "rewards/rejected": -5.047407150268555, "step": 1756 }, { "epoch": 0.37, "learning_rate": 1.2701680672268908e-05, "logits/chosen": -2.1463136672973633, "logits/rejected": -1.8782405853271484, "logps/chosen": -284.5740661621094, "logps/rejected": -271.18231201171875, "loss": 0.1689, "rewards/accuracies": 0.875, "rewards/chosen": -1.2716894149780273, "rewards/margins": 4.896943092346191, "rewards/rejected": -6.168633460998535, "step": 1757 }, { "epoch": 0.37, "learning_rate": 1.269747899159664e-05, "logits/chosen": -2.218388795852661, "logits/rejected": -1.7556936740875244, "logps/chosen": -237.75625610351562, "logps/rejected": -241.58062744140625, "loss": 0.2472, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8457057476043701, "rewards/margins": 3.9747471809387207, "rewards/rejected": -5.82045316696167, "step": 1758 }, { "epoch": 0.37, "learning_rate": 1.269327731092437e-05, "logits/chosen": -2.075504779815674, "logits/rejected": -1.9565949440002441, "logps/chosen": -424.69708251953125, "logps/rejected": -400.16796875, "loss": 0.5175, "rewards/accuracies": 0.75, "rewards/chosen": -2.5709381103515625, "rewards/margins": 3.244272232055664, "rewards/rejected": -5.815209865570068, "step": 1759 }, { "epoch": 0.37, "learning_rate": 1.2689075630252102e-05, "logits/chosen": -1.6349000930786133, "logits/rejected": -1.7339026927947998, "logps/chosen": -326.32177734375, "logps/rejected": -389.75250244140625, "loss": 0.4943, "rewards/accuracies": 0.6875, "rewards/chosen": -2.3336246013641357, "rewards/margins": 2.4318251609802246, "rewards/rejected": -4.765449523925781, "step": 1760 }, { "epoch": 0.37, "learning_rate": 1.2684873949579832e-05, "logits/chosen": -2.1046242713928223, "logits/rejected": -1.6036499738693237, "logps/chosen": -264.6695556640625, "logps/rejected": -248.53213500976562, "loss": 0.2808, "rewards/accuracies": 0.875, "rewards/chosen": -1.1439019441604614, "rewards/margins": 3.315765380859375, "rewards/rejected": -4.459667205810547, "step": 1761 }, { "epoch": 0.37, "learning_rate": 1.2680672268907564e-05, "logits/chosen": -2.101350784301758, "logits/rejected": -2.019341468811035, "logps/chosen": -265.4784240722656, "logps/rejected": -354.42535400390625, "loss": 0.0987, "rewards/accuracies": 1.0, "rewards/chosen": -2.0359983444213867, "rewards/margins": 4.140917778015137, "rewards/rejected": -6.176916122436523, "step": 1762 }, { "epoch": 0.37, "learning_rate": 1.2676470588235294e-05, "logits/chosen": -2.0482287406921387, "logits/rejected": -1.8655407428741455, "logps/chosen": -331.2798767089844, "logps/rejected": -361.16424560546875, "loss": 0.2392, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2306530475616455, "rewards/margins": 3.424342155456543, "rewards/rejected": -5.654995441436768, "step": 1763 }, { "epoch": 0.37, "learning_rate": 1.2672268907563026e-05, "logits/chosen": -1.7039258480072021, "logits/rejected": -1.7191208600997925, "logps/chosen": -304.27294921875, "logps/rejected": -319.51922607421875, "loss": 0.5112, "rewards/accuracies": 0.75, "rewards/chosen": -1.9424810409545898, "rewards/margins": 5.4296650886535645, "rewards/rejected": -7.372146129608154, "step": 1764 }, { "epoch": 0.37, "learning_rate": 1.2668067226890756e-05, "logits/chosen": -2.05804443359375, "logits/rejected": -1.5029891729354858, "logps/chosen": -347.4152526855469, "logps/rejected": -315.47039794921875, "loss": 0.1379, "rewards/accuracies": 0.9375, "rewards/chosen": -1.3976651430130005, "rewards/margins": 5.146216869354248, "rewards/rejected": -6.543881893157959, "step": 1765 }, { "epoch": 0.37, "learning_rate": 1.2663865546218488e-05, "logits/chosen": -2.1624886989593506, "logits/rejected": -1.7037053108215332, "logps/chosen": -347.3921813964844, "logps/rejected": -356.04449462890625, "loss": 0.1601, "rewards/accuracies": 0.875, "rewards/chosen": -1.8009859323501587, "rewards/margins": 4.418320178985596, "rewards/rejected": -6.219306468963623, "step": 1766 }, { "epoch": 0.37, "learning_rate": 1.2659663865546218e-05, "logits/chosen": -2.02056884765625, "logits/rejected": -1.9386581182479858, "logps/chosen": -345.57525634765625, "logps/rejected": -402.684326171875, "loss": 0.2409, "rewards/accuracies": 0.875, "rewards/chosen": -1.8323227167129517, "rewards/margins": 4.6929826736450195, "rewards/rejected": -6.52530574798584, "step": 1767 }, { "epoch": 0.37, "learning_rate": 1.265546218487395e-05, "logits/chosen": -2.2112550735473633, "logits/rejected": -1.8923091888427734, "logps/chosen": -477.8106384277344, "logps/rejected": -340.67205810546875, "loss": 0.1922, "rewards/accuracies": 0.875, "rewards/chosen": -0.971642017364502, "rewards/margins": 4.327385902404785, "rewards/rejected": -5.299027442932129, "step": 1768 }, { "epoch": 0.37, "learning_rate": 1.265126050420168e-05, "logits/chosen": -2.1023073196411133, "logits/rejected": -1.7306984663009644, "logps/chosen": -303.4323425292969, "logps/rejected": -323.16973876953125, "loss": 0.2769, "rewards/accuracies": 0.875, "rewards/chosen": -2.486727237701416, "rewards/margins": 4.5621747970581055, "rewards/rejected": -7.04890251159668, "step": 1769 }, { "epoch": 0.37, "learning_rate": 1.2647058823529412e-05, "logits/chosen": -1.9863693714141846, "logits/rejected": -1.9215013980865479, "logps/chosen": -454.6671447753906, "logps/rejected": -401.3580322265625, "loss": 0.392, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4198495149612427, "rewards/margins": 2.676342010498047, "rewards/rejected": -4.09619140625, "step": 1770 }, { "epoch": 0.37, "learning_rate": 1.2642857142857143e-05, "logits/chosen": -1.9747538566589355, "logits/rejected": -1.675748348236084, "logps/chosen": -375.435546875, "logps/rejected": -293.918701171875, "loss": 0.6753, "rewards/accuracies": 0.75, "rewards/chosen": -3.045513391494751, "rewards/margins": 2.489720106124878, "rewards/rejected": -5.535233497619629, "step": 1771 }, { "epoch": 0.37, "learning_rate": 1.2638655462184874e-05, "logits/chosen": -1.6234101057052612, "logits/rejected": -1.4660919904708862, "logps/chosen": -333.38330078125, "logps/rejected": -311.9102478027344, "loss": 0.2456, "rewards/accuracies": 0.875, "rewards/chosen": -1.617193579673767, "rewards/margins": 3.9928269386291504, "rewards/rejected": -5.610020637512207, "step": 1772 }, { "epoch": 0.37, "learning_rate": 1.2634453781512606e-05, "logits/chosen": -1.9884085655212402, "logits/rejected": -1.7980440855026245, "logps/chosen": -207.3913116455078, "logps/rejected": -298.2470397949219, "loss": 0.2065, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4766228199005127, "rewards/margins": 5.363602638244629, "rewards/rejected": -7.8402252197265625, "step": 1773 }, { "epoch": 0.37, "learning_rate": 1.2630252100840337e-05, "logits/chosen": -2.0810811519622803, "logits/rejected": -1.9490880966186523, "logps/chosen": -271.8350830078125, "logps/rejected": -317.02825927734375, "loss": 0.3528, "rewards/accuracies": 0.875, "rewards/chosen": -2.03255033493042, "rewards/margins": 3.89451265335083, "rewards/rejected": -5.927062511444092, "step": 1774 }, { "epoch": 0.37, "learning_rate": 1.2626050420168069e-05, "logits/chosen": -2.2524514198303223, "logits/rejected": -1.6927098035812378, "logps/chosen": -302.945556640625, "logps/rejected": -291.4638366699219, "loss": 0.2733, "rewards/accuracies": 0.875, "rewards/chosen": -1.8773443698883057, "rewards/margins": 4.05056619644165, "rewards/rejected": -5.927910327911377, "step": 1775 }, { "epoch": 0.37, "learning_rate": 1.2621848739495799e-05, "logits/chosen": -2.152149200439453, "logits/rejected": -1.9415919780731201, "logps/chosen": -373.915771484375, "logps/rejected": -305.147216796875, "loss": 0.3026, "rewards/accuracies": 0.8125, "rewards/chosen": -2.968032121658325, "rewards/margins": 4.385983943939209, "rewards/rejected": -7.354016304016113, "step": 1776 }, { "epoch": 0.37, "learning_rate": 1.261764705882353e-05, "logits/chosen": -2.1463778018951416, "logits/rejected": -2.111959218978882, "logps/chosen": -310.9941711425781, "logps/rejected": -276.97479248046875, "loss": 0.1454, "rewards/accuracies": 0.9375, "rewards/chosen": -1.508514404296875, "rewards/margins": 4.306140899658203, "rewards/rejected": -5.814655303955078, "step": 1777 }, { "epoch": 0.37, "learning_rate": 1.2613445378151261e-05, "logits/chosen": -1.9919275045394897, "logits/rejected": -2.049964666366577, "logps/chosen": -310.8265686035156, "logps/rejected": -322.8869934082031, "loss": 0.3032, "rewards/accuracies": 0.8125, "rewards/chosen": -2.978891372680664, "rewards/margins": 3.948613405227661, "rewards/rejected": -6.927504539489746, "step": 1778 }, { "epoch": 0.37, "learning_rate": 1.2609243697478993e-05, "logits/chosen": -2.1305489540100098, "logits/rejected": -2.024134635925293, "logps/chosen": -350.2703857421875, "logps/rejected": -362.7064514160156, "loss": 0.0997, "rewards/accuracies": 1.0, "rewards/chosen": -1.6748934984207153, "rewards/margins": 6.170192241668701, "rewards/rejected": -7.845085620880127, "step": 1779 }, { "epoch": 0.37, "learning_rate": 1.2605042016806723e-05, "logits/chosen": -2.060441493988037, "logits/rejected": -1.7549536228179932, "logps/chosen": -324.79705810546875, "logps/rejected": -316.6864929199219, "loss": 0.5761, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6303749084472656, "rewards/margins": 3.6565146446228027, "rewards/rejected": -6.286889553070068, "step": 1780 }, { "epoch": 0.37, "learning_rate": 1.2600840336134455e-05, "logits/chosen": -2.2033870220184326, "logits/rejected": -2.097784996032715, "logps/chosen": -274.9949645996094, "logps/rejected": -363.62841796875, "loss": 0.2504, "rewards/accuracies": 0.9375, "rewards/chosen": -2.490706205368042, "rewards/margins": 3.516636848449707, "rewards/rejected": -6.007343292236328, "step": 1781 }, { "epoch": 0.37, "learning_rate": 1.2596638655462185e-05, "logits/chosen": -2.078474521636963, "logits/rejected": -2.1691579818725586, "logps/chosen": -321.22637939453125, "logps/rejected": -319.9599914550781, "loss": 0.3183, "rewards/accuracies": 0.8125, "rewards/chosen": -2.421416997909546, "rewards/margins": 3.7803854942321777, "rewards/rejected": -6.201802730560303, "step": 1782 }, { "epoch": 0.37, "learning_rate": 1.2592436974789917e-05, "logits/chosen": -2.055881977081299, "logits/rejected": -1.9867157936096191, "logps/chosen": -284.76165771484375, "logps/rejected": -295.0223388671875, "loss": 0.1942, "rewards/accuracies": 1.0, "rewards/chosen": -1.6157869100570679, "rewards/margins": 3.34968638420105, "rewards/rejected": -4.965473175048828, "step": 1783 }, { "epoch": 0.37, "learning_rate": 1.2588235294117647e-05, "logits/chosen": -2.031898021697998, "logits/rejected": -1.9355424642562866, "logps/chosen": -348.9483642578125, "logps/rejected": -362.38568115234375, "loss": 0.1382, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9775464534759521, "rewards/margins": 3.9106125831604004, "rewards/rejected": -5.888158798217773, "step": 1784 }, { "epoch": 0.37, "learning_rate": 1.258403361344538e-05, "logits/chosen": -2.340874671936035, "logits/rejected": -2.0267221927642822, "logps/chosen": -359.5558776855469, "logps/rejected": -412.1551513671875, "loss": 0.2687, "rewards/accuracies": 0.9375, "rewards/chosen": -2.624579668045044, "rewards/margins": 5.184628486633301, "rewards/rejected": -7.809207439422607, "step": 1785 }, { "epoch": 0.37, "learning_rate": 1.257983193277311e-05, "logits/chosen": -2.0666234493255615, "logits/rejected": -1.7046170234680176, "logps/chosen": -346.9346923828125, "logps/rejected": -302.680419921875, "loss": 0.4075, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1801719665527344, "rewards/margins": 4.3180341720581055, "rewards/rejected": -7.49820613861084, "step": 1786 }, { "epoch": 0.37, "learning_rate": 1.2575630252100841e-05, "logits/chosen": -1.8434706926345825, "logits/rejected": -1.7718141078948975, "logps/chosen": -180.12802124023438, "logps/rejected": -277.0544738769531, "loss": 0.1289, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9735708236694336, "rewards/margins": 5.332880020141602, "rewards/rejected": -8.306450843811035, "step": 1787 }, { "epoch": 0.37, "learning_rate": 1.2571428571428572e-05, "logits/chosen": -1.8972259759902954, "logits/rejected": -1.821812629699707, "logps/chosen": -302.0366516113281, "logps/rejected": -306.9195556640625, "loss": 0.2788, "rewards/accuracies": 0.8125, "rewards/chosen": -2.321805477142334, "rewards/margins": 3.3359506130218506, "rewards/rejected": -5.6577558517456055, "step": 1788 }, { "epoch": 0.37, "learning_rate": 1.2567226890756304e-05, "logits/chosen": -1.8635022640228271, "logits/rejected": -1.8238285779953003, "logps/chosen": -432.66510009765625, "logps/rejected": -597.79248046875, "loss": 0.2711, "rewards/accuracies": 0.875, "rewards/chosen": -3.3408639430999756, "rewards/margins": 3.872145414352417, "rewards/rejected": -7.213009357452393, "step": 1789 }, { "epoch": 0.37, "learning_rate": 1.2563025210084034e-05, "logits/chosen": -2.29345703125, "logits/rejected": -1.7246224880218506, "logps/chosen": -506.98736572265625, "logps/rejected": -349.2126159667969, "loss": 0.5001, "rewards/accuracies": 0.875, "rewards/chosen": -1.9519894123077393, "rewards/margins": 4.988216400146484, "rewards/rejected": -6.940206050872803, "step": 1790 }, { "epoch": 0.37, "learning_rate": 1.2558823529411766e-05, "logits/chosen": -2.330901861190796, "logits/rejected": -1.635657548904419, "logps/chosen": -424.2184143066406, "logps/rejected": -334.1633605957031, "loss": 0.1574, "rewards/accuracies": 0.875, "rewards/chosen": -2.3283421993255615, "rewards/margins": 4.64754581451416, "rewards/rejected": -6.975888252258301, "step": 1791 }, { "epoch": 0.37, "learning_rate": 1.2554621848739496e-05, "logits/chosen": -2.1808953285217285, "logits/rejected": -1.8212199211120605, "logps/chosen": -305.38995361328125, "logps/rejected": -276.0369567871094, "loss": 0.2905, "rewards/accuracies": 0.75, "rewards/chosen": -3.2360754013061523, "rewards/margins": 2.9036636352539062, "rewards/rejected": -6.139739036560059, "step": 1792 }, { "epoch": 0.38, "learning_rate": 1.2550420168067228e-05, "logits/chosen": -1.8467988967895508, "logits/rejected": -2.1311261653900146, "logps/chosen": -357.418212890625, "logps/rejected": -379.6018371582031, "loss": 0.3273, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3376049995422363, "rewards/margins": 3.7321510314941406, "rewards/rejected": -6.069755554199219, "step": 1793 }, { "epoch": 0.38, "learning_rate": 1.254621848739496e-05, "logits/chosen": -2.3632607460021973, "logits/rejected": -1.9759379625320435, "logps/chosen": -416.6093444824219, "logps/rejected": -334.78594970703125, "loss": 0.5734, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3181886672973633, "rewards/margins": 3.4687159061431885, "rewards/rejected": -6.786904335021973, "step": 1794 }, { "epoch": 0.38, "learning_rate": 1.254201680672269e-05, "logits/chosen": -2.1788644790649414, "logits/rejected": -1.834309697151184, "logps/chosen": -338.6228332519531, "logps/rejected": -351.3233337402344, "loss": 0.0836, "rewards/accuracies": 0.9375, "rewards/chosen": -2.84698748588562, "rewards/margins": 4.984423637390137, "rewards/rejected": -7.831410884857178, "step": 1795 }, { "epoch": 0.38, "learning_rate": 1.2537815126050422e-05, "logits/chosen": -2.167393207550049, "logits/rejected": -1.8985700607299805, "logps/chosen": -296.945556640625, "logps/rejected": -323.76593017578125, "loss": 0.5838, "rewards/accuracies": 0.6875, "rewards/chosen": -3.3066606521606445, "rewards/margins": 2.161682605743408, "rewards/rejected": -5.468343257904053, "step": 1796 }, { "epoch": 0.38, "learning_rate": 1.2533613445378152e-05, "logits/chosen": -2.003908634185791, "logits/rejected": -2.1356709003448486, "logps/chosen": -451.6329345703125, "logps/rejected": -366.5298156738281, "loss": 0.2519, "rewards/accuracies": 0.875, "rewards/chosen": -3.0159528255462646, "rewards/margins": 4.419299125671387, "rewards/rejected": -7.435251712799072, "step": 1797 }, { "epoch": 0.38, "learning_rate": 1.2529411764705884e-05, "logits/chosen": -2.101807117462158, "logits/rejected": -1.8441613912582397, "logps/chosen": -382.4060974121094, "logps/rejected": -411.38531494140625, "loss": 0.1702, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4624245166778564, "rewards/margins": 4.282219886779785, "rewards/rejected": -6.744643688201904, "step": 1798 }, { "epoch": 0.38, "learning_rate": 1.2525210084033614e-05, "logits/chosen": -2.0932106971740723, "logits/rejected": -1.756744384765625, "logps/chosen": -321.96435546875, "logps/rejected": -413.52667236328125, "loss": 0.1299, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6015396118164062, "rewards/margins": 4.415146827697754, "rewards/rejected": -7.01668643951416, "step": 1799 }, { "epoch": 0.38, "learning_rate": 1.2521008403361346e-05, "logits/chosen": -2.006406545639038, "logits/rejected": -1.9936182498931885, "logps/chosen": -381.1437683105469, "logps/rejected": -385.24273681640625, "loss": 0.1648, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6243560314178467, "rewards/margins": 4.992252349853516, "rewards/rejected": -6.616608142852783, "step": 1800 }, { "epoch": 0.38, "learning_rate": 1.2516806722689076e-05, "logits/chosen": -1.9763023853302002, "logits/rejected": -2.1236701011657715, "logps/chosen": -384.1940612792969, "logps/rejected": -301.92425537109375, "loss": 0.3579, "rewards/accuracies": 0.8125, "rewards/chosen": -3.282367706298828, "rewards/margins": 3.205954074859619, "rewards/rejected": -6.488321781158447, "step": 1801 }, { "epoch": 0.38, "learning_rate": 1.2512605042016808e-05, "logits/chosen": -1.9965115785598755, "logits/rejected": -2.1180171966552734, "logps/chosen": -328.7938537597656, "logps/rejected": -369.47601318359375, "loss": 0.3751, "rewards/accuracies": 0.8125, "rewards/chosen": -2.850086212158203, "rewards/margins": 4.269739627838135, "rewards/rejected": -7.119826316833496, "step": 1802 }, { "epoch": 0.38, "learning_rate": 1.2508403361344538e-05, "logits/chosen": -1.8857592344284058, "logits/rejected": -1.5286210775375366, "logps/chosen": -413.4532165527344, "logps/rejected": -309.3803405761719, "loss": 0.8262, "rewards/accuracies": 0.625, "rewards/chosen": -3.072671413421631, "rewards/margins": 2.1183438301086426, "rewards/rejected": -5.191015243530273, "step": 1803 }, { "epoch": 0.38, "learning_rate": 1.250420168067227e-05, "logits/chosen": -1.9694397449493408, "logits/rejected": -1.7799674272537231, "logps/chosen": -340.10809326171875, "logps/rejected": -273.68365478515625, "loss": 0.2487, "rewards/accuracies": 0.875, "rewards/chosen": -2.5552663803100586, "rewards/margins": 4.07852029800415, "rewards/rejected": -6.633787155151367, "step": 1804 }, { "epoch": 0.38, "learning_rate": 1.25e-05, "logits/chosen": -2.1740562915802, "logits/rejected": -1.5397841930389404, "logps/chosen": -398.8807678222656, "logps/rejected": -398.06365966796875, "loss": 0.0683, "rewards/accuracies": 1.0, "rewards/chosen": -3.0995306968688965, "rewards/margins": 5.263266563415527, "rewards/rejected": -8.362797737121582, "step": 1805 }, { "epoch": 0.38, "learning_rate": 1.2495798319327733e-05, "logits/chosen": -2.008077621459961, "logits/rejected": -1.8429821729660034, "logps/chosen": -206.87705993652344, "logps/rejected": -377.868408203125, "loss": 0.0593, "rewards/accuracies": 1.0, "rewards/chosen": -2.930143117904663, "rewards/margins": 6.427976131439209, "rewards/rejected": -9.358119010925293, "step": 1806 }, { "epoch": 0.38, "learning_rate": 1.2491596638655463e-05, "logits/chosen": -2.292978286743164, "logits/rejected": -1.769348382949829, "logps/chosen": -449.630126953125, "logps/rejected": -356.318115234375, "loss": 0.3196, "rewards/accuracies": 0.75, "rewards/chosen": -2.5748438835144043, "rewards/margins": 3.6595332622528076, "rewards/rejected": -6.234376907348633, "step": 1807 }, { "epoch": 0.38, "learning_rate": 1.2487394957983195e-05, "logits/chosen": -1.9882423877716064, "logits/rejected": -1.6709835529327393, "logps/chosen": -363.9814453125, "logps/rejected": -350.1273193359375, "loss": 0.4197, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1633903980255127, "rewards/margins": 3.874863386154175, "rewards/rejected": -7.0382537841796875, "step": 1808 }, { "epoch": 0.38, "learning_rate": 1.2483193277310925e-05, "logits/chosen": -1.8266547918319702, "logits/rejected": -2.1173272132873535, "logps/chosen": -343.9117126464844, "logps/rejected": -471.7044372558594, "loss": 0.2111, "rewards/accuracies": 0.875, "rewards/chosen": -3.2196078300476074, "rewards/margins": 4.750805854797363, "rewards/rejected": -7.9704132080078125, "step": 1809 }, { "epoch": 0.38, "learning_rate": 1.2478991596638657e-05, "logits/chosen": -2.2981908321380615, "logits/rejected": -2.038205623626709, "logps/chosen": -289.29864501953125, "logps/rejected": -438.4106140136719, "loss": 0.4885, "rewards/accuracies": 0.75, "rewards/chosen": -3.077671766281128, "rewards/margins": 3.2957730293273926, "rewards/rejected": -6.373445510864258, "step": 1810 }, { "epoch": 0.38, "learning_rate": 1.2474789915966387e-05, "logits/chosen": -2.1740357875823975, "logits/rejected": -1.7964246273040771, "logps/chosen": -311.854248046875, "logps/rejected": -297.034423828125, "loss": 0.2577, "rewards/accuracies": 0.875, "rewards/chosen": -3.4747464656829834, "rewards/margins": 5.402400493621826, "rewards/rejected": -8.87714672088623, "step": 1811 }, { "epoch": 0.38, "learning_rate": 1.2470588235294119e-05, "logits/chosen": -1.9762654304504395, "logits/rejected": -2.1120619773864746, "logps/chosen": -319.1143493652344, "logps/rejected": -372.3591003417969, "loss": 0.3884, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8194522857666016, "rewards/margins": 3.894730806350708, "rewards/rejected": -6.714183807373047, "step": 1812 }, { "epoch": 0.38, "learning_rate": 1.2466386554621849e-05, "logits/chosen": -1.7402873039245605, "logits/rejected": -1.7582998275756836, "logps/chosen": -412.2801208496094, "logps/rejected": -320.85272216796875, "loss": 0.377, "rewards/accuracies": 0.8125, "rewards/chosen": -3.420719623565674, "rewards/margins": 4.192373752593994, "rewards/rejected": -7.613093376159668, "step": 1813 }, { "epoch": 0.38, "learning_rate": 1.2462184873949581e-05, "logits/chosen": -2.2431375980377197, "logits/rejected": -1.6594526767730713, "logps/chosen": -325.0775146484375, "logps/rejected": -374.4784240722656, "loss": 0.3304, "rewards/accuracies": 0.875, "rewards/chosen": -2.5062460899353027, "rewards/margins": 4.599099636077881, "rewards/rejected": -7.105345726013184, "step": 1814 }, { "epoch": 0.38, "learning_rate": 1.2457983193277311e-05, "logits/chosen": -2.1021182537078857, "logits/rejected": -1.9772509336471558, "logps/chosen": -365.6492614746094, "logps/rejected": -404.7083740234375, "loss": 0.1094, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4489972591400146, "rewards/margins": 7.313340663909912, "rewards/rejected": -9.762338638305664, "step": 1815 }, { "epoch": 0.38, "learning_rate": 1.2453781512605043e-05, "logits/chosen": -2.2247331142425537, "logits/rejected": -1.5387839078903198, "logps/chosen": -421.32513427734375, "logps/rejected": -293.9612731933594, "loss": 0.2203, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6782078742980957, "rewards/margins": 4.486258506774902, "rewards/rejected": -6.164466381072998, "step": 1816 }, { "epoch": 0.38, "learning_rate": 1.2449579831932775e-05, "logits/chosen": -1.4192893505096436, "logits/rejected": -1.1593286991119385, "logps/chosen": -247.40869140625, "logps/rejected": -279.9345397949219, "loss": 0.321, "rewards/accuracies": 0.875, "rewards/chosen": -2.771129608154297, "rewards/margins": 3.8587136268615723, "rewards/rejected": -6.629842758178711, "step": 1817 }, { "epoch": 0.38, "learning_rate": 1.2445378151260505e-05, "logits/chosen": -2.0051915645599365, "logits/rejected": -1.88638436794281, "logps/chosen": -332.06353759765625, "logps/rejected": -364.3916015625, "loss": 0.1771, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8817567825317383, "rewards/margins": 4.202718257904053, "rewards/rejected": -7.084474563598633, "step": 1818 }, { "epoch": 0.38, "learning_rate": 1.2441176470588237e-05, "logits/chosen": -2.141613006591797, "logits/rejected": -1.7716871500015259, "logps/chosen": -342.0851135253906, "logps/rejected": -266.565185546875, "loss": 0.9662, "rewards/accuracies": 0.75, "rewards/chosen": -3.471552610397339, "rewards/margins": 3.467884063720703, "rewards/rejected": -6.939435958862305, "step": 1819 }, { "epoch": 0.38, "learning_rate": 1.2436974789915967e-05, "logits/chosen": -1.7992281913757324, "logits/rejected": -1.7195043563842773, "logps/chosen": -266.8771057128906, "logps/rejected": -328.7740783691406, "loss": 0.5228, "rewards/accuracies": 0.75, "rewards/chosen": -3.260608434677124, "rewards/margins": 2.2436792850494385, "rewards/rejected": -5.5042877197265625, "step": 1820 }, { "epoch": 0.38, "learning_rate": 1.24327731092437e-05, "logits/chosen": -1.9437739849090576, "logits/rejected": -1.8591917753219604, "logps/chosen": -241.35433959960938, "logps/rejected": -244.63584899902344, "loss": 0.306, "rewards/accuracies": 0.9375, "rewards/chosen": -2.722043752670288, "rewards/margins": 3.924424171447754, "rewards/rejected": -6.646467685699463, "step": 1821 }, { "epoch": 0.38, "learning_rate": 1.242857142857143e-05, "logits/chosen": -2.0968518257141113, "logits/rejected": -1.806695818901062, "logps/chosen": -267.68896484375, "logps/rejected": -335.9344482421875, "loss": 0.2301, "rewards/accuracies": 0.875, "rewards/chosen": -2.919706344604492, "rewards/margins": 3.8995752334594727, "rewards/rejected": -6.819281101226807, "step": 1822 }, { "epoch": 0.38, "learning_rate": 1.2424369747899162e-05, "logits/chosen": -2.2418646812438965, "logits/rejected": -1.9673755168914795, "logps/chosen": -470.3053283691406, "logps/rejected": -350.61846923828125, "loss": 0.3785, "rewards/accuracies": 0.8125, "rewards/chosen": -2.542850971221924, "rewards/margins": 3.6771252155303955, "rewards/rejected": -6.219976425170898, "step": 1823 }, { "epoch": 0.38, "learning_rate": 1.2420168067226892e-05, "logits/chosen": -2.122089385986328, "logits/rejected": -1.7735962867736816, "logps/chosen": -298.9089050292969, "logps/rejected": -381.55462646484375, "loss": 0.8752, "rewards/accuracies": 0.875, "rewards/chosen": -3.350724458694458, "rewards/margins": 2.9452266693115234, "rewards/rejected": -6.295950889587402, "step": 1824 }, { "epoch": 0.38, "learning_rate": 1.2415966386554624e-05, "logits/chosen": -1.8487603664398193, "logits/rejected": -1.6678993701934814, "logps/chosen": -399.3428649902344, "logps/rejected": -364.6905517578125, "loss": 0.1929, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2664742469787598, "rewards/margins": 3.8054516315460205, "rewards/rejected": -6.071925163269043, "step": 1825 }, { "epoch": 0.38, "learning_rate": 1.2411764705882354e-05, "logits/chosen": -2.1750378608703613, "logits/rejected": -2.0403733253479004, "logps/chosen": -320.1820373535156, "logps/rejected": -306.345458984375, "loss": 0.3672, "rewards/accuracies": 0.8125, "rewards/chosen": -2.400602102279663, "rewards/margins": 3.223055601119995, "rewards/rejected": -5.623657703399658, "step": 1826 }, { "epoch": 0.38, "learning_rate": 1.2407563025210086e-05, "logits/chosen": -2.0814719200134277, "logits/rejected": -1.6131364107131958, "logps/chosen": -378.1086120605469, "logps/rejected": -374.85986328125, "loss": 0.1537, "rewards/accuracies": 0.9375, "rewards/chosen": -2.997178554534912, "rewards/margins": 2.981276750564575, "rewards/rejected": -5.978455543518066, "step": 1827 }, { "epoch": 0.38, "learning_rate": 1.2403361344537816e-05, "logits/chosen": -2.1030631065368652, "logits/rejected": -1.572075366973877, "logps/chosen": -389.2618408203125, "logps/rejected": -345.2249755859375, "loss": 0.3309, "rewards/accuracies": 0.8125, "rewards/chosen": -2.9812209606170654, "rewards/margins": 4.42304801940918, "rewards/rejected": -7.404269695281982, "step": 1828 }, { "epoch": 0.38, "learning_rate": 1.2399159663865548e-05, "logits/chosen": -1.9673033952713013, "logits/rejected": -1.3781259059906006, "logps/chosen": -368.08544921875, "logps/rejected": -273.9477233886719, "loss": 0.3418, "rewards/accuracies": 0.75, "rewards/chosen": -3.3179750442504883, "rewards/margins": 4.421310901641846, "rewards/rejected": -7.739285469055176, "step": 1829 }, { "epoch": 0.38, "learning_rate": 1.2394957983193278e-05, "logits/chosen": -1.919838547706604, "logits/rejected": -1.8430510759353638, "logps/chosen": -411.90325927734375, "logps/rejected": -366.18853759765625, "loss": 0.124, "rewards/accuracies": 0.9375, "rewards/chosen": -2.576273202896118, "rewards/margins": 3.9834365844726562, "rewards/rejected": -6.5597100257873535, "step": 1830 }, { "epoch": 0.38, "learning_rate": 1.239075630252101e-05, "logits/chosen": -1.9190537929534912, "logits/rejected": -1.440187692642212, "logps/chosen": -209.78269958496094, "logps/rejected": -237.6981201171875, "loss": 0.1538, "rewards/accuracies": 0.9375, "rewards/chosen": -2.269538640975952, "rewards/margins": 4.755407333374023, "rewards/rejected": -7.024946212768555, "step": 1831 }, { "epoch": 0.38, "learning_rate": 1.238655462184874e-05, "logits/chosen": -2.2594292163848877, "logits/rejected": -2.1560447216033936, "logps/chosen": -414.5213623046875, "logps/rejected": -395.2038879394531, "loss": 0.6985, "rewards/accuracies": 0.8125, "rewards/chosen": -2.41933012008667, "rewards/margins": 4.11453914642334, "rewards/rejected": -6.53386926651001, "step": 1832 }, { "epoch": 0.38, "learning_rate": 1.2382352941176472e-05, "logits/chosen": -2.161709785461426, "logits/rejected": -1.6761085987091064, "logps/chosen": -343.27886962890625, "logps/rejected": -285.697021484375, "loss": 0.2704, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8251807689666748, "rewards/margins": 3.791175365447998, "rewards/rejected": -5.616356372833252, "step": 1833 }, { "epoch": 0.38, "learning_rate": 1.2378151260504202e-05, "logits/chosen": -2.0671329498291016, "logits/rejected": -1.7918500900268555, "logps/chosen": -342.55145263671875, "logps/rejected": -354.6083984375, "loss": 0.5277, "rewards/accuracies": 0.875, "rewards/chosen": -2.6176323890686035, "rewards/margins": 4.317749977111816, "rewards/rejected": -6.93538236618042, "step": 1834 }, { "epoch": 0.38, "learning_rate": 1.2373949579831934e-05, "logits/chosen": -2.1687378883361816, "logits/rejected": -1.8899266719818115, "logps/chosen": -225.34622192382812, "logps/rejected": -234.0809783935547, "loss": 0.233, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9901856184005737, "rewards/margins": 3.417259931564331, "rewards/rejected": -5.407445907592773, "step": 1835 }, { "epoch": 0.38, "learning_rate": 1.2369747899159665e-05, "logits/chosen": -2.0599138736724854, "logits/rejected": -2.1695938110351562, "logps/chosen": -336.1058044433594, "logps/rejected": -477.3143310546875, "loss": 0.5413, "rewards/accuracies": 0.8125, "rewards/chosen": -2.623231887817383, "rewards/margins": 3.3073599338531494, "rewards/rejected": -5.930591583251953, "step": 1836 }, { "epoch": 0.38, "learning_rate": 1.2365546218487396e-05, "logits/chosen": -2.415325164794922, "logits/rejected": -2.036621332168579, "logps/chosen": -399.6528625488281, "logps/rejected": -404.95147705078125, "loss": 0.3727, "rewards/accuracies": 0.9375, "rewards/chosen": -2.274367094039917, "rewards/margins": 3.932096242904663, "rewards/rejected": -6.206463813781738, "step": 1837 }, { "epoch": 0.38, "learning_rate": 1.2361344537815127e-05, "logits/chosen": -1.9624621868133545, "logits/rejected": -1.967362403869629, "logps/chosen": -290.8499755859375, "logps/rejected": -354.1822509765625, "loss": 0.3604, "rewards/accuracies": 0.875, "rewards/chosen": -2.795387029647827, "rewards/margins": 4.234407424926758, "rewards/rejected": -7.029794692993164, "step": 1838 }, { "epoch": 0.38, "learning_rate": 1.2357142857142859e-05, "logits/chosen": -2.2429933547973633, "logits/rejected": -1.9684207439422607, "logps/chosen": -313.1953125, "logps/rejected": -408.3805236816406, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -1.9226566553115845, "rewards/margins": 5.263579368591309, "rewards/rejected": -7.1862359046936035, "step": 1839 }, { "epoch": 0.38, "learning_rate": 1.235294117647059e-05, "logits/chosen": -1.9952725172042847, "logits/rejected": -1.7642802000045776, "logps/chosen": -366.318115234375, "logps/rejected": -391.9005126953125, "loss": 0.144, "rewards/accuracies": 0.875, "rewards/chosen": -3.2164859771728516, "rewards/margins": 3.9159996509552, "rewards/rejected": -7.132485389709473, "step": 1840 }, { "epoch": 0.39, "learning_rate": 1.234873949579832e-05, "logits/chosen": -2.1719250679016113, "logits/rejected": -2.069453716278076, "logps/chosen": -291.865234375, "logps/rejected": -282.1805114746094, "loss": 0.3678, "rewards/accuracies": 0.875, "rewards/chosen": -2.3691577911376953, "rewards/margins": 4.0803680419921875, "rewards/rejected": -6.449525356292725, "step": 1841 }, { "epoch": 0.39, "learning_rate": 1.2344537815126053e-05, "logits/chosen": -2.4731712341308594, "logits/rejected": -2.0194525718688965, "logps/chosen": -328.2544250488281, "logps/rejected": -348.77032470703125, "loss": 0.395, "rewards/accuracies": 0.75, "rewards/chosen": -1.9667929410934448, "rewards/margins": 3.2032198905944824, "rewards/rejected": -5.170012474060059, "step": 1842 }, { "epoch": 0.39, "learning_rate": 1.2340336134453783e-05, "logits/chosen": -2.178130865097046, "logits/rejected": -1.832926869392395, "logps/chosen": -453.9772644042969, "logps/rejected": -282.71417236328125, "loss": 0.3241, "rewards/accuracies": 0.875, "rewards/chosen": -1.9788473844528198, "rewards/margins": 3.1136436462402344, "rewards/rejected": -5.0924906730651855, "step": 1843 }, { "epoch": 0.39, "learning_rate": 1.2336134453781515e-05, "logits/chosen": -2.3749067783355713, "logits/rejected": -1.684998869895935, "logps/chosen": -416.4408264160156, "logps/rejected": -350.44952392578125, "loss": 0.0157, "rewards/accuracies": 1.0, "rewards/chosen": -1.3178791999816895, "rewards/margins": 5.601921558380127, "rewards/rejected": -6.919800758361816, "step": 1844 }, { "epoch": 0.39, "learning_rate": 1.2331932773109245e-05, "logits/chosen": -2.029813528060913, "logits/rejected": -1.5848532915115356, "logps/chosen": -403.4461669921875, "logps/rejected": -388.6234436035156, "loss": 0.5381, "rewards/accuracies": 0.8125, "rewards/chosen": -2.925960063934326, "rewards/margins": 2.8877108097076416, "rewards/rejected": -5.813670635223389, "step": 1845 }, { "epoch": 0.39, "learning_rate": 1.2327731092436977e-05, "logits/chosen": -2.211223840713501, "logits/rejected": -1.8415236473083496, "logps/chosen": -335.36273193359375, "logps/rejected": -318.82379150390625, "loss": 0.2432, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6796250343322754, "rewards/margins": 4.041940212249756, "rewards/rejected": -6.721564769744873, "step": 1846 }, { "epoch": 0.39, "learning_rate": 1.2323529411764707e-05, "logits/chosen": -2.3169937133789062, "logits/rejected": -2.1106038093566895, "logps/chosen": -226.32061767578125, "logps/rejected": -293.0423583984375, "loss": 0.0759, "rewards/accuracies": 1.0, "rewards/chosen": -2.950911521911621, "rewards/margins": 4.783463478088379, "rewards/rejected": -7.734374523162842, "step": 1847 }, { "epoch": 0.39, "learning_rate": 1.2319327731092439e-05, "logits/chosen": -1.883255958557129, "logits/rejected": -1.7112150192260742, "logps/chosen": -315.20269775390625, "logps/rejected": -364.6358642578125, "loss": 0.1057, "rewards/accuracies": 1.0, "rewards/chosen": -3.330933094024658, "rewards/margins": 5.704405784606934, "rewards/rejected": -9.03533935546875, "step": 1848 }, { "epoch": 0.39, "learning_rate": 1.231512605042017e-05, "logits/chosen": -1.8779338598251343, "logits/rejected": -1.4539966583251953, "logps/chosen": -280.99468994140625, "logps/rejected": -277.5101623535156, "loss": 0.4459, "rewards/accuracies": 0.75, "rewards/chosen": -2.0637292861938477, "rewards/margins": 3.6735315322875977, "rewards/rejected": -5.7372612953186035, "step": 1849 }, { "epoch": 0.39, "learning_rate": 1.2310924369747901e-05, "logits/chosen": -2.295351028442383, "logits/rejected": -1.9756437540054321, "logps/chosen": -307.5069885253906, "logps/rejected": -249.9366455078125, "loss": 0.145, "rewards/accuracies": 0.9375, "rewards/chosen": -3.8945703506469727, "rewards/margins": 4.176760196685791, "rewards/rejected": -8.071331024169922, "step": 1850 }, { "epoch": 0.39, "learning_rate": 1.2306722689075631e-05, "logits/chosen": -1.9587208032608032, "logits/rejected": -1.7900536060333252, "logps/chosen": -344.981689453125, "logps/rejected": -399.29254150390625, "loss": 0.3901, "rewards/accuracies": 0.75, "rewards/chosen": -2.953500747680664, "rewards/margins": 2.9267892837524414, "rewards/rejected": -5.8802900314331055, "step": 1851 }, { "epoch": 0.39, "learning_rate": 1.2302521008403363e-05, "logits/chosen": -2.180280923843384, "logits/rejected": -1.8776546716690063, "logps/chosen": -298.33251953125, "logps/rejected": -330.929931640625, "loss": 0.2205, "rewards/accuracies": 0.875, "rewards/chosen": -3.1914925575256348, "rewards/margins": 3.972294569015503, "rewards/rejected": -7.163786888122559, "step": 1852 }, { "epoch": 0.39, "learning_rate": 1.2298319327731094e-05, "logits/chosen": -2.248622179031372, "logits/rejected": -1.5207862854003906, "logps/chosen": -312.92364501953125, "logps/rejected": -269.34326171875, "loss": 0.2348, "rewards/accuracies": 0.875, "rewards/chosen": -2.079177141189575, "rewards/margins": 4.692597389221191, "rewards/rejected": -6.771773815155029, "step": 1853 }, { "epoch": 0.39, "learning_rate": 1.2294117647058826e-05, "logits/chosen": -1.6843230724334717, "logits/rejected": -1.6593902111053467, "logps/chosen": -296.5410461425781, "logps/rejected": -300.2232360839844, "loss": 0.6384, "rewards/accuracies": 0.75, "rewards/chosen": -3.180907726287842, "rewards/margins": 2.1584579944610596, "rewards/rejected": -5.3393659591674805, "step": 1854 }, { "epoch": 0.39, "learning_rate": 1.2289915966386556e-05, "logits/chosen": -1.933302879333496, "logits/rejected": -1.5852653980255127, "logps/chosen": -334.7741394042969, "logps/rejected": -335.3190002441406, "loss": 0.318, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1619303226470947, "rewards/margins": 4.065133571624756, "rewards/rejected": -7.22706413269043, "step": 1855 }, { "epoch": 0.39, "learning_rate": 1.2285714285714288e-05, "logits/chosen": -1.9559996128082275, "logits/rejected": -2.0741889476776123, "logps/chosen": -335.0888671875, "logps/rejected": -390.7261962890625, "loss": 0.6481, "rewards/accuracies": 0.8125, "rewards/chosen": -2.708482503890991, "rewards/margins": 3.836520195007324, "rewards/rejected": -6.5450029373168945, "step": 1856 }, { "epoch": 0.39, "learning_rate": 1.2281512605042018e-05, "logits/chosen": -2.2684614658355713, "logits/rejected": -1.9043837785720825, "logps/chosen": -387.27740478515625, "logps/rejected": -390.257568359375, "loss": 0.2113, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9054386615753174, "rewards/margins": 5.122135162353516, "rewards/rejected": -7.027573585510254, "step": 1857 }, { "epoch": 0.39, "learning_rate": 1.227731092436975e-05, "logits/chosen": -2.1066670417785645, "logits/rejected": -1.9788291454315186, "logps/chosen": -367.8558349609375, "logps/rejected": -354.92803955078125, "loss": 0.173, "rewards/accuracies": 0.9375, "rewards/chosen": -2.427006483078003, "rewards/margins": 3.7116971015930176, "rewards/rejected": -6.1387038230896, "step": 1858 }, { "epoch": 0.39, "learning_rate": 1.227310924369748e-05, "logits/chosen": -2.207350730895996, "logits/rejected": -1.7087862491607666, "logps/chosen": -319.0138244628906, "logps/rejected": -294.2601318359375, "loss": 0.3015, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6084644794464111, "rewards/margins": 4.778830528259277, "rewards/rejected": -6.387295246124268, "step": 1859 }, { "epoch": 0.39, "learning_rate": 1.2268907563025212e-05, "logits/chosen": -1.8574326038360596, "logits/rejected": -1.946333646774292, "logps/chosen": -219.79058837890625, "logps/rejected": -243.98524475097656, "loss": 0.7268, "rewards/accuracies": 0.875, "rewards/chosen": -3.346604824066162, "rewards/margins": 2.4032351970672607, "rewards/rejected": -5.74984073638916, "step": 1860 }, { "epoch": 0.39, "learning_rate": 1.2264705882352944e-05, "logits/chosen": -2.2645788192749023, "logits/rejected": -1.8685322999954224, "logps/chosen": -400.17987060546875, "logps/rejected": -336.9418640136719, "loss": 0.4802, "rewards/accuracies": 0.875, "rewards/chosen": -2.426004648208618, "rewards/margins": 2.8071141242980957, "rewards/rejected": -5.233119010925293, "step": 1861 }, { "epoch": 0.39, "learning_rate": 1.2260504201680674e-05, "logits/chosen": -1.938511610031128, "logits/rejected": -1.6733431816101074, "logps/chosen": -272.3828125, "logps/rejected": -316.3158264160156, "loss": 0.229, "rewards/accuracies": 0.875, "rewards/chosen": -2.9613900184631348, "rewards/margins": 5.299942970275879, "rewards/rejected": -8.261332511901855, "step": 1862 }, { "epoch": 0.39, "learning_rate": 1.2256302521008406e-05, "logits/chosen": -2.374242067337036, "logits/rejected": -1.8657582998275757, "logps/chosen": -345.5589294433594, "logps/rejected": -280.80706787109375, "loss": 0.2125, "rewards/accuracies": 0.9375, "rewards/chosen": -2.298710346221924, "rewards/margins": 3.296266794204712, "rewards/rejected": -5.594976902008057, "step": 1863 }, { "epoch": 0.39, "learning_rate": 1.2252100840336136e-05, "logits/chosen": -2.059525728225708, "logits/rejected": -1.8084293603897095, "logps/chosen": -305.459716796875, "logps/rejected": -347.12939453125, "loss": 0.1119, "rewards/accuracies": 0.9375, "rewards/chosen": -1.765744924545288, "rewards/margins": 5.005342960357666, "rewards/rejected": -6.771087646484375, "step": 1864 }, { "epoch": 0.39, "learning_rate": 1.2247899159663868e-05, "logits/chosen": -2.169187545776367, "logits/rejected": -1.6504592895507812, "logps/chosen": -378.8183898925781, "logps/rejected": -385.246337890625, "loss": 0.1025, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1404271125793457, "rewards/margins": 5.499130725860596, "rewards/rejected": -6.639557838439941, "step": 1865 }, { "epoch": 0.39, "learning_rate": 1.2243697478991598e-05, "logits/chosen": -2.105647087097168, "logits/rejected": -1.946838140487671, "logps/chosen": -451.7643737792969, "logps/rejected": -392.23602294921875, "loss": 0.201, "rewards/accuracies": 0.875, "rewards/chosen": -1.2099411487579346, "rewards/margins": 4.705042839050293, "rewards/rejected": -5.914983749389648, "step": 1866 }, { "epoch": 0.39, "learning_rate": 1.223949579831933e-05, "logits/chosen": -2.0883190631866455, "logits/rejected": -1.921244740486145, "logps/chosen": -341.49237060546875, "logps/rejected": -408.92071533203125, "loss": 0.2228, "rewards/accuracies": 0.875, "rewards/chosen": -2.4461169242858887, "rewards/margins": 3.4331729412078857, "rewards/rejected": -5.879289627075195, "step": 1867 }, { "epoch": 0.39, "learning_rate": 1.223529411764706e-05, "logits/chosen": -2.150362730026245, "logits/rejected": -2.0336923599243164, "logps/chosen": -325.43865966796875, "logps/rejected": -318.94384765625, "loss": 0.4782, "rewards/accuracies": 0.75, "rewards/chosen": -2.354466199874878, "rewards/margins": 2.811051368713379, "rewards/rejected": -5.165517807006836, "step": 1868 }, { "epoch": 0.39, "learning_rate": 1.2231092436974792e-05, "logits/chosen": -2.0908029079437256, "logits/rejected": -2.0888607501983643, "logps/chosen": -376.7675476074219, "logps/rejected": -401.38800048828125, "loss": 0.0838, "rewards/accuracies": 1.0, "rewards/chosen": -2.6777400970458984, "rewards/margins": 5.11637020111084, "rewards/rejected": -7.7941107749938965, "step": 1869 }, { "epoch": 0.39, "learning_rate": 1.2226890756302523e-05, "logits/chosen": -2.0638866424560547, "logits/rejected": -2.0652823448181152, "logps/chosen": -240.63055419921875, "logps/rejected": -320.2286682128906, "loss": 0.4652, "rewards/accuracies": 0.8125, "rewards/chosen": -3.447099447250366, "rewards/margins": 3.8420157432556152, "rewards/rejected": -7.289114952087402, "step": 1870 }, { "epoch": 0.39, "learning_rate": 1.2222689075630255e-05, "logits/chosen": -2.0308241844177246, "logits/rejected": -1.9425818920135498, "logps/chosen": -311.0562744140625, "logps/rejected": -281.4987487792969, "loss": 0.5196, "rewards/accuracies": 0.75, "rewards/chosen": -2.1683919429779053, "rewards/margins": 3.7399520874023438, "rewards/rejected": -5.908344268798828, "step": 1871 }, { "epoch": 0.39, "learning_rate": 1.2218487394957985e-05, "logits/chosen": -2.0704848766326904, "logits/rejected": -2.019864797592163, "logps/chosen": -262.2308349609375, "logps/rejected": -347.4579772949219, "loss": 0.2654, "rewards/accuracies": 0.75, "rewards/chosen": -2.4987740516662598, "rewards/margins": 4.221687316894531, "rewards/rejected": -6.720461368560791, "step": 1872 }, { "epoch": 0.39, "learning_rate": 1.2214285714285717e-05, "logits/chosen": -2.205272674560547, "logits/rejected": -1.9787051677703857, "logps/chosen": -421.778564453125, "logps/rejected": -324.6605529785156, "loss": 0.4956, "rewards/accuracies": 0.6875, "rewards/chosen": -2.56027889251709, "rewards/margins": 3.219339370727539, "rewards/rejected": -5.779618263244629, "step": 1873 }, { "epoch": 0.39, "learning_rate": 1.2210084033613447e-05, "logits/chosen": -2.021812915802002, "logits/rejected": -1.8759019374847412, "logps/chosen": -241.0976104736328, "logps/rejected": -316.0867919921875, "loss": 0.2228, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9314661026000977, "rewards/margins": 4.005714416503906, "rewards/rejected": -6.937180519104004, "step": 1874 }, { "epoch": 0.39, "learning_rate": 1.2205882352941179e-05, "logits/chosen": -1.9348081350326538, "logits/rejected": -1.3799512386322021, "logps/chosen": -351.6767272949219, "logps/rejected": -270.9066467285156, "loss": 0.5187, "rewards/accuracies": 0.8125, "rewards/chosen": -2.621941089630127, "rewards/margins": 3.3479952812194824, "rewards/rejected": -5.969936370849609, "step": 1875 }, { "epoch": 0.39, "learning_rate": 1.2201680672268909e-05, "logits/chosen": -1.9474555253982544, "logits/rejected": -1.9348561763763428, "logps/chosen": -437.6739501953125, "logps/rejected": -411.3087158203125, "loss": 0.3923, "rewards/accuracies": 0.875, "rewards/chosen": -2.4875917434692383, "rewards/margins": 1.853659987449646, "rewards/rejected": -4.341251850128174, "step": 1876 }, { "epoch": 0.39, "learning_rate": 1.2197478991596641e-05, "logits/chosen": -1.7438081502914429, "logits/rejected": -1.4689557552337646, "logps/chosen": -440.9767761230469, "logps/rejected": -310.000732421875, "loss": 0.2247, "rewards/accuracies": 0.875, "rewards/chosen": -2.828077793121338, "rewards/margins": 3.6447439193725586, "rewards/rejected": -6.4728217124938965, "step": 1877 }, { "epoch": 0.39, "learning_rate": 1.2193277310924371e-05, "logits/chosen": -1.959306001663208, "logits/rejected": -2.010695219039917, "logps/chosen": -330.3531494140625, "logps/rejected": -386.5733642578125, "loss": 0.6733, "rewards/accuracies": 0.75, "rewards/chosen": -2.080143690109253, "rewards/margins": 2.886941909790039, "rewards/rejected": -4.967085838317871, "step": 1878 }, { "epoch": 0.39, "learning_rate": 1.2189075630252103e-05, "logits/chosen": -1.805151104927063, "logits/rejected": -1.7687690258026123, "logps/chosen": -322.3372802734375, "logps/rejected": -356.020751953125, "loss": 0.1384, "rewards/accuracies": 0.9375, "rewards/chosen": -2.123734951019287, "rewards/margins": 4.347909927368164, "rewards/rejected": -6.471644878387451, "step": 1879 }, { "epoch": 0.39, "learning_rate": 1.2184873949579832e-05, "logits/chosen": -2.183786153793335, "logits/rejected": -1.9782054424285889, "logps/chosen": -302.1595458984375, "logps/rejected": -330.3889465332031, "loss": 0.4196, "rewards/accuracies": 0.6875, "rewards/chosen": -1.5051831007003784, "rewards/margins": 2.912980556488037, "rewards/rejected": -4.418163299560547, "step": 1880 }, { "epoch": 0.39, "learning_rate": 1.2180672268907564e-05, "logits/chosen": -1.95109224319458, "logits/rejected": -2.0503132343292236, "logps/chosen": -272.64251708984375, "logps/rejected": -363.9761962890625, "loss": 0.2787, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5736207962036133, "rewards/margins": 4.0049285888671875, "rewards/rejected": -6.578548431396484, "step": 1881 }, { "epoch": 0.39, "learning_rate": 1.2176470588235294e-05, "logits/chosen": -2.1732020378112793, "logits/rejected": -2.153201103210449, "logps/chosen": -457.95361328125, "logps/rejected": -378.83099365234375, "loss": 0.4302, "rewards/accuracies": 0.8125, "rewards/chosen": -2.479139566421509, "rewards/margins": 2.250716209411621, "rewards/rejected": -4.729856014251709, "step": 1882 }, { "epoch": 0.39, "learning_rate": 1.2172268907563026e-05, "logits/chosen": -2.315631866455078, "logits/rejected": -1.9229233264923096, "logps/chosen": -488.20550537109375, "logps/rejected": -354.6662902832031, "loss": 0.2978, "rewards/accuracies": 0.875, "rewards/chosen": -1.7448663711547852, "rewards/margins": 3.9652786254882812, "rewards/rejected": -5.710144519805908, "step": 1883 }, { "epoch": 0.39, "learning_rate": 1.2168067226890756e-05, "logits/chosen": -2.077934980392456, "logits/rejected": -1.9388132095336914, "logps/chosen": -387.1546630859375, "logps/rejected": -347.7235412597656, "loss": 0.2535, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1852521896362305, "rewards/margins": 3.9739584922790527, "rewards/rejected": -6.159210681915283, "step": 1884 }, { "epoch": 0.39, "learning_rate": 1.2163865546218488e-05, "logits/chosen": -2.117459774017334, "logits/rejected": -2.051194906234741, "logps/chosen": -279.4011535644531, "logps/rejected": -305.2198486328125, "loss": 0.2484, "rewards/accuracies": 0.875, "rewards/chosen": -1.9879226684570312, "rewards/margins": 3.9829320907592773, "rewards/rejected": -5.970855236053467, "step": 1885 }, { "epoch": 0.39, "learning_rate": 1.2159663865546218e-05, "logits/chosen": -2.4349608421325684, "logits/rejected": -2.1075141429901123, "logps/chosen": -362.0382080078125, "logps/rejected": -391.5584716796875, "loss": 0.255, "rewards/accuracies": 0.9375, "rewards/chosen": -1.621695637702942, "rewards/margins": 4.289048194885254, "rewards/rejected": -5.910743713378906, "step": 1886 }, { "epoch": 0.39, "learning_rate": 1.215546218487395e-05, "logits/chosen": -1.9408646821975708, "logits/rejected": -2.12056827545166, "logps/chosen": -273.6898498535156, "logps/rejected": -376.93243408203125, "loss": 0.5742, "rewards/accuracies": 0.625, "rewards/chosen": -2.8257675170898438, "rewards/margins": 3.0630791187286377, "rewards/rejected": -5.888847351074219, "step": 1887 }, { "epoch": 0.39, "learning_rate": 1.215126050420168e-05, "logits/chosen": -2.063706159591675, "logits/rejected": -1.9158318042755127, "logps/chosen": -329.215087890625, "logps/rejected": -332.4383850097656, "loss": 0.5738, "rewards/accuracies": 0.5625, "rewards/chosen": -2.1032702922821045, "rewards/margins": 3.3517837524414062, "rewards/rejected": -5.45505428314209, "step": 1888 }, { "epoch": 0.4, "learning_rate": 1.2147058823529412e-05, "logits/chosen": -1.6034259796142578, "logits/rejected": -1.8200292587280273, "logps/chosen": -262.8677673339844, "logps/rejected": -364.11285400390625, "loss": 0.3117, "rewards/accuracies": 0.8125, "rewards/chosen": -1.900654911994934, "rewards/margins": 3.036217212677002, "rewards/rejected": -4.9368720054626465, "step": 1889 }, { "epoch": 0.4, "learning_rate": 1.2142857142857142e-05, "logits/chosen": -2.2216062545776367, "logits/rejected": -1.4916836023330688, "logps/chosen": -337.0675048828125, "logps/rejected": -309.1094055175781, "loss": 0.2597, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1138360500335693, "rewards/margins": 3.1984457969665527, "rewards/rejected": -5.312282085418701, "step": 1890 }, { "epoch": 0.4, "learning_rate": 1.2138655462184874e-05, "logits/chosen": -1.8620685338974, "logits/rejected": -1.8574579954147339, "logps/chosen": -267.0176696777344, "logps/rejected": -299.7701721191406, "loss": 0.2454, "rewards/accuracies": 0.875, "rewards/chosen": -2.9521660804748535, "rewards/margins": 3.5621039867401123, "rewards/rejected": -6.514269828796387, "step": 1891 }, { "epoch": 0.4, "learning_rate": 1.2134453781512604e-05, "logits/chosen": -2.2182257175445557, "logits/rejected": -2.009401798248291, "logps/chosen": -350.63861083984375, "logps/rejected": -344.4203796386719, "loss": 0.0989, "rewards/accuracies": 1.0, "rewards/chosen": -1.450014352798462, "rewards/margins": 5.047044277191162, "rewards/rejected": -6.497058868408203, "step": 1892 }, { "epoch": 0.4, "learning_rate": 1.2130252100840336e-05, "logits/chosen": -2.178387403488159, "logits/rejected": -1.8148460388183594, "logps/chosen": -261.49285888671875, "logps/rejected": -250.3060302734375, "loss": 0.2948, "rewards/accuracies": 0.9375, "rewards/chosen": -2.348268508911133, "rewards/margins": 4.149266719818115, "rewards/rejected": -6.49753475189209, "step": 1893 }, { "epoch": 0.4, "learning_rate": 1.2126050420168067e-05, "logits/chosen": -1.7209651470184326, "logits/rejected": -1.977144718170166, "logps/chosen": -221.2631072998047, "logps/rejected": -295.52752685546875, "loss": 0.0913, "rewards/accuracies": 1.0, "rewards/chosen": -3.125429153442383, "rewards/margins": 4.741949081420898, "rewards/rejected": -7.867377758026123, "step": 1894 }, { "epoch": 0.4, "learning_rate": 1.2121848739495798e-05, "logits/chosen": -2.089787483215332, "logits/rejected": -1.8356645107269287, "logps/chosen": -350.14373779296875, "logps/rejected": -305.8224792480469, "loss": 0.5869, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5162816047668457, "rewards/margins": 4.68880558013916, "rewards/rejected": -6.205087184906006, "step": 1895 }, { "epoch": 0.4, "learning_rate": 1.211764705882353e-05, "logits/chosen": -2.0634453296661377, "logits/rejected": -1.5936388969421387, "logps/chosen": -335.98223876953125, "logps/rejected": -396.586181640625, "loss": 0.404, "rewards/accuracies": 0.8125, "rewards/chosen": -1.992807149887085, "rewards/margins": 3.9616644382476807, "rewards/rejected": -5.954472064971924, "step": 1896 }, { "epoch": 0.4, "learning_rate": 1.211344537815126e-05, "logits/chosen": -2.292320966720581, "logits/rejected": -1.9044640064239502, "logps/chosen": -400.520263671875, "logps/rejected": -357.2868347167969, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -1.5305293798446655, "rewards/margins": 5.195306301116943, "rewards/rejected": -6.72583532333374, "step": 1897 }, { "epoch": 0.4, "learning_rate": 1.2109243697478993e-05, "logits/chosen": -2.240651845932007, "logits/rejected": -1.7099661827087402, "logps/chosen": -342.96990966796875, "logps/rejected": -262.08331298828125, "loss": 0.8412, "rewards/accuracies": 0.6875, "rewards/chosen": -2.9160306453704834, "rewards/margins": 3.297761917114258, "rewards/rejected": -6.21379280090332, "step": 1898 }, { "epoch": 0.4, "learning_rate": 1.2105042016806723e-05, "logits/chosen": -1.9975688457489014, "logits/rejected": -1.7819318771362305, "logps/chosen": -241.73753356933594, "logps/rejected": -228.4881134033203, "loss": 0.289, "rewards/accuracies": 0.875, "rewards/chosen": -2.0775880813598633, "rewards/margins": 3.5786678791046143, "rewards/rejected": -5.656255722045898, "step": 1899 }, { "epoch": 0.4, "learning_rate": 1.2100840336134455e-05, "logits/chosen": -1.7145962715148926, "logits/rejected": -1.9114662408828735, "logps/chosen": -354.4574279785156, "logps/rejected": -506.02679443359375, "loss": 0.0368, "rewards/accuracies": 1.0, "rewards/chosen": -2.365055561065674, "rewards/margins": 5.540973663330078, "rewards/rejected": -7.906028747558594, "step": 1900 }, { "epoch": 0.4, "learning_rate": 1.2096638655462185e-05, "logits/chosen": -1.8172868490219116, "logits/rejected": -1.8095623254776, "logps/chosen": -382.0679931640625, "logps/rejected": -406.56988525390625, "loss": 0.1669, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6362740993499756, "rewards/margins": 4.088894367218018, "rewards/rejected": -5.725168228149414, "step": 1901 }, { "epoch": 0.4, "learning_rate": 1.2092436974789917e-05, "logits/chosen": -2.0657129287719727, "logits/rejected": -1.6017169952392578, "logps/chosen": -367.9393310546875, "logps/rejected": -360.8873596191406, "loss": 0.2588, "rewards/accuracies": 0.9375, "rewards/chosen": -2.119049072265625, "rewards/margins": 3.978104591369629, "rewards/rejected": -6.097153663635254, "step": 1902 }, { "epoch": 0.4, "learning_rate": 1.2088235294117647e-05, "logits/chosen": -1.9344240427017212, "logits/rejected": -1.7728099822998047, "logps/chosen": -351.71624755859375, "logps/rejected": -399.22308349609375, "loss": 0.2671, "rewards/accuracies": 0.875, "rewards/chosen": -2.720890522003174, "rewards/margins": 4.908173561096191, "rewards/rejected": -7.629063606262207, "step": 1903 }, { "epoch": 0.4, "learning_rate": 1.2084033613445379e-05, "logits/chosen": -2.136500597000122, "logits/rejected": -1.817636251449585, "logps/chosen": -378.0855407714844, "logps/rejected": -467.0699462890625, "loss": 0.2881, "rewards/accuracies": 0.875, "rewards/chosen": -1.8037837743759155, "rewards/margins": 4.506923675537109, "rewards/rejected": -6.310707092285156, "step": 1904 }, { "epoch": 0.4, "learning_rate": 1.207983193277311e-05, "logits/chosen": -1.924896001815796, "logits/rejected": -1.7402033805847168, "logps/chosen": -317.7908020019531, "logps/rejected": -314.0490417480469, "loss": 0.393, "rewards/accuracies": 0.875, "rewards/chosen": -2.650522232055664, "rewards/margins": 3.0206260681152344, "rewards/rejected": -5.671148777008057, "step": 1905 }, { "epoch": 0.4, "learning_rate": 1.2075630252100841e-05, "logits/chosen": -2.1404194831848145, "logits/rejected": -2.0721638202667236, "logps/chosen": -376.66436767578125, "logps/rejected": -394.8505859375, "loss": 0.156, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9328482151031494, "rewards/margins": 3.471165180206299, "rewards/rejected": -5.404013633728027, "step": 1906 }, { "epoch": 0.4, "learning_rate": 1.2071428571428571e-05, "logits/chosen": -2.142035961151123, "logits/rejected": -1.7251577377319336, "logps/chosen": -376.7251892089844, "logps/rejected": -284.1921691894531, "loss": 0.3614, "rewards/accuracies": 0.75, "rewards/chosen": -2.8773622512817383, "rewards/margins": 2.6040382385253906, "rewards/rejected": -5.481400489807129, "step": 1907 }, { "epoch": 0.4, "learning_rate": 1.2067226890756303e-05, "logits/chosen": -2.1734681129455566, "logits/rejected": -2.0065765380859375, "logps/chosen": -334.5392150878906, "logps/rejected": -393.1302185058594, "loss": 0.1577, "rewards/accuracies": 0.875, "rewards/chosen": -2.3483502864837646, "rewards/margins": 6.308602333068848, "rewards/rejected": -8.656951904296875, "step": 1908 }, { "epoch": 0.4, "learning_rate": 1.2063025210084033e-05, "logits/chosen": -1.9934320449829102, "logits/rejected": -2.2016761302948, "logps/chosen": -228.0828399658203, "logps/rejected": -319.03955078125, "loss": 0.5653, "rewards/accuracies": 0.875, "rewards/chosen": -2.9895856380462646, "rewards/margins": 3.5494492053985596, "rewards/rejected": -6.539034843444824, "step": 1909 }, { "epoch": 0.4, "learning_rate": 1.2058823529411765e-05, "logits/chosen": -2.2420475482940674, "logits/rejected": -2.120718002319336, "logps/chosen": -301.90911865234375, "logps/rejected": -350.76873779296875, "loss": 0.3288, "rewards/accuracies": 0.875, "rewards/chosen": -2.2650742530822754, "rewards/margins": 3.5707528591156006, "rewards/rejected": -5.835826873779297, "step": 1910 }, { "epoch": 0.4, "learning_rate": 1.2054621848739496e-05, "logits/chosen": -1.7912968397140503, "logits/rejected": -1.8730591535568237, "logps/chosen": -295.6524658203125, "logps/rejected": -340.78265380859375, "loss": 0.1912, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5242538452148438, "rewards/margins": 4.108933925628662, "rewards/rejected": -5.633187770843506, "step": 1911 }, { "epoch": 0.4, "learning_rate": 1.2050420168067227e-05, "logits/chosen": -2.1771109104156494, "logits/rejected": -1.5272819995880127, "logps/chosen": -390.6642761230469, "logps/rejected": -357.1390075683594, "loss": 0.0604, "rewards/accuracies": 1.0, "rewards/chosen": -0.7402662038803101, "rewards/margins": 6.469948768615723, "rewards/rejected": -7.2102155685424805, "step": 1912 }, { "epoch": 0.4, "learning_rate": 1.2046218487394958e-05, "logits/chosen": -2.007319927215576, "logits/rejected": -1.7263262271881104, "logps/chosen": -396.69183349609375, "logps/rejected": -383.7300109863281, "loss": 0.2062, "rewards/accuracies": 0.9375, "rewards/chosen": -0.8319088220596313, "rewards/margins": 3.7907354831695557, "rewards/rejected": -4.622644424438477, "step": 1913 }, { "epoch": 0.4, "learning_rate": 1.204201680672269e-05, "logits/chosen": -2.216395378112793, "logits/rejected": -1.9293133020401, "logps/chosen": -277.431396484375, "logps/rejected": -282.9858703613281, "loss": 0.2279, "rewards/accuracies": 0.875, "rewards/chosen": -2.076395034790039, "rewards/margins": 4.433126449584961, "rewards/rejected": -6.509521484375, "step": 1914 }, { "epoch": 0.4, "learning_rate": 1.203781512605042e-05, "logits/chosen": -1.8018733263015747, "logits/rejected": -2.0765953063964844, "logps/chosen": -308.0455017089844, "logps/rejected": -368.81005859375, "loss": 0.2452, "rewards/accuracies": 0.8125, "rewards/chosen": -2.542095899581909, "rewards/margins": 4.802630424499512, "rewards/rejected": -7.344725608825684, "step": 1915 }, { "epoch": 0.4, "learning_rate": 1.2033613445378152e-05, "logits/chosen": -1.9174859523773193, "logits/rejected": -1.7762975692749023, "logps/chosen": -418.926513671875, "logps/rejected": -354.2291259765625, "loss": 0.0848, "rewards/accuracies": 1.0, "rewards/chosen": -1.6412107944488525, "rewards/margins": 5.765614986419678, "rewards/rejected": -7.406826019287109, "step": 1916 }, { "epoch": 0.4, "learning_rate": 1.2029411764705882e-05, "logits/chosen": -2.0088179111480713, "logits/rejected": -1.851726770401001, "logps/chosen": -344.12451171875, "logps/rejected": -348.35101318359375, "loss": 0.591, "rewards/accuracies": 0.6875, "rewards/chosen": -2.588670492172241, "rewards/margins": 3.583779811859131, "rewards/rejected": -6.172450542449951, "step": 1917 }, { "epoch": 0.4, "learning_rate": 1.2025210084033614e-05, "logits/chosen": -1.945975661277771, "logits/rejected": -1.8420360088348389, "logps/chosen": -228.79737854003906, "logps/rejected": -242.62191772460938, "loss": 0.3266, "rewards/accuracies": 0.875, "rewards/chosen": -2.1105878353118896, "rewards/margins": 2.999886989593506, "rewards/rejected": -5.110475063323975, "step": 1918 }, { "epoch": 0.4, "learning_rate": 1.2021008403361346e-05, "logits/chosen": -2.0177059173583984, "logits/rejected": -1.9783623218536377, "logps/chosen": -383.1611022949219, "logps/rejected": -450.2672424316406, "loss": 0.3609, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5593056678771973, "rewards/margins": 4.635698318481445, "rewards/rejected": -6.195004463195801, "step": 1919 }, { "epoch": 0.4, "learning_rate": 1.2016806722689076e-05, "logits/chosen": -2.3523528575897217, "logits/rejected": -2.425680637359619, "logps/chosen": -428.9132080078125, "logps/rejected": -518.2432250976562, "loss": 0.3788, "rewards/accuracies": 0.8125, "rewards/chosen": -2.144625186920166, "rewards/margins": 3.4645891189575195, "rewards/rejected": -5.609213829040527, "step": 1920 }, { "epoch": 0.4, "learning_rate": 1.2012605042016808e-05, "logits/chosen": -2.0367918014526367, "logits/rejected": -2.1302733421325684, "logps/chosen": -296.88134765625, "logps/rejected": -338.58477783203125, "loss": 0.5792, "rewards/accuracies": 0.8125, "rewards/chosen": -2.708970069885254, "rewards/margins": 3.3739919662475586, "rewards/rejected": -6.0829620361328125, "step": 1921 }, { "epoch": 0.4, "learning_rate": 1.2008403361344538e-05, "logits/chosen": -2.3181698322296143, "logits/rejected": -1.9248433113098145, "logps/chosen": -428.5474548339844, "logps/rejected": -337.886474609375, "loss": 0.3008, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7446939945220947, "rewards/margins": 3.9507875442504883, "rewards/rejected": -5.695481777191162, "step": 1922 }, { "epoch": 0.4, "learning_rate": 1.200420168067227e-05, "logits/chosen": -2.095181465148926, "logits/rejected": -1.7831504344940186, "logps/chosen": -401.6945495605469, "logps/rejected": -413.27313232421875, "loss": 0.5541, "rewards/accuracies": 0.875, "rewards/chosen": -2.7652153968811035, "rewards/margins": 3.8777570724487305, "rewards/rejected": -6.642971992492676, "step": 1923 }, { "epoch": 0.4, "learning_rate": 1.2e-05, "logits/chosen": -2.323911190032959, "logits/rejected": -2.004767417907715, "logps/chosen": -362.94989013671875, "logps/rejected": -304.14801025390625, "loss": 0.3896, "rewards/accuracies": 0.75, "rewards/chosen": -3.179198741912842, "rewards/margins": 2.648852825164795, "rewards/rejected": -5.828051567077637, "step": 1924 }, { "epoch": 0.4, "learning_rate": 1.1995798319327732e-05, "logits/chosen": -1.9243475198745728, "logits/rejected": -2.0698821544647217, "logps/chosen": -276.5430908203125, "logps/rejected": -395.3537292480469, "loss": 0.11, "rewards/accuracies": 0.9375, "rewards/chosen": -1.197965383529663, "rewards/margins": 5.2543487548828125, "rewards/rejected": -6.452314376831055, "step": 1925 }, { "epoch": 0.4, "learning_rate": 1.1991596638655462e-05, "logits/chosen": -1.8655226230621338, "logits/rejected": -1.7618821859359741, "logps/chosen": -218.9773406982422, "logps/rejected": -318.39959716796875, "loss": 0.366, "rewards/accuracies": 0.75, "rewards/chosen": -2.919647216796875, "rewards/margins": 2.703221082687378, "rewards/rejected": -5.622868061065674, "step": 1926 }, { "epoch": 0.4, "learning_rate": 1.1987394957983194e-05, "logits/chosen": -2.188274621963501, "logits/rejected": -2.0795950889587402, "logps/chosen": -339.4609069824219, "logps/rejected": -515.133544921875, "loss": 0.5193, "rewards/accuracies": 0.875, "rewards/chosen": -1.6383622884750366, "rewards/margins": 4.2760515213012695, "rewards/rejected": -5.914413928985596, "step": 1927 }, { "epoch": 0.4, "learning_rate": 1.1983193277310925e-05, "logits/chosen": -2.1272454261779785, "logits/rejected": -1.6379809379577637, "logps/chosen": -296.60980224609375, "logps/rejected": -249.27235412597656, "loss": 0.6232, "rewards/accuracies": 0.75, "rewards/chosen": -2.6637842655181885, "rewards/margins": 3.8892006874084473, "rewards/rejected": -6.552984714508057, "step": 1928 }, { "epoch": 0.4, "learning_rate": 1.1978991596638656e-05, "logits/chosen": -2.289217472076416, "logits/rejected": -2.0336754322052, "logps/chosen": -430.8250732421875, "logps/rejected": -327.38641357421875, "loss": 0.3064, "rewards/accuracies": 0.875, "rewards/chosen": -1.0705857276916504, "rewards/margins": 3.771559476852417, "rewards/rejected": -4.842144966125488, "step": 1929 }, { "epoch": 0.4, "learning_rate": 1.1974789915966387e-05, "logits/chosen": -2.120927572250366, "logits/rejected": -1.6716057062149048, "logps/chosen": -346.63397216796875, "logps/rejected": -313.1705322265625, "loss": 0.2674, "rewards/accuracies": 0.9375, "rewards/chosen": -1.612196922302246, "rewards/margins": 3.1841797828674316, "rewards/rejected": -4.7963762283325195, "step": 1930 }, { "epoch": 0.4, "learning_rate": 1.1970588235294119e-05, "logits/chosen": -2.410149574279785, "logits/rejected": -1.9183396100997925, "logps/chosen": -411.2257385253906, "logps/rejected": -329.4946594238281, "loss": 0.3493, "rewards/accuracies": 0.875, "rewards/chosen": -1.0278282165527344, "rewards/margins": 2.2552437782287598, "rewards/rejected": -3.283071756362915, "step": 1931 }, { "epoch": 0.4, "learning_rate": 1.1966386554621849e-05, "logits/chosen": -1.3189826011657715, "logits/rejected": -1.9186497926712036, "logps/chosen": -193.09617614746094, "logps/rejected": -335.5715637207031, "loss": 0.372, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0491819381713867, "rewards/margins": 4.139082908630371, "rewards/rejected": -6.188264846801758, "step": 1932 }, { "epoch": 0.4, "learning_rate": 1.196218487394958e-05, "logits/chosen": -2.284926414489746, "logits/rejected": -1.7808797359466553, "logps/chosen": -335.343017578125, "logps/rejected": -321.68231201171875, "loss": 0.2559, "rewards/accuracies": 0.875, "rewards/chosen": -1.4183425903320312, "rewards/margins": 4.103165149688721, "rewards/rejected": -5.52150821685791, "step": 1933 }, { "epoch": 0.4, "learning_rate": 1.1957983193277311e-05, "logits/chosen": -2.2952332496643066, "logits/rejected": -1.4111660718917847, "logps/chosen": -424.5174560546875, "logps/rejected": -392.2184143066406, "loss": 0.4404, "rewards/accuracies": 0.75, "rewards/chosen": -2.0654237270355225, "rewards/margins": 3.078819751739502, "rewards/rejected": -5.1442437171936035, "step": 1934 }, { "epoch": 0.4, "learning_rate": 1.1953781512605043e-05, "logits/chosen": -2.0491909980773926, "logits/rejected": -2.1064460277557373, "logps/chosen": -193.53875732421875, "logps/rejected": -303.26409912109375, "loss": 0.1201, "rewards/accuracies": 1.0, "rewards/chosen": -1.657051682472229, "rewards/margins": 4.737081527709961, "rewards/rejected": -6.3941330909729, "step": 1935 }, { "epoch": 0.41, "learning_rate": 1.1949579831932773e-05, "logits/chosen": -2.1119136810302734, "logits/rejected": -2.1079163551330566, "logps/chosen": -265.5458068847656, "logps/rejected": -367.0487060546875, "loss": 0.3151, "rewards/accuracies": 0.8125, "rewards/chosen": -2.417665958404541, "rewards/margins": 3.9206326007843018, "rewards/rejected": -6.338298320770264, "step": 1936 }, { "epoch": 0.41, "learning_rate": 1.1945378151260505e-05, "logits/chosen": -2.200597047805786, "logits/rejected": -1.979295015335083, "logps/chosen": -415.5914306640625, "logps/rejected": -437.1701354980469, "loss": 0.2908, "rewards/accuracies": 0.875, "rewards/chosen": -1.3065394163131714, "rewards/margins": 3.6270809173583984, "rewards/rejected": -4.933620452880859, "step": 1937 }, { "epoch": 0.41, "learning_rate": 1.1941176470588235e-05, "logits/chosen": -1.958099126815796, "logits/rejected": -1.7996703386306763, "logps/chosen": -329.54888916015625, "logps/rejected": -380.7408142089844, "loss": 0.1874, "rewards/accuracies": 1.0, "rewards/chosen": -0.8060333132743835, "rewards/margins": 2.933206081390381, "rewards/rejected": -3.739239454269409, "step": 1938 }, { "epoch": 0.41, "learning_rate": 1.1936974789915967e-05, "logits/chosen": -1.9268754720687866, "logits/rejected": -2.0633370876312256, "logps/chosen": -254.12820434570312, "logps/rejected": -280.59991455078125, "loss": 0.1357, "rewards/accuracies": 1.0, "rewards/chosen": -1.9287095069885254, "rewards/margins": 3.3217313289642334, "rewards/rejected": -5.25044059753418, "step": 1939 }, { "epoch": 0.41, "learning_rate": 1.1932773109243699e-05, "logits/chosen": -2.182180166244507, "logits/rejected": -1.7079010009765625, "logps/chosen": -423.8414306640625, "logps/rejected": -335.9015197753906, "loss": 0.4899, "rewards/accuracies": 0.875, "rewards/chosen": -1.6346639394760132, "rewards/margins": 4.65989351272583, "rewards/rejected": -6.294557571411133, "step": 1940 }, { "epoch": 0.41, "learning_rate": 1.192857142857143e-05, "logits/chosen": -2.022777557373047, "logits/rejected": -2.0109386444091797, "logps/chosen": -307.6396484375, "logps/rejected": -387.5233154296875, "loss": 0.6461, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8571763038635254, "rewards/margins": 3.773679733276367, "rewards/rejected": -6.630856037139893, "step": 1941 }, { "epoch": 0.41, "learning_rate": 1.1924369747899161e-05, "logits/chosen": -2.147752285003662, "logits/rejected": -1.888809084892273, "logps/chosen": -259.9430236816406, "logps/rejected": -292.3148193359375, "loss": 0.1, "rewards/accuracies": 1.0, "rewards/chosen": -1.3286447525024414, "rewards/margins": 5.61132287979126, "rewards/rejected": -6.939967632293701, "step": 1942 }, { "epoch": 0.41, "learning_rate": 1.1920168067226891e-05, "logits/chosen": -2.0400469303131104, "logits/rejected": -1.971203088760376, "logps/chosen": -339.0711669921875, "logps/rejected": -396.9685363769531, "loss": 0.2274, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1302807331085205, "rewards/margins": 5.530862331390381, "rewards/rejected": -7.6611433029174805, "step": 1943 }, { "epoch": 0.41, "learning_rate": 1.1915966386554623e-05, "logits/chosen": -2.0932838916778564, "logits/rejected": -2.1938164234161377, "logps/chosen": -280.6672058105469, "logps/rejected": -329.2666931152344, "loss": 0.2732, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8697800636291504, "rewards/margins": 3.28078556060791, "rewards/rejected": -5.150566101074219, "step": 1944 }, { "epoch": 0.41, "learning_rate": 1.1911764705882354e-05, "logits/chosen": -2.123795986175537, "logits/rejected": -1.7978296279907227, "logps/chosen": -181.64398193359375, "logps/rejected": -172.8799285888672, "loss": 0.2655, "rewards/accuracies": 0.75, "rewards/chosen": -1.8274486064910889, "rewards/margins": 3.6413755416870117, "rewards/rejected": -5.46882438659668, "step": 1945 }, { "epoch": 0.41, "learning_rate": 1.1907563025210086e-05, "logits/chosen": -2.057217597961426, "logits/rejected": -1.7277305126190186, "logps/chosen": -345.43341064453125, "logps/rejected": -342.9769287109375, "loss": 0.4275, "rewards/accuracies": 0.75, "rewards/chosen": -1.9175856113433838, "rewards/margins": 4.27393102645874, "rewards/rejected": -6.191516876220703, "step": 1946 }, { "epoch": 0.41, "learning_rate": 1.1903361344537816e-05, "logits/chosen": -2.2791125774383545, "logits/rejected": -2.1852200031280518, "logps/chosen": -447.7438659667969, "logps/rejected": -433.920166015625, "loss": 0.4509, "rewards/accuracies": 0.75, "rewards/chosen": -1.8635163307189941, "rewards/margins": 3.2856340408325195, "rewards/rejected": -5.149150371551514, "step": 1947 }, { "epoch": 0.41, "learning_rate": 1.1899159663865548e-05, "logits/chosen": -2.1836092472076416, "logits/rejected": -1.7901263236999512, "logps/chosen": -236.86294555664062, "logps/rejected": -298.5499267578125, "loss": 0.239, "rewards/accuracies": 0.875, "rewards/chosen": -1.6819275617599487, "rewards/margins": 4.898286819458008, "rewards/rejected": -6.580214500427246, "step": 1948 }, { "epoch": 0.41, "learning_rate": 1.1894957983193278e-05, "logits/chosen": -1.8687278032302856, "logits/rejected": -1.7982490062713623, "logps/chosen": -411.6961975097656, "logps/rejected": -383.32257080078125, "loss": 0.464, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2086563110351562, "rewards/margins": 3.933460235595703, "rewards/rejected": -6.142116069793701, "step": 1949 }, { "epoch": 0.41, "learning_rate": 1.189075630252101e-05, "logits/chosen": -2.2042768001556396, "logits/rejected": -1.9281728267669678, "logps/chosen": -375.22100830078125, "logps/rejected": -343.9200439453125, "loss": 0.4534, "rewards/accuracies": 0.8125, "rewards/chosen": -2.310399293899536, "rewards/margins": 3.9504218101501465, "rewards/rejected": -6.260821342468262, "step": 1950 }, { "epoch": 0.41, "learning_rate": 1.188655462184874e-05, "logits/chosen": -1.6523381471633911, "logits/rejected": -1.7892065048217773, "logps/chosen": -291.7080078125, "logps/rejected": -394.178466796875, "loss": 0.3157, "rewards/accuracies": 0.875, "rewards/chosen": -2.075544834136963, "rewards/margins": 3.866364002227783, "rewards/rejected": -5.941908836364746, "step": 1951 }, { "epoch": 0.41, "learning_rate": 1.1882352941176472e-05, "logits/chosen": -1.688197135925293, "logits/rejected": -1.9073342084884644, "logps/chosen": -182.81512451171875, "logps/rejected": -294.6404113769531, "loss": 0.1558, "rewards/accuracies": 0.9375, "rewards/chosen": -1.539276361465454, "rewards/margins": 4.741386890411377, "rewards/rejected": -6.280663013458252, "step": 1952 }, { "epoch": 0.41, "learning_rate": 1.1878151260504202e-05, "logits/chosen": -2.3379714488983154, "logits/rejected": -1.7211012840270996, "logps/chosen": -554.7611083984375, "logps/rejected": -393.25482177734375, "loss": 0.4411, "rewards/accuracies": 0.875, "rewards/chosen": -1.9788715839385986, "rewards/margins": 3.947096109390259, "rewards/rejected": -5.925967693328857, "step": 1953 }, { "epoch": 0.41, "learning_rate": 1.1873949579831934e-05, "logits/chosen": -2.1366307735443115, "logits/rejected": -2.145113468170166, "logps/chosen": -354.5496826171875, "logps/rejected": -384.859619140625, "loss": 0.2317, "rewards/accuracies": 0.875, "rewards/chosen": -2.0397887229919434, "rewards/margins": 4.053105354309082, "rewards/rejected": -6.092894554138184, "step": 1954 }, { "epoch": 0.41, "learning_rate": 1.1869747899159664e-05, "logits/chosen": -2.2341721057891846, "logits/rejected": -1.98061203956604, "logps/chosen": -338.8604736328125, "logps/rejected": -304.5364990234375, "loss": 0.4437, "rewards/accuracies": 0.75, "rewards/chosen": -2.230681896209717, "rewards/margins": 3.2862181663513184, "rewards/rejected": -5.516900539398193, "step": 1955 }, { "epoch": 0.41, "learning_rate": 1.1865546218487396e-05, "logits/chosen": -1.887587308883667, "logits/rejected": -1.7623660564422607, "logps/chosen": -306.985595703125, "logps/rejected": -350.65142822265625, "loss": 0.4348, "rewards/accuracies": 0.625, "rewards/chosen": -3.3750853538513184, "rewards/margins": 1.4892942905426025, "rewards/rejected": -4.8643798828125, "step": 1956 }, { "epoch": 0.41, "learning_rate": 1.1861344537815126e-05, "logits/chosen": -2.128178119659424, "logits/rejected": -1.4239622354507446, "logps/chosen": -389.30242919921875, "logps/rejected": -272.2572021484375, "loss": 0.3527, "rewards/accuracies": 0.875, "rewards/chosen": -1.3870751857757568, "rewards/margins": 4.557292938232422, "rewards/rejected": -5.9443678855896, "step": 1957 }, { "epoch": 0.41, "learning_rate": 1.1857142857142858e-05, "logits/chosen": -2.092729330062866, "logits/rejected": -2.0663514137268066, "logps/chosen": -248.1607666015625, "logps/rejected": -274.74151611328125, "loss": 0.4519, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9701026678085327, "rewards/margins": 2.9491186141967773, "rewards/rejected": -4.919220924377441, "step": 1958 }, { "epoch": 0.41, "learning_rate": 1.1852941176470589e-05, "logits/chosen": -1.8019726276397705, "logits/rejected": -1.7665395736694336, "logps/chosen": -251.14222717285156, "logps/rejected": -362.126708984375, "loss": 0.079, "rewards/accuracies": 1.0, "rewards/chosen": -1.9113917350769043, "rewards/margins": 4.477956771850586, "rewards/rejected": -6.389348030090332, "step": 1959 }, { "epoch": 0.41, "learning_rate": 1.184873949579832e-05, "logits/chosen": -1.7693349123001099, "logits/rejected": -1.6798259019851685, "logps/chosen": -330.01531982421875, "logps/rejected": -336.0985107421875, "loss": 0.1821, "rewards/accuracies": 0.9375, "rewards/chosen": -2.097743272781372, "rewards/margins": 3.837233543395996, "rewards/rejected": -5.934976577758789, "step": 1960 }, { "epoch": 0.41, "learning_rate": 1.184453781512605e-05, "logits/chosen": -1.6896579265594482, "logits/rejected": -1.8994159698486328, "logps/chosen": -373.4425048828125, "logps/rejected": -480.84454345703125, "loss": 0.6525, "rewards/accuracies": 0.6875, "rewards/chosen": -3.2558233737945557, "rewards/margins": 2.759458065032959, "rewards/rejected": -6.0152812004089355, "step": 1961 }, { "epoch": 0.41, "learning_rate": 1.1840336134453783e-05, "logits/chosen": -2.0631000995635986, "logits/rejected": -1.8703677654266357, "logps/chosen": -342.1109619140625, "logps/rejected": -299.40411376953125, "loss": 0.4489, "rewards/accuracies": 0.875, "rewards/chosen": -2.010925531387329, "rewards/margins": 3.1662964820861816, "rewards/rejected": -5.177222728729248, "step": 1962 }, { "epoch": 0.41, "learning_rate": 1.1836134453781515e-05, "logits/chosen": -2.199766159057617, "logits/rejected": -2.1669607162475586, "logps/chosen": -330.13623046875, "logps/rejected": -331.3076477050781, "loss": 0.5619, "rewards/accuracies": 0.8125, "rewards/chosen": -2.482279062271118, "rewards/margins": 4.212305068969727, "rewards/rejected": -6.694583892822266, "step": 1963 }, { "epoch": 0.41, "learning_rate": 1.1831932773109245e-05, "logits/chosen": -2.0139265060424805, "logits/rejected": -1.939925193786621, "logps/chosen": -277.1054992675781, "logps/rejected": -270.07830810546875, "loss": 0.603, "rewards/accuracies": 0.6875, "rewards/chosen": -2.6427929401397705, "rewards/margins": 2.453946590423584, "rewards/rejected": -5.096739768981934, "step": 1964 }, { "epoch": 0.41, "learning_rate": 1.1827731092436977e-05, "logits/chosen": -2.074214458465576, "logits/rejected": -1.5690877437591553, "logps/chosen": -382.2853698730469, "logps/rejected": -397.92059326171875, "loss": 0.15, "rewards/accuracies": 0.9375, "rewards/chosen": -2.129748582839966, "rewards/margins": 4.783637523651123, "rewards/rejected": -6.91338586807251, "step": 1965 }, { "epoch": 0.41, "learning_rate": 1.1823529411764707e-05, "logits/chosen": -1.486264705657959, "logits/rejected": -1.6873584985733032, "logps/chosen": -313.6159362792969, "logps/rejected": -409.6959228515625, "loss": 0.2055, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4844980239868164, "rewards/margins": 3.7648491859436035, "rewards/rejected": -6.249347686767578, "step": 1966 }, { "epoch": 0.41, "learning_rate": 1.1819327731092439e-05, "logits/chosen": -2.1372106075286865, "logits/rejected": -1.5233962535858154, "logps/chosen": -303.3284606933594, "logps/rejected": -292.6336364746094, "loss": 0.2732, "rewards/accuracies": 0.875, "rewards/chosen": -1.993044137954712, "rewards/margins": 4.036615371704102, "rewards/rejected": -6.029659748077393, "step": 1967 }, { "epoch": 0.41, "learning_rate": 1.1815126050420169e-05, "logits/chosen": -1.8164833784103394, "logits/rejected": -1.5731630325317383, "logps/chosen": -268.54144287109375, "logps/rejected": -219.06790161132812, "loss": 0.5545, "rewards/accuracies": 0.6875, "rewards/chosen": -2.839538812637329, "rewards/margins": 2.256260633468628, "rewards/rejected": -5.095799446105957, "step": 1968 }, { "epoch": 0.41, "learning_rate": 1.1810924369747901e-05, "logits/chosen": -1.9257806539535522, "logits/rejected": -1.5365087985992432, "logps/chosen": -392.1906433105469, "logps/rejected": -377.35791015625, "loss": 0.2562, "rewards/accuracies": 0.9375, "rewards/chosen": -2.231613874435425, "rewards/margins": 4.251580238342285, "rewards/rejected": -6.483194351196289, "step": 1969 }, { "epoch": 0.41, "learning_rate": 1.1806722689075631e-05, "logits/chosen": -2.223924398422241, "logits/rejected": -2.3198938369750977, "logps/chosen": -455.7251281738281, "logps/rejected": -424.00750732421875, "loss": 0.4177, "rewards/accuracies": 0.875, "rewards/chosen": -1.4913262128829956, "rewards/margins": 2.4450111389160156, "rewards/rejected": -3.9363372325897217, "step": 1970 }, { "epoch": 0.41, "learning_rate": 1.1802521008403363e-05, "logits/chosen": -2.1450693607330322, "logits/rejected": -2.015467405319214, "logps/chosen": -551.3428344726562, "logps/rejected": -442.72283935546875, "loss": 0.1146, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1253373622894287, "rewards/margins": 4.3501081466674805, "rewards/rejected": -5.475445747375488, "step": 1971 }, { "epoch": 0.41, "learning_rate": 1.1798319327731093e-05, "logits/chosen": -1.9290738105773926, "logits/rejected": -1.543068289756775, "logps/chosen": -406.9100036621094, "logps/rejected": -354.5848083496094, "loss": 0.3529, "rewards/accuracies": 0.8125, "rewards/chosen": -1.880568504333496, "rewards/margins": 3.7765603065490723, "rewards/rejected": -5.65712833404541, "step": 1972 }, { "epoch": 0.41, "learning_rate": 1.1794117647058825e-05, "logits/chosen": -2.038806676864624, "logits/rejected": -2.015094757080078, "logps/chosen": -350.6685485839844, "logps/rejected": -344.201416015625, "loss": 0.3596, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4985814094543457, "rewards/margins": 4.5968017578125, "rewards/rejected": -7.095383644104004, "step": 1973 }, { "epoch": 0.41, "learning_rate": 1.1789915966386555e-05, "logits/chosen": -1.9172277450561523, "logits/rejected": -2.393588066101074, "logps/chosen": -273.54412841796875, "logps/rejected": -362.3873596191406, "loss": 0.1462, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1986079216003418, "rewards/margins": 5.007539749145508, "rewards/rejected": -6.206147193908691, "step": 1974 }, { "epoch": 0.41, "learning_rate": 1.1785714285714287e-05, "logits/chosen": -2.0969595909118652, "logits/rejected": -1.7913565635681152, "logps/chosen": -397.072509765625, "logps/rejected": -355.4930114746094, "loss": 0.3659, "rewards/accuracies": 0.9375, "rewards/chosen": -1.811758041381836, "rewards/margins": 4.794989585876465, "rewards/rejected": -6.606747627258301, "step": 1975 }, { "epoch": 0.41, "learning_rate": 1.1781512605042018e-05, "logits/chosen": -2.117095947265625, "logits/rejected": -1.6759452819824219, "logps/chosen": -429.91754150390625, "logps/rejected": -372.3650207519531, "loss": 0.2085, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8025895357131958, "rewards/margins": 3.76780104637146, "rewards/rejected": -5.570390701293945, "step": 1976 }, { "epoch": 0.41, "learning_rate": 1.177731092436975e-05, "logits/chosen": -2.346609354019165, "logits/rejected": -2.261892080307007, "logps/chosen": -257.985107421875, "logps/rejected": -361.9310607910156, "loss": 0.0974, "rewards/accuracies": 1.0, "rewards/chosen": -1.8649860620498657, "rewards/margins": 3.897449016571045, "rewards/rejected": -5.762434959411621, "step": 1977 }, { "epoch": 0.41, "learning_rate": 1.177310924369748e-05, "logits/chosen": -2.1725616455078125, "logits/rejected": -1.791488766670227, "logps/chosen": -330.4352111816406, "logps/rejected": -270.24859619140625, "loss": 0.2262, "rewards/accuracies": 0.875, "rewards/chosen": -2.191359758377075, "rewards/margins": 3.675600051879883, "rewards/rejected": -5.866960048675537, "step": 1978 }, { "epoch": 0.41, "learning_rate": 1.1768907563025212e-05, "logits/chosen": -2.2235729694366455, "logits/rejected": -1.9197649955749512, "logps/chosen": -341.85516357421875, "logps/rejected": -293.85821533203125, "loss": 0.1469, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0742182731628418, "rewards/margins": 3.1900198459625244, "rewards/rejected": -4.264238357543945, "step": 1979 }, { "epoch": 0.41, "learning_rate": 1.1764705882352942e-05, "logits/chosen": -2.2083446979522705, "logits/rejected": -2.09649920463562, "logps/chosen": -358.7520751953125, "logps/rejected": -356.1342468261719, "loss": 0.0713, "rewards/accuracies": 1.0, "rewards/chosen": -2.455860137939453, "rewards/margins": 3.830432176589966, "rewards/rejected": -6.28629207611084, "step": 1980 }, { "epoch": 0.41, "learning_rate": 1.1760504201680674e-05, "logits/chosen": -2.045534133911133, "logits/rejected": -2.037423610687256, "logps/chosen": -334.7827453613281, "logps/rejected": -393.2997741699219, "loss": 0.2295, "rewards/accuracies": 0.875, "rewards/chosen": -2.02451753616333, "rewards/margins": 3.3219265937805176, "rewards/rejected": -5.346444129943848, "step": 1981 }, { "epoch": 0.41, "learning_rate": 1.1756302521008404e-05, "logits/chosen": -1.9969927072525024, "logits/rejected": -2.108992099761963, "logps/chosen": -300.56158447265625, "logps/rejected": -276.80181884765625, "loss": 0.2839, "rewards/accuracies": 0.9375, "rewards/chosen": -2.421304225921631, "rewards/margins": 3.123438596725464, "rewards/rejected": -5.544742584228516, "step": 1982 }, { "epoch": 0.41, "learning_rate": 1.1752100840336136e-05, "logits/chosen": -2.492192029953003, "logits/rejected": -1.85874605178833, "logps/chosen": -424.85296630859375, "logps/rejected": -295.4246826171875, "loss": 0.2656, "rewards/accuracies": 0.875, "rewards/chosen": -1.951184630393982, "rewards/margins": 3.984459400177002, "rewards/rejected": -5.935644149780273, "step": 1983 }, { "epoch": 0.42, "learning_rate": 1.1747899159663866e-05, "logits/chosen": -2.1601483821868896, "logits/rejected": -2.0993869304656982, "logps/chosen": -359.14013671875, "logps/rejected": -285.28759765625, "loss": 0.2903, "rewards/accuracies": 0.875, "rewards/chosen": -2.616579294204712, "rewards/margins": 2.754587173461914, "rewards/rejected": -5.371166229248047, "step": 1984 }, { "epoch": 0.42, "learning_rate": 1.1743697478991598e-05, "logits/chosen": -2.106593370437622, "logits/rejected": -1.70912504196167, "logps/chosen": -435.725830078125, "logps/rejected": -388.1470642089844, "loss": 0.1443, "rewards/accuracies": 0.875, "rewards/chosen": -1.3147165775299072, "rewards/margins": 5.514959812164307, "rewards/rejected": -6.829676628112793, "step": 1985 }, { "epoch": 0.42, "learning_rate": 1.173949579831933e-05, "logits/chosen": -2.2300922870635986, "logits/rejected": -1.7195117473602295, "logps/chosen": -255.38682556152344, "logps/rejected": -241.39251708984375, "loss": 0.2197, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2693943977355957, "rewards/margins": 3.3821494579315186, "rewards/rejected": -5.651543617248535, "step": 1986 }, { "epoch": 0.42, "learning_rate": 1.173529411764706e-05, "logits/chosen": -2.233675479888916, "logits/rejected": -1.6387104988098145, "logps/chosen": -288.41064453125, "logps/rejected": -294.75640869140625, "loss": 0.301, "rewards/accuracies": 0.8125, "rewards/chosen": -2.454026699066162, "rewards/margins": 3.922327995300293, "rewards/rejected": -6.376354694366455, "step": 1987 }, { "epoch": 0.42, "learning_rate": 1.1731092436974792e-05, "logits/chosen": -1.9402329921722412, "logits/rejected": -1.9833506345748901, "logps/chosen": -223.27432250976562, "logps/rejected": -307.5994873046875, "loss": 0.3893, "rewards/accuracies": 0.875, "rewards/chosen": -2.535097599029541, "rewards/margins": 4.090473175048828, "rewards/rejected": -6.625570297241211, "step": 1988 }, { "epoch": 0.42, "learning_rate": 1.1726890756302522e-05, "logits/chosen": -1.8817272186279297, "logits/rejected": -1.883370041847229, "logps/chosen": -295.766357421875, "logps/rejected": -317.646240234375, "loss": 0.5354, "rewards/accuracies": 0.75, "rewards/chosen": -2.3192965984344482, "rewards/margins": 3.371514320373535, "rewards/rejected": -5.6908111572265625, "step": 1989 }, { "epoch": 0.42, "learning_rate": 1.1722689075630254e-05, "logits/chosen": -2.5206491947174072, "logits/rejected": -2.1297101974487305, "logps/chosen": -335.68402099609375, "logps/rejected": -323.77972412109375, "loss": 0.1752, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9038801193237305, "rewards/margins": 3.6221609115600586, "rewards/rejected": -5.526040554046631, "step": 1990 }, { "epoch": 0.42, "learning_rate": 1.1718487394957984e-05, "logits/chosen": -2.2206225395202637, "logits/rejected": -1.6062289476394653, "logps/chosen": -365.5782470703125, "logps/rejected": -308.41986083984375, "loss": 0.5274, "rewards/accuracies": 0.75, "rewards/chosen": -2.1508214473724365, "rewards/margins": 3.382694959640503, "rewards/rejected": -5.533515930175781, "step": 1991 }, { "epoch": 0.42, "learning_rate": 1.1714285714285716e-05, "logits/chosen": -1.9700405597686768, "logits/rejected": -1.9780497550964355, "logps/chosen": -342.9854736328125, "logps/rejected": -407.82861328125, "loss": 0.3352, "rewards/accuracies": 0.875, "rewards/chosen": -2.7476754188537598, "rewards/margins": 3.291015625, "rewards/rejected": -6.038690567016602, "step": 1992 }, { "epoch": 0.42, "learning_rate": 1.1710084033613447e-05, "logits/chosen": -2.1418800354003906, "logits/rejected": -1.5256165266036987, "logps/chosen": -338.65557861328125, "logps/rejected": -274.3718566894531, "loss": 0.4076, "rewards/accuracies": 0.75, "rewards/chosen": -2.764272928237915, "rewards/margins": 3.890021324157715, "rewards/rejected": -6.654294967651367, "step": 1993 }, { "epoch": 0.42, "learning_rate": 1.1705882352941178e-05, "logits/chosen": -2.095531463623047, "logits/rejected": -1.8915833234786987, "logps/chosen": -250.8558349609375, "logps/rejected": -297.199462890625, "loss": 0.2868, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9347187280654907, "rewards/margins": 3.844944477081299, "rewards/rejected": -5.7796630859375, "step": 1994 }, { "epoch": 0.42, "learning_rate": 1.1701680672268909e-05, "logits/chosen": -2.112004041671753, "logits/rejected": -1.9657294750213623, "logps/chosen": -353.8191223144531, "logps/rejected": -346.2870788574219, "loss": 0.4182, "rewards/accuracies": 0.75, "rewards/chosen": -2.6809327602386475, "rewards/margins": 2.743990421295166, "rewards/rejected": -5.424922943115234, "step": 1995 }, { "epoch": 0.42, "learning_rate": 1.169747899159664e-05, "logits/chosen": -2.249419689178467, "logits/rejected": -2.2966601848602295, "logps/chosen": -411.1002502441406, "logps/rejected": -422.5325927734375, "loss": 0.4391, "rewards/accuracies": 0.75, "rewards/chosen": -2.4471569061279297, "rewards/margins": 3.2927207946777344, "rewards/rejected": -5.739877700805664, "step": 1996 }, { "epoch": 0.42, "learning_rate": 1.1693277310924371e-05, "logits/chosen": -2.2589008808135986, "logits/rejected": -2.1911535263061523, "logps/chosen": -347.05303955078125, "logps/rejected": -376.7115478515625, "loss": 0.6299, "rewards/accuracies": 0.75, "rewards/chosen": -2.8977396488189697, "rewards/margins": 3.968108654022217, "rewards/rejected": -6.865848541259766, "step": 1997 }, { "epoch": 0.42, "learning_rate": 1.1689075630252103e-05, "logits/chosen": -2.438709259033203, "logits/rejected": -2.026749849319458, "logps/chosen": -372.5621337890625, "logps/rejected": -345.9267578125, "loss": 0.4279, "rewards/accuracies": 0.75, "rewards/chosen": -1.9480586051940918, "rewards/margins": 3.7759501934051514, "rewards/rejected": -5.724008560180664, "step": 1998 }, { "epoch": 0.42, "learning_rate": 1.1684873949579833e-05, "logits/chosen": -2.129971504211426, "logits/rejected": -2.127574920654297, "logps/chosen": -368.34283447265625, "logps/rejected": -411.705322265625, "loss": 0.3494, "rewards/accuracies": 0.875, "rewards/chosen": -2.4318723678588867, "rewards/margins": 3.5859169960021973, "rewards/rejected": -6.017788887023926, "step": 1999 }, { "epoch": 0.42, "learning_rate": 1.1680672268907565e-05, "logits/chosen": -2.024904727935791, "logits/rejected": -1.8718531131744385, "logps/chosen": -199.9012451171875, "logps/rejected": -270.2091979980469, "loss": 0.256, "rewards/accuracies": 0.875, "rewards/chosen": -2.6000401973724365, "rewards/margins": 3.5901875495910645, "rewards/rejected": -6.190227031707764, "step": 2000 }, { "epoch": 0.42, "learning_rate": 1.1676470588235295e-05, "logits/chosen": -2.079197406768799, "logits/rejected": -1.696530818939209, "logps/chosen": -283.9945983886719, "logps/rejected": -273.9361572265625, "loss": 0.7153, "rewards/accuracies": 0.75, "rewards/chosen": -2.8693621158599854, "rewards/margins": 2.799614191055298, "rewards/rejected": -5.668976783752441, "step": 2001 }, { "epoch": 0.42, "learning_rate": 1.1672268907563027e-05, "logits/chosen": -2.105051040649414, "logits/rejected": -2.029066562652588, "logps/chosen": -357.68682861328125, "logps/rejected": -345.736572265625, "loss": 0.264, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7269809246063232, "rewards/margins": 3.248204231262207, "rewards/rejected": -4.975184440612793, "step": 2002 }, { "epoch": 0.42, "learning_rate": 1.1668067226890757e-05, "logits/chosen": -1.910569429397583, "logits/rejected": -1.6828595399856567, "logps/chosen": -331.3682861328125, "logps/rejected": -296.7809753417969, "loss": 0.2823, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4297759532928467, "rewards/margins": 3.8586106300354004, "rewards/rejected": -6.288386821746826, "step": 2003 }, { "epoch": 0.42, "learning_rate": 1.166386554621849e-05, "logits/chosen": -2.074186325073242, "logits/rejected": -1.9179534912109375, "logps/chosen": -240.02157592773438, "logps/rejected": -334.829833984375, "loss": 0.159, "rewards/accuracies": 0.9375, "rewards/chosen": -2.493454933166504, "rewards/margins": 3.1100752353668213, "rewards/rejected": -5.603529930114746, "step": 2004 }, { "epoch": 0.42, "learning_rate": 1.165966386554622e-05, "logits/chosen": -2.3214378356933594, "logits/rejected": -2.194861888885498, "logps/chosen": -294.208740234375, "logps/rejected": -304.8033447265625, "loss": 0.3174, "rewards/accuracies": 0.8125, "rewards/chosen": -1.840536117553711, "rewards/margins": 3.5510988235473633, "rewards/rejected": -5.391634464263916, "step": 2005 }, { "epoch": 0.42, "learning_rate": 1.1655462184873951e-05, "logits/chosen": -2.4050040245056152, "logits/rejected": -1.8576184511184692, "logps/chosen": -396.2915344238281, "logps/rejected": -283.24090576171875, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -1.8042290210723877, "rewards/margins": 5.0325493812561035, "rewards/rejected": -6.83677864074707, "step": 2006 }, { "epoch": 0.42, "learning_rate": 1.1651260504201683e-05, "logits/chosen": -1.7313112020492554, "logits/rejected": -1.535030484199524, "logps/chosen": -278.0242004394531, "logps/rejected": -248.71359252929688, "loss": 0.4917, "rewards/accuracies": 0.6875, "rewards/chosen": -2.7757339477539062, "rewards/margins": 2.6418771743774414, "rewards/rejected": -5.417611122131348, "step": 2007 }, { "epoch": 0.42, "learning_rate": 1.1647058823529413e-05, "logits/chosen": -1.8838014602661133, "logits/rejected": -2.0262277126312256, "logps/chosen": -330.3498229980469, "logps/rejected": -304.90771484375, "loss": 0.588, "rewards/accuracies": 0.75, "rewards/chosen": -2.2751355171203613, "rewards/margins": 2.4011154174804688, "rewards/rejected": -4.676250457763672, "step": 2008 }, { "epoch": 0.42, "learning_rate": 1.1642857142857145e-05, "logits/chosen": -2.2164459228515625, "logits/rejected": -1.8665568828582764, "logps/chosen": -349.1014404296875, "logps/rejected": -224.43362426757812, "loss": 0.1701, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1316754817962646, "rewards/margins": 4.139419078826904, "rewards/rejected": -6.271094799041748, "step": 2009 }, { "epoch": 0.42, "learning_rate": 1.1638655462184876e-05, "logits/chosen": -2.43519926071167, "logits/rejected": -1.8341882228851318, "logps/chosen": -335.29107666015625, "logps/rejected": -237.13446044921875, "loss": 0.1021, "rewards/accuracies": 1.0, "rewards/chosen": -2.127246379852295, "rewards/margins": 4.704779148101807, "rewards/rejected": -6.832025527954102, "step": 2010 }, { "epoch": 0.42, "learning_rate": 1.1634453781512608e-05, "logits/chosen": -2.3218064308166504, "logits/rejected": -2.132877826690674, "logps/chosen": -412.05755615234375, "logps/rejected": -369.7252197265625, "loss": 0.3225, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2949604988098145, "rewards/margins": 3.5905070304870605, "rewards/rejected": -5.885467529296875, "step": 2011 }, { "epoch": 0.42, "learning_rate": 1.1630252100840338e-05, "logits/chosen": -2.15934681892395, "logits/rejected": -2.282438039779663, "logps/chosen": -388.4600830078125, "logps/rejected": -455.7470397949219, "loss": 0.2872, "rewards/accuracies": 0.875, "rewards/chosen": -2.475032091140747, "rewards/margins": 3.115109920501709, "rewards/rejected": -5.590141773223877, "step": 2012 }, { "epoch": 0.42, "learning_rate": 1.162605042016807e-05, "logits/chosen": -2.0802018642425537, "logits/rejected": -1.5434036254882812, "logps/chosen": -299.9101867675781, "logps/rejected": -278.4464111328125, "loss": 0.1651, "rewards/accuracies": 1.0, "rewards/chosen": -2.55178165435791, "rewards/margins": 3.4816830158233643, "rewards/rejected": -6.033464431762695, "step": 2013 }, { "epoch": 0.42, "learning_rate": 1.16218487394958e-05, "logits/chosen": -1.8953118324279785, "logits/rejected": -2.399675130844116, "logps/chosen": -242.6870880126953, "logps/rejected": -345.7003173828125, "loss": 0.8016, "rewards/accuracies": 0.5625, "rewards/chosen": -3.0013341903686523, "rewards/margins": 1.4316017627716064, "rewards/rejected": -4.43293571472168, "step": 2014 }, { "epoch": 0.42, "learning_rate": 1.1617647058823532e-05, "logits/chosen": -2.285087823867798, "logits/rejected": -1.8152977228164673, "logps/chosen": -422.3471984863281, "logps/rejected": -356.77178955078125, "loss": 0.3376, "rewards/accuracies": 0.875, "rewards/chosen": -1.9482060670852661, "rewards/margins": 3.8550796508789062, "rewards/rejected": -5.803285598754883, "step": 2015 }, { "epoch": 0.42, "learning_rate": 1.1613445378151262e-05, "logits/chosen": -2.2776033878326416, "logits/rejected": -2.055415391921997, "logps/chosen": -353.1641540527344, "logps/rejected": -294.0531005859375, "loss": 0.2981, "rewards/accuracies": 0.9375, "rewards/chosen": -2.145308494567871, "rewards/margins": 3.6759276390075684, "rewards/rejected": -5.8212361335754395, "step": 2016 }, { "epoch": 0.42, "learning_rate": 1.1609243697478994e-05, "logits/chosen": -2.163743734359741, "logits/rejected": -1.9288283586502075, "logps/chosen": -383.09661865234375, "logps/rejected": -379.734619140625, "loss": 0.155, "rewards/accuracies": 0.875, "rewards/chosen": -1.4958137273788452, "rewards/margins": 4.644062042236328, "rewards/rejected": -6.139875411987305, "step": 2017 }, { "epoch": 0.42, "learning_rate": 1.1605042016806724e-05, "logits/chosen": -2.2524187564849854, "logits/rejected": -1.9637539386749268, "logps/chosen": -381.77923583984375, "logps/rejected": -385.3320007324219, "loss": 0.3817, "rewards/accuracies": 0.875, "rewards/chosen": -1.3869824409484863, "rewards/margins": 3.772944927215576, "rewards/rejected": -5.159926891326904, "step": 2018 }, { "epoch": 0.42, "learning_rate": 1.1600840336134456e-05, "logits/chosen": -2.2648332118988037, "logits/rejected": -2.156357765197754, "logps/chosen": -339.6280517578125, "logps/rejected": -316.3348693847656, "loss": 0.2546, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4685208797454834, "rewards/margins": 4.188183307647705, "rewards/rejected": -6.656703948974609, "step": 2019 }, { "epoch": 0.42, "learning_rate": 1.1596638655462186e-05, "logits/chosen": -2.208322048187256, "logits/rejected": -1.9311819076538086, "logps/chosen": -281.691650390625, "logps/rejected": -298.8689270019531, "loss": 0.3471, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3511407375335693, "rewards/margins": 4.380074977874756, "rewards/rejected": -6.731215476989746, "step": 2020 }, { "epoch": 0.42, "learning_rate": 1.1592436974789918e-05, "logits/chosen": -2.2703824043273926, "logits/rejected": -1.9259319305419922, "logps/chosen": -483.55999755859375, "logps/rejected": -356.62371826171875, "loss": 0.2327, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9399688243865967, "rewards/margins": 2.9370155334472656, "rewards/rejected": -4.876984119415283, "step": 2021 }, { "epoch": 0.42, "learning_rate": 1.1588235294117648e-05, "logits/chosen": -2.048274040222168, "logits/rejected": -2.2095186710357666, "logps/chosen": -287.12921142578125, "logps/rejected": -294.0669860839844, "loss": 0.5658, "rewards/accuracies": 0.6875, "rewards/chosen": -2.329941749572754, "rewards/margins": 2.1715383529663086, "rewards/rejected": -4.5014801025390625, "step": 2022 }, { "epoch": 0.42, "learning_rate": 1.158403361344538e-05, "logits/chosen": -2.269408702850342, "logits/rejected": -2.075428009033203, "logps/chosen": -402.91607666015625, "logps/rejected": -356.7300720214844, "loss": 0.2597, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4409866333007812, "rewards/margins": 4.622939586639404, "rewards/rejected": -6.0639262199401855, "step": 2023 }, { "epoch": 0.42, "learning_rate": 1.157983193277311e-05, "logits/chosen": -2.1971328258514404, "logits/rejected": -2.0909547805786133, "logps/chosen": -359.8511962890625, "logps/rejected": -321.28973388671875, "loss": 0.573, "rewards/accuracies": 0.75, "rewards/chosen": -3.780590534210205, "rewards/margins": 2.45827054977417, "rewards/rejected": -6.238861083984375, "step": 2024 }, { "epoch": 0.42, "learning_rate": 1.1575630252100842e-05, "logits/chosen": -1.920001745223999, "logits/rejected": -2.0093905925750732, "logps/chosen": -373.7569580078125, "logps/rejected": -357.22515869140625, "loss": 0.2532, "rewards/accuracies": 0.875, "rewards/chosen": -1.3780170679092407, "rewards/margins": 2.417965888977051, "rewards/rejected": -3.795982599258423, "step": 2025 }, { "epoch": 0.42, "learning_rate": 1.1571428571428573e-05, "logits/chosen": -2.2176761627197266, "logits/rejected": -1.9830904006958008, "logps/chosen": -329.3486328125, "logps/rejected": -342.1224060058594, "loss": 0.182, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7171614170074463, "rewards/margins": 4.0453410148620605, "rewards/rejected": -6.762502670288086, "step": 2026 }, { "epoch": 0.42, "learning_rate": 1.1567226890756305e-05, "logits/chosen": -2.058716297149658, "logits/rejected": -1.8591029644012451, "logps/chosen": -383.6545104980469, "logps/rejected": -344.69195556640625, "loss": 0.6941, "rewards/accuracies": 0.75, "rewards/chosen": -2.4524264335632324, "rewards/margins": 2.642935276031494, "rewards/rejected": -5.095361709594727, "step": 2027 }, { "epoch": 0.42, "learning_rate": 1.1563025210084035e-05, "logits/chosen": -2.2357912063598633, "logits/rejected": -1.981215238571167, "logps/chosen": -372.9283142089844, "logps/rejected": -298.0931701660156, "loss": 0.1775, "rewards/accuracies": 0.9375, "rewards/chosen": -1.313389539718628, "rewards/margins": 4.310908317565918, "rewards/rejected": -5.624298572540283, "step": 2028 }, { "epoch": 0.42, "learning_rate": 1.1558823529411765e-05, "logits/chosen": -1.8320471048355103, "logits/rejected": -1.7739046812057495, "logps/chosen": -259.5126953125, "logps/rejected": -312.6507568359375, "loss": 0.2925, "rewards/accuracies": 0.8125, "rewards/chosen": -2.192497730255127, "rewards/margins": 2.910689353942871, "rewards/rejected": -5.103187084197998, "step": 2029 }, { "epoch": 0.42, "learning_rate": 1.1554621848739495e-05, "logits/chosen": -2.0798726081848145, "logits/rejected": -2.0783588886260986, "logps/chosen": -304.9694519042969, "logps/rejected": -366.6223449707031, "loss": 0.1726, "rewards/accuracies": 0.9375, "rewards/chosen": -1.851839303970337, "rewards/margins": 3.8220221996307373, "rewards/rejected": -5.673861503601074, "step": 2030 }, { "epoch": 0.42, "learning_rate": 1.1550420168067227e-05, "logits/chosen": -2.346567153930664, "logits/rejected": -2.2844364643096924, "logps/chosen": -296.7462158203125, "logps/rejected": -325.58624267578125, "loss": 0.3088, "rewards/accuracies": 0.75, "rewards/chosen": -1.9146637916564941, "rewards/margins": 3.0064821243286133, "rewards/rejected": -4.921145915985107, "step": 2031 }, { "epoch": 0.43, "learning_rate": 1.1546218487394957e-05, "logits/chosen": -2.3003039360046387, "logits/rejected": -1.7692413330078125, "logps/chosen": -389.7088928222656, "logps/rejected": -356.91424560546875, "loss": 0.1603, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9001538753509521, "rewards/margins": 4.908247470855713, "rewards/rejected": -6.808401584625244, "step": 2032 }, { "epoch": 0.43, "learning_rate": 1.154201680672269e-05, "logits/chosen": -1.9862852096557617, "logits/rejected": -2.068901538848877, "logps/chosen": -370.74969482421875, "logps/rejected": -373.4486083984375, "loss": 1.1811, "rewards/accuracies": 0.8125, "rewards/chosen": -3.051518201828003, "rewards/margins": 2.2835683822631836, "rewards/rejected": -5.335086822509766, "step": 2033 }, { "epoch": 0.43, "learning_rate": 1.153781512605042e-05, "logits/chosen": -2.0426383018493652, "logits/rejected": -1.8522021770477295, "logps/chosen": -236.17050170898438, "logps/rejected": -281.18817138671875, "loss": 0.3454, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7310116291046143, "rewards/margins": 3.253507375717163, "rewards/rejected": -5.984519004821777, "step": 2034 }, { "epoch": 0.43, "learning_rate": 1.1533613445378151e-05, "logits/chosen": -2.3632569313049316, "logits/rejected": -2.0204625129699707, "logps/chosen": -285.57525634765625, "logps/rejected": -314.8445129394531, "loss": 0.2469, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7121021747589111, "rewards/margins": 3.498154401779175, "rewards/rejected": -5.210256576538086, "step": 2035 }, { "epoch": 0.43, "learning_rate": 1.1529411764705882e-05, "logits/chosen": -2.1731150150299072, "logits/rejected": -1.7058427333831787, "logps/chosen": -302.15435791015625, "logps/rejected": -299.70880126953125, "loss": 0.3348, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2195417881011963, "rewards/margins": 3.850309371948242, "rewards/rejected": -6.069850921630859, "step": 2036 }, { "epoch": 0.43, "learning_rate": 1.1525210084033614e-05, "logits/chosen": -2.0464119911193848, "logits/rejected": -2.1896140575408936, "logps/chosen": -249.23846435546875, "logps/rejected": -341.70574951171875, "loss": 0.2352, "rewards/accuracies": 0.875, "rewards/chosen": -2.360543966293335, "rewards/margins": 3.9387807846069336, "rewards/rejected": -6.299324989318848, "step": 2037 }, { "epoch": 0.43, "learning_rate": 1.1521008403361344e-05, "logits/chosen": -2.086638927459717, "logits/rejected": -1.889849305152893, "logps/chosen": -251.52032470703125, "logps/rejected": -281.9322814941406, "loss": 0.203, "rewards/accuracies": 0.875, "rewards/chosen": -1.70516037940979, "rewards/margins": 4.880710601806641, "rewards/rejected": -6.585871696472168, "step": 2038 }, { "epoch": 0.43, "learning_rate": 1.1516806722689076e-05, "logits/chosen": -1.9713646173477173, "logits/rejected": -1.8880610466003418, "logps/chosen": -320.4168395996094, "logps/rejected": -385.7452392578125, "loss": 0.3768, "rewards/accuracies": 0.875, "rewards/chosen": -1.7439255714416504, "rewards/margins": 3.454660415649414, "rewards/rejected": -5.1985859870910645, "step": 2039 }, { "epoch": 0.43, "learning_rate": 1.1512605042016806e-05, "logits/chosen": -2.108473300933838, "logits/rejected": -1.8333741426467896, "logps/chosen": -347.6883544921875, "logps/rejected": -345.1535949707031, "loss": 0.4493, "rewards/accuracies": 0.75, "rewards/chosen": -2.845118522644043, "rewards/margins": 3.5066580772399902, "rewards/rejected": -6.351777076721191, "step": 2040 }, { "epoch": 0.43, "learning_rate": 1.1508403361344538e-05, "logits/chosen": -2.1279444694519043, "logits/rejected": -2.014584541320801, "logps/chosen": -361.067138671875, "logps/rejected": -425.7370910644531, "loss": 0.1364, "rewards/accuracies": 0.9375, "rewards/chosen": -1.5322329998016357, "rewards/margins": 4.137571811676025, "rewards/rejected": -5.66980504989624, "step": 2041 }, { "epoch": 0.43, "learning_rate": 1.150420168067227e-05, "logits/chosen": -2.379103899002075, "logits/rejected": -2.2502808570861816, "logps/chosen": -423.4561767578125, "logps/rejected": -397.1187744140625, "loss": 0.3148, "rewards/accuracies": 0.875, "rewards/chosen": -1.7356139421463013, "rewards/margins": 3.093418598175049, "rewards/rejected": -4.8290324211120605, "step": 2042 }, { "epoch": 0.43, "learning_rate": 1.15e-05, "logits/chosen": -2.2598257064819336, "logits/rejected": -1.6651756763458252, "logps/chosen": -386.95611572265625, "logps/rejected": -287.5246887207031, "loss": 0.2514, "rewards/accuracies": 0.875, "rewards/chosen": -2.4715769290924072, "rewards/margins": 3.0825605392456055, "rewards/rejected": -5.554137229919434, "step": 2043 }, { "epoch": 0.43, "learning_rate": 1.1495798319327732e-05, "logits/chosen": -2.1495726108551025, "logits/rejected": -1.9794659614562988, "logps/chosen": -388.66326904296875, "logps/rejected": -351.50335693359375, "loss": 0.1272, "rewards/accuracies": 1.0, "rewards/chosen": -2.5405120849609375, "rewards/margins": 3.1952106952667236, "rewards/rejected": -5.735722541809082, "step": 2044 }, { "epoch": 0.43, "learning_rate": 1.1491596638655462e-05, "logits/chosen": -2.0491394996643066, "logits/rejected": -1.9735050201416016, "logps/chosen": -263.5440673828125, "logps/rejected": -291.84661865234375, "loss": 0.3734, "rewards/accuracies": 0.875, "rewards/chosen": -2.613436698913574, "rewards/margins": 2.9057528972625732, "rewards/rejected": -5.51918888092041, "step": 2045 }, { "epoch": 0.43, "learning_rate": 1.1487394957983194e-05, "logits/chosen": -2.035627841949463, "logits/rejected": -2.0443267822265625, "logps/chosen": -329.2646484375, "logps/rejected": -437.09130859375, "loss": 0.1607, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0102715492248535, "rewards/margins": 3.5766985416412354, "rewards/rejected": -5.586970329284668, "step": 2046 }, { "epoch": 0.43, "learning_rate": 1.1483193277310924e-05, "logits/chosen": -1.938293695449829, "logits/rejected": -2.2565715312957764, "logps/chosen": -251.23255920410156, "logps/rejected": -378.9906311035156, "loss": 0.5835, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1010117530822754, "rewards/margins": 2.8104801177978516, "rewards/rejected": -5.911492347717285, "step": 2047 }, { "epoch": 0.43, "learning_rate": 1.1478991596638656e-05, "logits/chosen": -1.8902827501296997, "logits/rejected": -2.066514730453491, "logps/chosen": -278.21295166015625, "logps/rejected": -309.6601867675781, "loss": 0.1273, "rewards/accuracies": 1.0, "rewards/chosen": -2.574343204498291, "rewards/margins": 4.671230792999268, "rewards/rejected": -7.245573997497559, "step": 2048 }, { "epoch": 0.43, "learning_rate": 1.1474789915966386e-05, "logits/chosen": -2.1475770473480225, "logits/rejected": -1.978366732597351, "logps/chosen": -324.9825439453125, "logps/rejected": -328.5615234375, "loss": 0.5464, "rewards/accuracies": 0.6875, "rewards/chosen": -2.5895750522613525, "rewards/margins": 2.695735216140747, "rewards/rejected": -5.2853102684021, "step": 2049 }, { "epoch": 0.43, "learning_rate": 1.1470588235294118e-05, "logits/chosen": -2.171236991882324, "logits/rejected": -2.039135694503784, "logps/chosen": -287.06365966796875, "logps/rejected": -313.1839599609375, "loss": 0.2787, "rewards/accuracies": 0.8125, "rewards/chosen": -2.255889415740967, "rewards/margins": 4.388477325439453, "rewards/rejected": -6.644366264343262, "step": 2050 }, { "epoch": 0.43, "learning_rate": 1.1466386554621849e-05, "logits/chosen": -2.3846218585968018, "logits/rejected": -2.0171148777008057, "logps/chosen": -430.9451904296875, "logps/rejected": -428.7223815917969, "loss": 0.4191, "rewards/accuracies": 0.875, "rewards/chosen": -2.113434076309204, "rewards/margins": 2.795745849609375, "rewards/rejected": -4.9091796875, "step": 2051 }, { "epoch": 0.43, "learning_rate": 1.146218487394958e-05, "logits/chosen": -2.164930820465088, "logits/rejected": -1.7924004793167114, "logps/chosen": -345.96697998046875, "logps/rejected": -243.38168334960938, "loss": 0.2555, "rewards/accuracies": 0.875, "rewards/chosen": -2.1616978645324707, "rewards/margins": 3.265957832336426, "rewards/rejected": -5.4276556968688965, "step": 2052 }, { "epoch": 0.43, "learning_rate": 1.145798319327731e-05, "logits/chosen": -1.9983105659484863, "logits/rejected": -2.1830124855041504, "logps/chosen": -300.07928466796875, "logps/rejected": -378.98406982421875, "loss": 0.2549, "rewards/accuracies": 0.875, "rewards/chosen": -2.333488941192627, "rewards/margins": 3.7451555728912354, "rewards/rejected": -6.078644275665283, "step": 2053 }, { "epoch": 0.43, "learning_rate": 1.1453781512605043e-05, "logits/chosen": -1.919573187828064, "logits/rejected": -2.1717214584350586, "logps/chosen": -226.31471252441406, "logps/rejected": -268.5181884765625, "loss": 0.6907, "rewards/accuracies": 0.6875, "rewards/chosen": -2.896489381790161, "rewards/margins": 1.5752509832382202, "rewards/rejected": -4.47174072265625, "step": 2054 }, { "epoch": 0.43, "learning_rate": 1.1449579831932773e-05, "logits/chosen": -2.1140739917755127, "logits/rejected": -2.202899932861328, "logps/chosen": -365.08599853515625, "logps/rejected": -420.6261291503906, "loss": 0.3417, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3386406898498535, "rewards/margins": 2.8463761806488037, "rewards/rejected": -6.185017108917236, "step": 2055 }, { "epoch": 0.43, "learning_rate": 1.1445378151260505e-05, "logits/chosen": -1.8200733661651611, "logits/rejected": -2.191620111465454, "logps/chosen": -282.4912109375, "logps/rejected": -390.8856201171875, "loss": 0.3245, "rewards/accuracies": 0.875, "rewards/chosen": -3.0334620475769043, "rewards/margins": 3.3059329986572266, "rewards/rejected": -6.339395046234131, "step": 2056 }, { "epoch": 0.43, "learning_rate": 1.1441176470588235e-05, "logits/chosen": -2.20247483253479, "logits/rejected": -2.1901540756225586, "logps/chosen": -292.2968444824219, "logps/rejected": -309.931884765625, "loss": 0.2491, "rewards/accuracies": 0.875, "rewards/chosen": -1.9867068529129028, "rewards/margins": 3.555753231048584, "rewards/rejected": -5.542459964752197, "step": 2057 }, { "epoch": 0.43, "learning_rate": 1.1436974789915967e-05, "logits/chosen": -2.0039258003234863, "logits/rejected": -1.9227045774459839, "logps/chosen": -250.51931762695312, "logps/rejected": -339.1800537109375, "loss": 0.2171, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2139534950256348, "rewards/margins": 3.0042152404785156, "rewards/rejected": -6.218169212341309, "step": 2058 }, { "epoch": 0.43, "learning_rate": 1.1432773109243697e-05, "logits/chosen": -1.9491963386535645, "logits/rejected": -2.0885331630706787, "logps/chosen": -247.37596130371094, "logps/rejected": -319.37261962890625, "loss": 0.404, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2546679973602295, "rewards/margins": 3.4546799659729004, "rewards/rejected": -5.709348201751709, "step": 2059 }, { "epoch": 0.43, "learning_rate": 1.1428571428571429e-05, "logits/chosen": -2.337691068649292, "logits/rejected": -1.9971985816955566, "logps/chosen": -377.20257568359375, "logps/rejected": -356.19903564453125, "loss": 0.2222, "rewards/accuracies": 0.875, "rewards/chosen": -2.640131950378418, "rewards/margins": 3.9922690391540527, "rewards/rejected": -6.632400989532471, "step": 2060 }, { "epoch": 0.43, "learning_rate": 1.142436974789916e-05, "logits/chosen": -1.8078107833862305, "logits/rejected": -1.9563109874725342, "logps/chosen": -343.4014892578125, "logps/rejected": -369.037109375, "loss": 0.4549, "rewards/accuracies": 0.75, "rewards/chosen": -2.637943983078003, "rewards/margins": 2.3987245559692383, "rewards/rejected": -5.03666877746582, "step": 2061 }, { "epoch": 0.43, "learning_rate": 1.1420168067226891e-05, "logits/chosen": -2.0936930179595947, "logits/rejected": -1.9929474592208862, "logps/chosen": -422.263671875, "logps/rejected": -383.9462890625, "loss": 0.324, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1107659339904785, "rewards/margins": 3.9146995544433594, "rewards/rejected": -6.025465488433838, "step": 2062 }, { "epoch": 0.43, "learning_rate": 1.1415966386554621e-05, "logits/chosen": -2.2708230018615723, "logits/rejected": -1.8042092323303223, "logps/chosen": -339.3675842285156, "logps/rejected": -272.2579345703125, "loss": 0.3843, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3816967010498047, "rewards/margins": 4.644870281219482, "rewards/rejected": -7.026566982269287, "step": 2063 }, { "epoch": 0.43, "learning_rate": 1.1411764705882353e-05, "logits/chosen": -2.100377082824707, "logits/rejected": -1.805079460144043, "logps/chosen": -436.5498046875, "logps/rejected": -280.3676452636719, "loss": 0.233, "rewards/accuracies": 0.875, "rewards/chosen": -1.8288683891296387, "rewards/margins": 3.222707986831665, "rewards/rejected": -5.051576614379883, "step": 2064 }, { "epoch": 0.43, "learning_rate": 1.1407563025210085e-05, "logits/chosen": -2.2598533630371094, "logits/rejected": -1.8235297203063965, "logps/chosen": -359.3331298828125, "logps/rejected": -332.5981750488281, "loss": 0.1902, "rewards/accuracies": 0.9375, "rewards/chosen": -2.383780002593994, "rewards/margins": 4.61159610748291, "rewards/rejected": -6.9953765869140625, "step": 2065 }, { "epoch": 0.43, "learning_rate": 1.1403361344537815e-05, "logits/chosen": -2.011625051498413, "logits/rejected": -2.0912132263183594, "logps/chosen": -266.5169372558594, "logps/rejected": -274.8509216308594, "loss": 0.1585, "rewards/accuracies": 0.9375, "rewards/chosen": -2.510806083679199, "rewards/margins": 3.6269052028656006, "rewards/rejected": -6.137711524963379, "step": 2066 }, { "epoch": 0.43, "learning_rate": 1.1399159663865547e-05, "logits/chosen": -2.01057767868042, "logits/rejected": -1.691219687461853, "logps/chosen": -380.3301696777344, "logps/rejected": -364.795166015625, "loss": 0.0959, "rewards/accuracies": 1.0, "rewards/chosen": -1.495618224143982, "rewards/margins": 4.659554481506348, "rewards/rejected": -6.155173301696777, "step": 2067 }, { "epoch": 0.43, "learning_rate": 1.1394957983193278e-05, "logits/chosen": -2.1961684226989746, "logits/rejected": -2.151221513748169, "logps/chosen": -290.4610290527344, "logps/rejected": -376.06927490234375, "loss": 0.6426, "rewards/accuracies": 0.75, "rewards/chosen": -3.5207271575927734, "rewards/margins": 2.2443108558654785, "rewards/rejected": -5.765038013458252, "step": 2068 }, { "epoch": 0.43, "learning_rate": 1.139075630252101e-05, "logits/chosen": -1.9307758808135986, "logits/rejected": -1.6932154893875122, "logps/chosen": -250.8085174560547, "logps/rejected": -266.97637939453125, "loss": 0.4998, "rewards/accuracies": 0.875, "rewards/chosen": -2.792975902557373, "rewards/margins": 3.3512892723083496, "rewards/rejected": -6.144266128540039, "step": 2069 }, { "epoch": 0.43, "learning_rate": 1.138655462184874e-05, "logits/chosen": -2.2497262954711914, "logits/rejected": -2.2222139835357666, "logps/chosen": -398.31011962890625, "logps/rejected": -472.5359191894531, "loss": 0.3235, "rewards/accuracies": 0.875, "rewards/chosen": -2.7597455978393555, "rewards/margins": 4.6423258781433105, "rewards/rejected": -7.402071952819824, "step": 2070 }, { "epoch": 0.43, "learning_rate": 1.1382352941176472e-05, "logits/chosen": -1.9763514995574951, "logits/rejected": -1.9554386138916016, "logps/chosen": -344.38006591796875, "logps/rejected": -343.58050537109375, "loss": 0.2937, "rewards/accuracies": 0.875, "rewards/chosen": -2.546339273452759, "rewards/margins": 3.6334803104400635, "rewards/rejected": -6.1798200607299805, "step": 2071 }, { "epoch": 0.43, "learning_rate": 1.1378151260504202e-05, "logits/chosen": -2.1349334716796875, "logits/rejected": -2.1206350326538086, "logps/chosen": -313.98760986328125, "logps/rejected": -341.19451904296875, "loss": 0.37, "rewards/accuracies": 0.6875, "rewards/chosen": -2.2789769172668457, "rewards/margins": 4.54546594619751, "rewards/rejected": -6.8244428634643555, "step": 2072 }, { "epoch": 0.43, "learning_rate": 1.1373949579831934e-05, "logits/chosen": -2.3179574012756348, "logits/rejected": -1.8872590065002441, "logps/chosen": -340.9142150878906, "logps/rejected": -356.84735107421875, "loss": 0.6791, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6295087337493896, "rewards/margins": 2.722949743270874, "rewards/rejected": -5.352458953857422, "step": 2073 }, { "epoch": 0.43, "learning_rate": 1.1369747899159664e-05, "logits/chosen": -2.2450451850891113, "logits/rejected": -1.8917794227600098, "logps/chosen": -260.1798095703125, "logps/rejected": -357.92303466796875, "loss": 0.1589, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1378841400146484, "rewards/margins": 5.381170272827148, "rewards/rejected": -7.519054412841797, "step": 2074 }, { "epoch": 0.43, "learning_rate": 1.1365546218487396e-05, "logits/chosen": -2.0596959590911865, "logits/rejected": -1.9514442682266235, "logps/chosen": -301.93731689453125, "logps/rejected": -374.26031494140625, "loss": 0.228, "rewards/accuracies": 0.875, "rewards/chosen": -2.668936014175415, "rewards/margins": 4.4316558837890625, "rewards/rejected": -7.100592613220215, "step": 2075 }, { "epoch": 0.43, "learning_rate": 1.1361344537815126e-05, "logits/chosen": -1.8287434577941895, "logits/rejected": -1.4872750043869019, "logps/chosen": -307.71392822265625, "logps/rejected": -246.12623596191406, "loss": 1.5487, "rewards/accuracies": 0.625, "rewards/chosen": -3.520806074142456, "rewards/margins": 1.386871576309204, "rewards/rejected": -4.90767765045166, "step": 2076 }, { "epoch": 0.43, "learning_rate": 1.1357142857142858e-05, "logits/chosen": -2.097133159637451, "logits/rejected": -2.046030044555664, "logps/chosen": -286.571044921875, "logps/rejected": -301.0132141113281, "loss": 0.3351, "rewards/accuracies": 0.75, "rewards/chosen": -2.337228775024414, "rewards/margins": 3.833244800567627, "rewards/rejected": -6.170473575592041, "step": 2077 }, { "epoch": 0.43, "learning_rate": 1.1352941176470588e-05, "logits/chosen": -2.148763656616211, "logits/rejected": -2.0681514739990234, "logps/chosen": -237.3824462890625, "logps/rejected": -325.4210510253906, "loss": 0.3731, "rewards/accuracies": 0.75, "rewards/chosen": -2.7078304290771484, "rewards/margins": 4.451881408691406, "rewards/rejected": -7.159711837768555, "step": 2078 }, { "epoch": 0.43, "learning_rate": 1.134873949579832e-05, "logits/chosen": -1.891608715057373, "logits/rejected": -1.807741641998291, "logps/chosen": -314.13287353515625, "logps/rejected": -319.89892578125, "loss": 0.4133, "rewards/accuracies": 0.75, "rewards/chosen": -3.19529390335083, "rewards/margins": 2.7128686904907227, "rewards/rejected": -5.9081621170043945, "step": 2079 }, { "epoch": 0.44, "learning_rate": 1.134453781512605e-05, "logits/chosen": -2.2419192790985107, "logits/rejected": -1.522952675819397, "logps/chosen": -299.9716796875, "logps/rejected": -298.77581787109375, "loss": 0.5774, "rewards/accuracies": 0.75, "rewards/chosen": -3.1659088134765625, "rewards/margins": 2.2226810455322266, "rewards/rejected": -5.388589859008789, "step": 2080 }, { "epoch": 0.44, "learning_rate": 1.1340336134453782e-05, "logits/chosen": -2.11307954788208, "logits/rejected": -1.7963145971298218, "logps/chosen": -409.2318420410156, "logps/rejected": -387.35028076171875, "loss": 0.7589, "rewards/accuracies": 0.75, "rewards/chosen": -2.7387428283691406, "rewards/margins": 2.3906798362731934, "rewards/rejected": -5.129422187805176, "step": 2081 }, { "epoch": 0.44, "learning_rate": 1.1336134453781513e-05, "logits/chosen": -2.056333065032959, "logits/rejected": -1.7021454572677612, "logps/chosen": -266.5394287109375, "logps/rejected": -356.16705322265625, "loss": 0.3234, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8668999671936035, "rewards/margins": 3.9565298557281494, "rewards/rejected": -6.823429107666016, "step": 2082 }, { "epoch": 0.44, "learning_rate": 1.1331932773109244e-05, "logits/chosen": -2.128643035888672, "logits/rejected": -2.211228847503662, "logps/chosen": -265.26055908203125, "logps/rejected": -286.2271423339844, "loss": 0.4343, "rewards/accuracies": 0.875, "rewards/chosen": -3.1598892211914062, "rewards/margins": 3.7516732215881348, "rewards/rejected": -6.911562919616699, "step": 2083 }, { "epoch": 0.44, "learning_rate": 1.1327731092436975e-05, "logits/chosen": -1.9741413593292236, "logits/rejected": -1.753514289855957, "logps/chosen": -282.8174743652344, "logps/rejected": -285.58099365234375, "loss": 0.2032, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6921117305755615, "rewards/margins": 4.4303131103515625, "rewards/rejected": -6.122424602508545, "step": 2084 }, { "epoch": 0.44, "learning_rate": 1.1323529411764707e-05, "logits/chosen": -1.9598090648651123, "logits/rejected": -1.7841306924819946, "logps/chosen": -267.2647399902344, "logps/rejected": -262.5185852050781, "loss": 0.2939, "rewards/accuracies": 0.875, "rewards/chosen": -2.8312489986419678, "rewards/margins": 3.528136968612671, "rewards/rejected": -6.359385967254639, "step": 2085 }, { "epoch": 0.44, "learning_rate": 1.1319327731092439e-05, "logits/chosen": -2.2115390300750732, "logits/rejected": -2.172428846359253, "logps/chosen": -329.2820129394531, "logps/rejected": -415.5566711425781, "loss": 0.3283, "rewards/accuracies": 0.875, "rewards/chosen": -2.2381389141082764, "rewards/margins": 3.159836769104004, "rewards/rejected": -5.397975921630859, "step": 2086 }, { "epoch": 0.44, "learning_rate": 1.1315126050420169e-05, "logits/chosen": -2.237715244293213, "logits/rejected": -2.0016987323760986, "logps/chosen": -263.94195556640625, "logps/rejected": -311.2554931640625, "loss": 0.289, "rewards/accuracies": 0.75, "rewards/chosen": -2.5013856887817383, "rewards/margins": 5.073391914367676, "rewards/rejected": -7.574777126312256, "step": 2087 }, { "epoch": 0.44, "learning_rate": 1.13109243697479e-05, "logits/chosen": -2.374457359313965, "logits/rejected": -1.9431941509246826, "logps/chosen": -539.1976318359375, "logps/rejected": -495.11175537109375, "loss": 0.0688, "rewards/accuracies": 1.0, "rewards/chosen": -1.0853111743927002, "rewards/margins": 5.777039051055908, "rewards/rejected": -6.862349987030029, "step": 2088 }, { "epoch": 0.44, "learning_rate": 1.1306722689075631e-05, "logits/chosen": -1.7959283590316772, "logits/rejected": -1.8263540267944336, "logps/chosen": -394.26483154296875, "logps/rejected": -279.94378662109375, "loss": 0.6829, "rewards/accuracies": 0.8125, "rewards/chosen": -3.368159294128418, "rewards/margins": 2.0748109817504883, "rewards/rejected": -5.442970275878906, "step": 2089 }, { "epoch": 0.44, "learning_rate": 1.1302521008403363e-05, "logits/chosen": -2.4007978439331055, "logits/rejected": -1.8503947257995605, "logps/chosen": -430.58837890625, "logps/rejected": -389.11370849609375, "loss": 0.1725, "rewards/accuracies": 0.9375, "rewards/chosen": -2.239142417907715, "rewards/margins": 3.3423779010772705, "rewards/rejected": -5.581520080566406, "step": 2090 }, { "epoch": 0.44, "learning_rate": 1.1298319327731093e-05, "logits/chosen": -2.0770785808563232, "logits/rejected": -2.238295555114746, "logps/chosen": -210.31890869140625, "logps/rejected": -340.9302673339844, "loss": 0.4006, "rewards/accuracies": 0.75, "rewards/chosen": -2.704918384552002, "rewards/margins": 3.581852912902832, "rewards/rejected": -6.286771774291992, "step": 2091 }, { "epoch": 0.44, "learning_rate": 1.1294117647058825e-05, "logits/chosen": -1.7465485334396362, "logits/rejected": -2.1249797344207764, "logps/chosen": -197.56185913085938, "logps/rejected": -380.1848449707031, "loss": 0.1264, "rewards/accuracies": 1.0, "rewards/chosen": -2.786294937133789, "rewards/margins": 4.378789901733398, "rewards/rejected": -7.165083885192871, "step": 2092 }, { "epoch": 0.44, "learning_rate": 1.1289915966386555e-05, "logits/chosen": -1.9650239944458008, "logits/rejected": -1.7887494564056396, "logps/chosen": -327.19775390625, "logps/rejected": -273.880615234375, "loss": 0.6284, "rewards/accuracies": 0.75, "rewards/chosen": -2.466172218322754, "rewards/margins": 2.816950798034668, "rewards/rejected": -5.283123016357422, "step": 2093 }, { "epoch": 0.44, "learning_rate": 1.1285714285714287e-05, "logits/chosen": -2.035176992416382, "logits/rejected": -1.6705471277236938, "logps/chosen": -230.17788696289062, "logps/rejected": -295.9740295410156, "loss": 0.1913, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7242836952209473, "rewards/margins": 3.9833855628967285, "rewards/rejected": -6.707669734954834, "step": 2094 }, { "epoch": 0.44, "learning_rate": 1.1281512605042017e-05, "logits/chosen": -2.362687349319458, "logits/rejected": -2.008629322052002, "logps/chosen": -354.7229309082031, "logps/rejected": -328.02349853515625, "loss": 0.6579, "rewards/accuracies": 0.75, "rewards/chosen": -2.88460111618042, "rewards/margins": 3.4469523429870605, "rewards/rejected": -6.3315534591674805, "step": 2095 }, { "epoch": 0.44, "learning_rate": 1.127731092436975e-05, "logits/chosen": -1.960052251815796, "logits/rejected": -1.850200891494751, "logps/chosen": -277.55389404296875, "logps/rejected": -382.36468505859375, "loss": 0.1642, "rewards/accuracies": 0.9375, "rewards/chosen": -2.284581184387207, "rewards/margins": 3.7295641899108887, "rewards/rejected": -6.014145374298096, "step": 2096 }, { "epoch": 0.44, "learning_rate": 1.127310924369748e-05, "logits/chosen": -2.088284492492676, "logits/rejected": -1.6026382446289062, "logps/chosen": -403.4964904785156, "logps/rejected": -332.2255859375, "loss": 0.1538, "rewards/accuracies": 0.875, "rewards/chosen": -2.50394868850708, "rewards/margins": 4.2063798904418945, "rewards/rejected": -6.710328578948975, "step": 2097 }, { "epoch": 0.44, "learning_rate": 1.1268907563025211e-05, "logits/chosen": -2.0808637142181396, "logits/rejected": -2.164285182952881, "logps/chosen": -346.36553955078125, "logps/rejected": -343.06097412109375, "loss": 0.1462, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0551834106445312, "rewards/margins": 3.9173378944396973, "rewards/rejected": -5.97252082824707, "step": 2098 }, { "epoch": 0.44, "learning_rate": 1.1264705882352942e-05, "logits/chosen": -2.0195608139038086, "logits/rejected": -1.7941190004348755, "logps/chosen": -264.0512390136719, "logps/rejected": -266.022216796875, "loss": 0.3801, "rewards/accuracies": 0.8125, "rewards/chosen": -2.468585968017578, "rewards/margins": 3.57623291015625, "rewards/rejected": -6.044818878173828, "step": 2099 }, { "epoch": 0.44, "learning_rate": 1.1260504201680673e-05, "logits/chosen": -2.32964825630188, "logits/rejected": -1.603450894355774, "logps/chosen": -410.2392578125, "logps/rejected": -377.18988037109375, "loss": 0.371, "rewards/accuracies": 0.875, "rewards/chosen": -1.9095793962478638, "rewards/margins": 4.100839138031006, "rewards/rejected": -6.010417938232422, "step": 2100 }, { "epoch": 0.44, "learning_rate": 1.1256302521008404e-05, "logits/chosen": -2.290189743041992, "logits/rejected": -2.090519905090332, "logps/chosen": -273.4089660644531, "logps/rejected": -293.29266357421875, "loss": 0.1521, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7961888313293457, "rewards/margins": 4.478764057159424, "rewards/rejected": -6.2749528884887695, "step": 2101 }, { "epoch": 0.44, "learning_rate": 1.1252100840336136e-05, "logits/chosen": -2.324431896209717, "logits/rejected": -2.2318320274353027, "logps/chosen": -268.22857666015625, "logps/rejected": -277.08648681640625, "loss": 0.2325, "rewards/accuracies": 0.8125, "rewards/chosen": -1.608604073524475, "rewards/margins": 3.6065711975097656, "rewards/rejected": -5.215175151824951, "step": 2102 }, { "epoch": 0.44, "learning_rate": 1.1247899159663866e-05, "logits/chosen": -1.8101016283035278, "logits/rejected": -1.7072787284851074, "logps/chosen": -216.84495544433594, "logps/rejected": -259.4885559082031, "loss": 0.3058, "rewards/accuracies": 0.75, "rewards/chosen": -1.8696796894073486, "rewards/margins": 4.031265735626221, "rewards/rejected": -5.900945663452148, "step": 2103 }, { "epoch": 0.44, "learning_rate": 1.1243697478991598e-05, "logits/chosen": -1.866039752960205, "logits/rejected": -2.0375428199768066, "logps/chosen": -332.8404235839844, "logps/rejected": -310.45416259765625, "loss": 0.4658, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1105406284332275, "rewards/margins": 4.041823387145996, "rewards/rejected": -6.152364253997803, "step": 2104 }, { "epoch": 0.44, "learning_rate": 1.1239495798319328e-05, "logits/chosen": -2.2641921043395996, "logits/rejected": -2.3262248039245605, "logps/chosen": -336.8399353027344, "logps/rejected": -428.47216796875, "loss": 0.2122, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3530216217041016, "rewards/margins": 4.220526695251465, "rewards/rejected": -6.573548316955566, "step": 2105 }, { "epoch": 0.44, "learning_rate": 1.123529411764706e-05, "logits/chosen": -2.3589253425598145, "logits/rejected": -1.8773760795593262, "logps/chosen": -250.96490478515625, "logps/rejected": -235.406982421875, "loss": 0.3002, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2207822799682617, "rewards/margins": 4.55903959274292, "rewards/rejected": -6.779821872711182, "step": 2106 }, { "epoch": 0.44, "learning_rate": 1.123109243697479e-05, "logits/chosen": -1.6518869400024414, "logits/rejected": -1.560832142829895, "logps/chosen": -392.82037353515625, "logps/rejected": -301.0335388183594, "loss": 0.1628, "rewards/accuracies": 0.9375, "rewards/chosen": -2.157667875289917, "rewards/margins": 4.188111782073975, "rewards/rejected": -6.3457794189453125, "step": 2107 }, { "epoch": 0.44, "learning_rate": 1.1226890756302522e-05, "logits/chosen": -1.927270770072937, "logits/rejected": -2.0700478553771973, "logps/chosen": -218.9384002685547, "logps/rejected": -491.71588134765625, "loss": 0.2734, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5728435516357422, "rewards/margins": 4.213078022003174, "rewards/rejected": -5.785921573638916, "step": 2108 }, { "epoch": 0.44, "learning_rate": 1.1222689075630254e-05, "logits/chosen": -2.2030696868896484, "logits/rejected": -2.2147703170776367, "logps/chosen": -336.65985107421875, "logps/rejected": -443.0171203613281, "loss": 0.8827, "rewards/accuracies": 0.8125, "rewards/chosen": -2.138679027557373, "rewards/margins": 1.965569019317627, "rewards/rejected": -4.104248046875, "step": 2109 }, { "epoch": 0.44, "learning_rate": 1.1218487394957984e-05, "logits/chosen": -2.1268224716186523, "logits/rejected": -2.117046594619751, "logps/chosen": -243.7693328857422, "logps/rejected": -322.23895263671875, "loss": 0.3461, "rewards/accuracies": 0.875, "rewards/chosen": -1.841224193572998, "rewards/margins": 4.569146156311035, "rewards/rejected": -6.410369873046875, "step": 2110 }, { "epoch": 0.44, "learning_rate": 1.1214285714285716e-05, "logits/chosen": -1.9013772010803223, "logits/rejected": -1.8576560020446777, "logps/chosen": -400.218994140625, "logps/rejected": -378.9980163574219, "loss": 0.3474, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2907309532165527, "rewards/margins": 2.286616802215576, "rewards/rejected": -4.577347755432129, "step": 2111 }, { "epoch": 0.44, "learning_rate": 1.1210084033613446e-05, "logits/chosen": -2.185497760772705, "logits/rejected": -1.4457454681396484, "logps/chosen": -395.5505676269531, "logps/rejected": -306.52978515625, "loss": 0.2547, "rewards/accuracies": 0.875, "rewards/chosen": -1.6021113395690918, "rewards/margins": 3.7774672508239746, "rewards/rejected": -5.379578590393066, "step": 2112 }, { "epoch": 0.44, "learning_rate": 1.1205882352941178e-05, "logits/chosen": -2.0680925846099854, "logits/rejected": -2.034123420715332, "logps/chosen": -317.09588623046875, "logps/rejected": -293.72576904296875, "loss": 0.3705, "rewards/accuracies": 0.75, "rewards/chosen": -1.2642791271209717, "rewards/margins": 3.343371868133545, "rewards/rejected": -4.6076507568359375, "step": 2113 }, { "epoch": 0.44, "learning_rate": 1.1201680672268908e-05, "logits/chosen": -2.0487797260284424, "logits/rejected": -1.839284896850586, "logps/chosen": -399.13446044921875, "logps/rejected": -408.6397705078125, "loss": 0.1207, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1105506420135498, "rewards/margins": 4.683767318725586, "rewards/rejected": -5.794317245483398, "step": 2114 }, { "epoch": 0.44, "learning_rate": 1.119747899159664e-05, "logits/chosen": -2.248800039291382, "logits/rejected": -1.820990800857544, "logps/chosen": -327.5166015625, "logps/rejected": -275.72821044921875, "loss": 0.3841, "rewards/accuracies": 0.875, "rewards/chosen": -2.0566530227661133, "rewards/margins": 3.134023666381836, "rewards/rejected": -5.190676689147949, "step": 2115 }, { "epoch": 0.44, "learning_rate": 1.119327731092437e-05, "logits/chosen": -2.3500242233276367, "logits/rejected": -2.351048469543457, "logps/chosen": -288.04998779296875, "logps/rejected": -322.3257141113281, "loss": 0.6152, "rewards/accuracies": 0.75, "rewards/chosen": -2.28229022026062, "rewards/margins": 2.140395402908325, "rewards/rejected": -4.422685623168945, "step": 2116 }, { "epoch": 0.44, "learning_rate": 1.1189075630252102e-05, "logits/chosen": -2.2064483165740967, "logits/rejected": -2.0624213218688965, "logps/chosen": -224.48138427734375, "logps/rejected": -266.416748046875, "loss": 0.2117, "rewards/accuracies": 0.875, "rewards/chosen": -1.9427436590194702, "rewards/margins": 4.758347511291504, "rewards/rejected": -6.7010908126831055, "step": 2117 }, { "epoch": 0.44, "learning_rate": 1.1184873949579833e-05, "logits/chosen": -1.919853925704956, "logits/rejected": -2.0803310871124268, "logps/chosen": -405.0791931152344, "logps/rejected": -391.9491882324219, "loss": 0.2733, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6223969459533691, "rewards/margins": 3.929147720336914, "rewards/rejected": -5.551544189453125, "step": 2118 }, { "epoch": 0.44, "learning_rate": 1.1180672268907565e-05, "logits/chosen": -2.158562660217285, "logits/rejected": -2.0884718894958496, "logps/chosen": -245.0634002685547, "logps/rejected": -270.0154113769531, "loss": 0.6614, "rewards/accuracies": 0.6875, "rewards/chosen": -2.7728772163391113, "rewards/margins": 1.5978729724884033, "rewards/rejected": -4.3707499504089355, "step": 2119 }, { "epoch": 0.44, "learning_rate": 1.1176470588235295e-05, "logits/chosen": -2.3036646842956543, "logits/rejected": -1.7940211296081543, "logps/chosen": -395.62701416015625, "logps/rejected": -282.34710693359375, "loss": 0.3757, "rewards/accuracies": 0.75, "rewards/chosen": -1.5422285795211792, "rewards/margins": 2.4022321701049805, "rewards/rejected": -3.944460868835449, "step": 2120 }, { "epoch": 0.44, "learning_rate": 1.1172268907563027e-05, "logits/chosen": -2.3628814220428467, "logits/rejected": -1.8830676078796387, "logps/chosen": -296.4643249511719, "logps/rejected": -305.5382385253906, "loss": 0.2552, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4281736612319946, "rewards/margins": 3.2428548336029053, "rewards/rejected": -4.6710286140441895, "step": 2121 }, { "epoch": 0.44, "learning_rate": 1.1168067226890757e-05, "logits/chosen": -2.156634569168091, "logits/rejected": -2.038304328918457, "logps/chosen": -284.01422119140625, "logps/rejected": -334.09405517578125, "loss": 0.6048, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1160361766815186, "rewards/margins": 3.0303492546081543, "rewards/rejected": -5.146385192871094, "step": 2122 }, { "epoch": 0.44, "learning_rate": 1.1163865546218489e-05, "logits/chosen": -2.6387832164764404, "logits/rejected": -2.0389580726623535, "logps/chosen": -356.20269775390625, "logps/rejected": -249.40695190429688, "loss": 0.5059, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8262940645217896, "rewards/margins": 2.527844190597534, "rewards/rejected": -4.354138374328613, "step": 2123 }, { "epoch": 0.44, "learning_rate": 1.1159663865546219e-05, "logits/chosen": -2.007159948348999, "logits/rejected": -2.019857883453369, "logps/chosen": -324.50885009765625, "logps/rejected": -446.41827392578125, "loss": 0.2346, "rewards/accuracies": 0.8125, "rewards/chosen": -1.0337823629379272, "rewards/margins": 4.590088367462158, "rewards/rejected": -5.623870849609375, "step": 2124 }, { "epoch": 0.44, "learning_rate": 1.1155462184873951e-05, "logits/chosen": -2.3467788696289062, "logits/rejected": -1.712422251701355, "logps/chosen": -376.94354248046875, "logps/rejected": -292.44635009765625, "loss": 0.118, "rewards/accuracies": 0.9375, "rewards/chosen": -0.9205886125564575, "rewards/margins": 5.446073532104492, "rewards/rejected": -6.366662502288818, "step": 2125 }, { "epoch": 0.44, "learning_rate": 1.1151260504201681e-05, "logits/chosen": -2.0627143383026123, "logits/rejected": -2.0900228023529053, "logps/chosen": -253.41395568847656, "logps/rejected": -315.8117980957031, "loss": 0.3165, "rewards/accuracies": 0.875, "rewards/chosen": -1.6969473361968994, "rewards/margins": 3.3166635036468506, "rewards/rejected": -5.01361083984375, "step": 2126 }, { "epoch": 0.44, "learning_rate": 1.1147058823529413e-05, "logits/chosen": -2.214221239089966, "logits/rejected": -2.2162487506866455, "logps/chosen": -315.79638671875, "logps/rejected": -335.0633544921875, "loss": 0.2653, "rewards/accuracies": 0.875, "rewards/chosen": -1.503174066543579, "rewards/margins": 3.8282437324523926, "rewards/rejected": -5.331417083740234, "step": 2127 }, { "epoch": 0.45, "learning_rate": 1.1142857142857143e-05, "logits/chosen": -2.1454780101776123, "logits/rejected": -2.429685115814209, "logps/chosen": -271.5894470214844, "logps/rejected": -331.1427001953125, "loss": 0.2079, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8967690467834473, "rewards/margins": 4.363072395324707, "rewards/rejected": -6.259840488433838, "step": 2128 }, { "epoch": 0.45, "learning_rate": 1.1138655462184875e-05, "logits/chosen": -2.0567209720611572, "logits/rejected": -1.8903017044067383, "logps/chosen": -221.18536376953125, "logps/rejected": -244.03857421875, "loss": 0.2921, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0792124271392822, "rewards/margins": 3.994507312774658, "rewards/rejected": -6.0737199783325195, "step": 2129 }, { "epoch": 0.45, "learning_rate": 1.1134453781512606e-05, "logits/chosen": -2.1531128883361816, "logits/rejected": -1.928863286972046, "logps/chosen": -351.0466613769531, "logps/rejected": -335.0560607910156, "loss": 0.3611, "rewards/accuracies": 0.8125, "rewards/chosen": -1.4166717529296875, "rewards/margins": 2.5792300701141357, "rewards/rejected": -3.9959020614624023, "step": 2130 }, { "epoch": 0.45, "learning_rate": 1.1130252100840337e-05, "logits/chosen": -2.2557971477508545, "logits/rejected": -2.076328992843628, "logps/chosen": -354.7710266113281, "logps/rejected": -359.93536376953125, "loss": 0.1661, "rewards/accuracies": 0.9375, "rewards/chosen": -2.266421318054199, "rewards/margins": 3.2916550636291504, "rewards/rejected": -5.55807638168335, "step": 2131 }, { "epoch": 0.45, "learning_rate": 1.112605042016807e-05, "logits/chosen": -2.0067272186279297, "logits/rejected": -1.946378231048584, "logps/chosen": -340.98028564453125, "logps/rejected": -387.01654052734375, "loss": 0.5698, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6974260807037354, "rewards/margins": 2.0739426612854004, "rewards/rejected": -3.771368980407715, "step": 2132 }, { "epoch": 0.45, "learning_rate": 1.11218487394958e-05, "logits/chosen": -2.0942511558532715, "logits/rejected": -1.7318954467773438, "logps/chosen": -278.7694396972656, "logps/rejected": -230.17576599121094, "loss": 0.2503, "rewards/accuracies": 0.9375, "rewards/chosen": -2.154672384262085, "rewards/margins": 3.4053964614868164, "rewards/rejected": -5.5600690841674805, "step": 2133 }, { "epoch": 0.45, "learning_rate": 1.1117647058823531e-05, "logits/chosen": -2.133626937866211, "logits/rejected": -1.4854366779327393, "logps/chosen": -320.07464599609375, "logps/rejected": -260.7422790527344, "loss": 0.2469, "rewards/accuracies": 0.875, "rewards/chosen": -1.6537978649139404, "rewards/margins": 2.7697830200195312, "rewards/rejected": -4.423581123352051, "step": 2134 }, { "epoch": 0.45, "learning_rate": 1.1113445378151262e-05, "logits/chosen": -2.3014333248138428, "logits/rejected": -1.8766543865203857, "logps/chosen": -327.1843566894531, "logps/rejected": -240.4751434326172, "loss": 0.4028, "rewards/accuracies": 0.75, "rewards/chosen": -1.9308364391326904, "rewards/margins": 2.4663476943969727, "rewards/rejected": -4.397184371948242, "step": 2135 }, { "epoch": 0.45, "learning_rate": 1.1109243697478994e-05, "logits/chosen": -2.3082189559936523, "logits/rejected": -2.0221683979034424, "logps/chosen": -433.845703125, "logps/rejected": -477.98236083984375, "loss": 0.1283, "rewards/accuracies": 0.9375, "rewards/chosen": -1.2177271842956543, "rewards/margins": 4.435600280761719, "rewards/rejected": -5.653327465057373, "step": 2136 }, { "epoch": 0.45, "learning_rate": 1.1105042016806724e-05, "logits/chosen": -2.1207849979400635, "logits/rejected": -2.1162607669830322, "logps/chosen": -477.9659423828125, "logps/rejected": -376.99041748046875, "loss": 0.5491, "rewards/accuracies": 0.75, "rewards/chosen": -1.758960485458374, "rewards/margins": 3.101200580596924, "rewards/rejected": -4.860160827636719, "step": 2137 }, { "epoch": 0.45, "learning_rate": 1.1100840336134456e-05, "logits/chosen": -2.0040762424468994, "logits/rejected": -1.7547401189804077, "logps/chosen": -222.41940307617188, "logps/rejected": -244.71957397460938, "loss": 0.2128, "rewards/accuracies": 0.875, "rewards/chosen": -1.7893508672714233, "rewards/margins": 3.536910057067871, "rewards/rejected": -5.326261043548584, "step": 2138 }, { "epoch": 0.45, "learning_rate": 1.1096638655462186e-05, "logits/chosen": -2.021898031234741, "logits/rejected": -2.126206398010254, "logps/chosen": -281.96881103515625, "logps/rejected": -422.16717529296875, "loss": 0.1347, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1842364072799683, "rewards/margins": 3.9649136066436768, "rewards/rejected": -5.149150371551514, "step": 2139 }, { "epoch": 0.45, "learning_rate": 1.1092436974789918e-05, "logits/chosen": -1.9356483221054077, "logits/rejected": -1.9482295513153076, "logps/chosen": -348.6847229003906, "logps/rejected": -339.896240234375, "loss": 0.4361, "rewards/accuracies": 0.875, "rewards/chosen": -1.6491668224334717, "rewards/margins": 3.3715784549713135, "rewards/rejected": -5.020745754241943, "step": 2140 }, { "epoch": 0.45, "learning_rate": 1.1088235294117648e-05, "logits/chosen": -1.9045846462249756, "logits/rejected": -1.9601390361785889, "logps/chosen": -254.6430206298828, "logps/rejected": -256.7317810058594, "loss": 0.4325, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2346580028533936, "rewards/margins": 1.3620989322662354, "rewards/rejected": -3.596756935119629, "step": 2141 }, { "epoch": 0.45, "learning_rate": 1.108403361344538e-05, "logits/chosen": -2.0899438858032227, "logits/rejected": -1.9953680038452148, "logps/chosen": -341.23724365234375, "logps/rejected": -350.5480651855469, "loss": 0.2085, "rewards/accuracies": 0.875, "rewards/chosen": -1.4544007778167725, "rewards/margins": 3.8809330463409424, "rewards/rejected": -5.335333824157715, "step": 2142 }, { "epoch": 0.45, "learning_rate": 1.107983193277311e-05, "logits/chosen": -1.807093620300293, "logits/rejected": -2.1782169342041016, "logps/chosen": -284.1681213378906, "logps/rejected": -350.2537536621094, "loss": 0.2749, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8590219020843506, "rewards/margins": 4.617184162139893, "rewards/rejected": -6.476205348968506, "step": 2143 }, { "epoch": 0.45, "learning_rate": 1.1075630252100842e-05, "logits/chosen": -1.9736236333847046, "logits/rejected": -1.7161903381347656, "logps/chosen": -329.498291015625, "logps/rejected": -256.70166015625, "loss": 0.1526, "rewards/accuracies": 0.9375, "rewards/chosen": -2.075540065765381, "rewards/margins": 4.444458484649658, "rewards/rejected": -6.519998550415039, "step": 2144 }, { "epoch": 0.45, "learning_rate": 1.1071428571428572e-05, "logits/chosen": -2.0499110221862793, "logits/rejected": -1.6978867053985596, "logps/chosen": -438.6824035644531, "logps/rejected": -420.48138427734375, "loss": 0.1331, "rewards/accuracies": 0.9375, "rewards/chosen": -1.453322410583496, "rewards/margins": 4.202869415283203, "rewards/rejected": -5.656192302703857, "step": 2145 }, { "epoch": 0.45, "learning_rate": 1.1067226890756304e-05, "logits/chosen": -2.0777339935302734, "logits/rejected": -1.7196691036224365, "logps/chosen": -406.0032958984375, "logps/rejected": -299.55535888671875, "loss": 0.4558, "rewards/accuracies": 0.75, "rewards/chosen": -2.3479602336883545, "rewards/margins": 2.849907398223877, "rewards/rejected": -5.197867393493652, "step": 2146 }, { "epoch": 0.45, "learning_rate": 1.1063025210084035e-05, "logits/chosen": -1.9819612503051758, "logits/rejected": -1.8589093685150146, "logps/chosen": -380.8150634765625, "logps/rejected": -316.04998779296875, "loss": 0.4447, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2182402610778809, "rewards/margins": 2.380608081817627, "rewards/rejected": -3.598848342895508, "step": 2147 }, { "epoch": 0.45, "learning_rate": 1.1058823529411766e-05, "logits/chosen": -2.1406993865966797, "logits/rejected": -1.9022096395492554, "logps/chosen": -255.685546875, "logps/rejected": -284.3907775878906, "loss": 0.2504, "rewards/accuracies": 0.875, "rewards/chosen": -2.062098503112793, "rewards/margins": 3.99426007270813, "rewards/rejected": -6.056358337402344, "step": 2148 }, { "epoch": 0.45, "learning_rate": 1.1054621848739497e-05, "logits/chosen": -2.0744566917419434, "logits/rejected": -1.6592695713043213, "logps/chosen": -414.8008117675781, "logps/rejected": -374.22576904296875, "loss": 0.541, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8700759410858154, "rewards/margins": 2.453132390975952, "rewards/rejected": -4.323208332061768, "step": 2149 }, { "epoch": 0.45, "learning_rate": 1.1050420168067229e-05, "logits/chosen": -2.2994801998138428, "logits/rejected": -2.1311941146850586, "logps/chosen": -373.1037902832031, "logps/rejected": -370.64605712890625, "loss": 0.44, "rewards/accuracies": 0.75, "rewards/chosen": -2.4412970542907715, "rewards/margins": 2.494722843170166, "rewards/rejected": -4.936020374298096, "step": 2150 }, { "epoch": 0.45, "learning_rate": 1.1046218487394959e-05, "logits/chosen": -2.1855316162109375, "logits/rejected": -1.809584617614746, "logps/chosen": -353.017578125, "logps/rejected": -376.0660400390625, "loss": 0.2531, "rewards/accuracies": 0.875, "rewards/chosen": -2.504176616668701, "rewards/margins": 3.7505970001220703, "rewards/rejected": -6.2547736167907715, "step": 2151 }, { "epoch": 0.45, "learning_rate": 1.104201680672269e-05, "logits/chosen": -2.1064038276672363, "logits/rejected": -2.291184425354004, "logps/chosen": -277.2584228515625, "logps/rejected": -320.9295654296875, "loss": 0.2134, "rewards/accuracies": 0.9375, "rewards/chosen": -1.748108983039856, "rewards/margins": 5.320732593536377, "rewards/rejected": -7.06884241104126, "step": 2152 }, { "epoch": 0.45, "learning_rate": 1.1037815126050423e-05, "logits/chosen": -1.3586311340332031, "logits/rejected": -2.143803358078003, "logps/chosen": -141.74215698242188, "logps/rejected": -416.4621887207031, "loss": 0.1852, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5869956016540527, "rewards/margins": 5.488154411315918, "rewards/rejected": -8.075149536132812, "step": 2153 }, { "epoch": 0.45, "learning_rate": 1.1033613445378153e-05, "logits/chosen": -2.063504934310913, "logits/rejected": -2.1399271488189697, "logps/chosen": -403.47430419921875, "logps/rejected": -408.6955871582031, "loss": 0.4129, "rewards/accuracies": 0.75, "rewards/chosen": -3.31896710395813, "rewards/margins": 2.337878704071045, "rewards/rejected": -5.656845569610596, "step": 2154 }, { "epoch": 0.45, "learning_rate": 1.1029411764705885e-05, "logits/chosen": -2.310912847518921, "logits/rejected": -1.835289716720581, "logps/chosen": -273.4323425292969, "logps/rejected": -296.7527770996094, "loss": 0.2315, "rewards/accuracies": 0.9375, "rewards/chosen": -2.002804756164551, "rewards/margins": 3.6010003089904785, "rewards/rejected": -5.603805065155029, "step": 2155 }, { "epoch": 0.45, "learning_rate": 1.1025210084033615e-05, "logits/chosen": -1.9482005834579468, "logits/rejected": -2.0161092281341553, "logps/chosen": -231.42431640625, "logps/rejected": -336.76190185546875, "loss": 0.2457, "rewards/accuracies": 0.9375, "rewards/chosen": -2.353275775909424, "rewards/margins": 4.855534553527832, "rewards/rejected": -7.208811283111572, "step": 2156 }, { "epoch": 0.45, "learning_rate": 1.1021008403361347e-05, "logits/chosen": -2.066718339920044, "logits/rejected": -2.2583022117614746, "logps/chosen": -256.9452819824219, "logps/rejected": -453.9226989746094, "loss": 0.3597, "rewards/accuracies": 0.875, "rewards/chosen": -2.7138071060180664, "rewards/margins": 3.7090206146240234, "rewards/rejected": -6.422828197479248, "step": 2157 }, { "epoch": 0.45, "learning_rate": 1.1016806722689077e-05, "logits/chosen": -2.4758973121643066, "logits/rejected": -1.9720137119293213, "logps/chosen": -396.96002197265625, "logps/rejected": -369.3337097167969, "loss": 0.0442, "rewards/accuracies": 1.0, "rewards/chosen": -0.8153385519981384, "rewards/margins": 5.693082332611084, "rewards/rejected": -6.508420467376709, "step": 2158 }, { "epoch": 0.45, "learning_rate": 1.1012605042016809e-05, "logits/chosen": -1.9146405458450317, "logits/rejected": -1.9971437454223633, "logps/chosen": -275.4366760253906, "logps/rejected": -303.45977783203125, "loss": 0.7363, "rewards/accuracies": 0.75, "rewards/chosen": -2.3198189735412598, "rewards/margins": 2.8516457080841064, "rewards/rejected": -5.171464920043945, "step": 2159 }, { "epoch": 0.45, "learning_rate": 1.100840336134454e-05, "logits/chosen": -2.0020523071289062, "logits/rejected": -2.1226553916931152, "logps/chosen": -315.7077941894531, "logps/rejected": -361.6723937988281, "loss": 0.6878, "rewards/accuracies": 0.8125, "rewards/chosen": -2.637564182281494, "rewards/margins": 2.7419075965881348, "rewards/rejected": -5.379471778869629, "step": 2160 }, { "epoch": 0.45, "learning_rate": 1.1004201680672271e-05, "logits/chosen": -2.408409595489502, "logits/rejected": -2.198552131652832, "logps/chosen": -246.95294189453125, "logps/rejected": -264.199462890625, "loss": 0.871, "rewards/accuracies": 0.625, "rewards/chosen": -2.5186996459960938, "rewards/margins": 1.7888046503067017, "rewards/rejected": -4.307504177093506, "step": 2161 }, { "epoch": 0.45, "learning_rate": 1.1000000000000001e-05, "logits/chosen": -1.996659755706787, "logits/rejected": -1.8724391460418701, "logps/chosen": -332.5257263183594, "logps/rejected": -390.8209228515625, "loss": 0.2259, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4656529426574707, "rewards/margins": 5.166693687438965, "rewards/rejected": -7.6323466300964355, "step": 2162 }, { "epoch": 0.45, "learning_rate": 1.0995798319327733e-05, "logits/chosen": -2.014070987701416, "logits/rejected": -1.9697198867797852, "logps/chosen": -340.2963562011719, "logps/rejected": -403.85308837890625, "loss": 0.3293, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7613236904144287, "rewards/margins": 4.447287559509277, "rewards/rejected": -7.208611488342285, "step": 2163 }, { "epoch": 0.45, "learning_rate": 1.0991596638655464e-05, "logits/chosen": -2.1590576171875, "logits/rejected": -1.6582505702972412, "logps/chosen": -426.586669921875, "logps/rejected": -315.9244689941406, "loss": 0.1607, "rewards/accuracies": 0.9375, "rewards/chosen": -2.891146659851074, "rewards/margins": 3.6502685546875, "rewards/rejected": -6.541415214538574, "step": 2164 }, { "epoch": 0.45, "learning_rate": 1.0987394957983195e-05, "logits/chosen": -2.239654541015625, "logits/rejected": -2.051011800765991, "logps/chosen": -333.54583740234375, "logps/rejected": -374.7533264160156, "loss": 0.4656, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3286242485046387, "rewards/margins": 3.1175670623779297, "rewards/rejected": -5.446191787719727, "step": 2165 }, { "epoch": 0.45, "learning_rate": 1.0983193277310926e-05, "logits/chosen": -2.227440595626831, "logits/rejected": -1.6257731914520264, "logps/chosen": -324.452392578125, "logps/rejected": -350.2322082519531, "loss": 0.4629, "rewards/accuracies": 0.875, "rewards/chosen": -2.6320247650146484, "rewards/margins": 4.669007301330566, "rewards/rejected": -7.301032066345215, "step": 2166 }, { "epoch": 0.45, "learning_rate": 1.0978991596638658e-05, "logits/chosen": -1.8249037265777588, "logits/rejected": -1.996896743774414, "logps/chosen": -430.4873046875, "logps/rejected": -423.3940734863281, "loss": 0.4376, "rewards/accuracies": 0.6875, "rewards/chosen": -2.5360426902770996, "rewards/margins": 3.874124050140381, "rewards/rejected": -6.410167694091797, "step": 2167 }, { "epoch": 0.45, "learning_rate": 1.0974789915966388e-05, "logits/chosen": -2.0975301265716553, "logits/rejected": -2.2377991676330566, "logps/chosen": -372.0230712890625, "logps/rejected": -362.9530334472656, "loss": 0.1716, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1585159301757812, "rewards/margins": 4.064459800720215, "rewards/rejected": -6.222976207733154, "step": 2168 }, { "epoch": 0.45, "learning_rate": 1.097058823529412e-05, "logits/chosen": -2.486316680908203, "logits/rejected": -1.9096156358718872, "logps/chosen": -457.6482849121094, "logps/rejected": -450.875, "loss": 0.0471, "rewards/accuracies": 1.0, "rewards/chosen": -1.2479711771011353, "rewards/margins": 4.646535873413086, "rewards/rejected": -5.894506454467773, "step": 2169 }, { "epoch": 0.45, "learning_rate": 1.096638655462185e-05, "logits/chosen": -1.9506534337997437, "logits/rejected": -1.620651125907898, "logps/chosen": -432.6158752441406, "logps/rejected": -335.71527099609375, "loss": 0.3216, "rewards/accuracies": 0.875, "rewards/chosen": -2.5584423542022705, "rewards/margins": 4.7099928855896, "rewards/rejected": -7.268435478210449, "step": 2170 }, { "epoch": 0.45, "learning_rate": 1.0962184873949582e-05, "logits/chosen": -1.963405728340149, "logits/rejected": -2.0942749977111816, "logps/chosen": -272.369140625, "logps/rejected": -335.7466735839844, "loss": 0.3217, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3475117683410645, "rewards/margins": 4.016371726989746, "rewards/rejected": -6.363883972167969, "step": 2171 }, { "epoch": 0.45, "learning_rate": 1.0957983193277312e-05, "logits/chosen": -2.163416862487793, "logits/rejected": -1.9712603092193604, "logps/chosen": -314.0606384277344, "logps/rejected": -329.432861328125, "loss": 0.2107, "rewards/accuracies": 0.875, "rewards/chosen": -2.70294451713562, "rewards/margins": 4.212249279022217, "rewards/rejected": -6.915193557739258, "step": 2172 }, { "epoch": 0.45, "learning_rate": 1.0953781512605044e-05, "logits/chosen": -2.2412123680114746, "logits/rejected": -1.7747416496276855, "logps/chosen": -468.77557373046875, "logps/rejected": -384.94287109375, "loss": 0.0783, "rewards/accuracies": 1.0, "rewards/chosen": -1.805687427520752, "rewards/margins": 3.7529850006103516, "rewards/rejected": -5.558672904968262, "step": 2173 }, { "epoch": 0.45, "learning_rate": 1.0949579831932774e-05, "logits/chosen": -2.0782248973846436, "logits/rejected": -2.045067071914673, "logps/chosen": -295.7433166503906, "logps/rejected": -291.02227783203125, "loss": 0.3914, "rewards/accuracies": 0.875, "rewards/chosen": -2.424102783203125, "rewards/margins": 3.0287539958953857, "rewards/rejected": -5.452856063842773, "step": 2174 }, { "epoch": 0.46, "learning_rate": 1.0945378151260506e-05, "logits/chosen": -1.7476472854614258, "logits/rejected": -2.089772939682007, "logps/chosen": -222.85621643066406, "logps/rejected": -342.553955078125, "loss": 0.2662, "rewards/accuracies": 0.875, "rewards/chosen": -2.483558177947998, "rewards/margins": 4.208912372589111, "rewards/rejected": -6.692470550537109, "step": 2175 }, { "epoch": 0.46, "learning_rate": 1.0941176470588238e-05, "logits/chosen": -2.0619561672210693, "logits/rejected": -1.5173184871673584, "logps/chosen": -403.90960693359375, "logps/rejected": -370.9187316894531, "loss": 0.0938, "rewards/accuracies": 1.0, "rewards/chosen": -1.6350886821746826, "rewards/margins": 4.738796234130859, "rewards/rejected": -6.373884677886963, "step": 2176 }, { "epoch": 0.46, "learning_rate": 1.0936974789915967e-05, "logits/chosen": -2.050675868988037, "logits/rejected": -1.9390783309936523, "logps/chosen": -376.11114501953125, "logps/rejected": -429.7190246582031, "loss": 0.2869, "rewards/accuracies": 0.875, "rewards/chosen": -2.5263442993164062, "rewards/margins": 2.934098482131958, "rewards/rejected": -5.460442543029785, "step": 2177 }, { "epoch": 0.46, "learning_rate": 1.0932773109243697e-05, "logits/chosen": -1.8516062498092651, "logits/rejected": -1.8539493083953857, "logps/chosen": -314.53460693359375, "logps/rejected": -318.81060791015625, "loss": 0.3125, "rewards/accuracies": 0.875, "rewards/chosen": -2.8330845832824707, "rewards/margins": 2.078017473220825, "rewards/rejected": -4.911102294921875, "step": 2178 }, { "epoch": 0.46, "learning_rate": 1.0928571428571429e-05, "logits/chosen": -2.3023195266723633, "logits/rejected": -2.301882266998291, "logps/chosen": -412.1047058105469, "logps/rejected": -418.8988037109375, "loss": 0.5978, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1915111541748047, "rewards/margins": 4.310257434844971, "rewards/rejected": -6.501768112182617, "step": 2179 }, { "epoch": 0.46, "learning_rate": 1.0924369747899159e-05, "logits/chosen": -2.0407650470733643, "logits/rejected": -2.271683931350708, "logps/chosen": -255.7179718017578, "logps/rejected": -339.0557556152344, "loss": 0.0507, "rewards/accuracies": 1.0, "rewards/chosen": -2.5781748294830322, "rewards/margins": 4.977086067199707, "rewards/rejected": -7.555261135101318, "step": 2180 }, { "epoch": 0.46, "learning_rate": 1.0920168067226891e-05, "logits/chosen": -2.2148516178131104, "logits/rejected": -2.2547411918640137, "logps/chosen": -436.52734375, "logps/rejected": -474.769287109375, "loss": 0.1716, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4925627708435059, "rewards/margins": 4.246408939361572, "rewards/rejected": -5.738971710205078, "step": 2181 }, { "epoch": 0.46, "learning_rate": 1.0915966386554621e-05, "logits/chosen": -2.2408289909362793, "logits/rejected": -2.023141860961914, "logps/chosen": -403.99871826171875, "logps/rejected": -399.5506591796875, "loss": 0.586, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7053794860839844, "rewards/margins": 2.3182213306427, "rewards/rejected": -5.023601055145264, "step": 2182 }, { "epoch": 0.46, "learning_rate": 1.0911764705882353e-05, "logits/chosen": -1.6399402618408203, "logits/rejected": -1.7357593774795532, "logps/chosen": -507.09234619140625, "logps/rejected": -425.30145263671875, "loss": 0.24, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3757715225219727, "rewards/margins": 3.9001646041870117, "rewards/rejected": -6.275936126708984, "step": 2183 }, { "epoch": 0.46, "learning_rate": 1.0907563025210083e-05, "logits/chosen": -2.0427703857421875, "logits/rejected": -2.1670761108398438, "logps/chosen": -355.68426513671875, "logps/rejected": -440.0216064453125, "loss": 0.5933, "rewards/accuracies": 0.6875, "rewards/chosen": -2.7694621086120605, "rewards/margins": 3.3427374362945557, "rewards/rejected": -6.112199783325195, "step": 2184 }, { "epoch": 0.46, "learning_rate": 1.0903361344537815e-05, "logits/chosen": -2.351576328277588, "logits/rejected": -2.1522438526153564, "logps/chosen": -336.2664794921875, "logps/rejected": -357.8309631347656, "loss": 0.2405, "rewards/accuracies": 0.9375, "rewards/chosen": -2.058230400085449, "rewards/margins": 3.932281970977783, "rewards/rejected": -5.990512371063232, "step": 2185 }, { "epoch": 0.46, "learning_rate": 1.0899159663865545e-05, "logits/chosen": -2.090019941329956, "logits/rejected": -2.0676915645599365, "logps/chosen": -335.3820495605469, "logps/rejected": -304.6964416503906, "loss": 0.6636, "rewards/accuracies": 0.625, "rewards/chosen": -3.871441602706909, "rewards/margins": 2.626312255859375, "rewards/rejected": -6.497754096984863, "step": 2186 }, { "epoch": 0.46, "learning_rate": 1.0894957983193277e-05, "logits/chosen": -2.189368724822998, "logits/rejected": -1.9917242527008057, "logps/chosen": -325.16314697265625, "logps/rejected": -284.92034912109375, "loss": 0.2305, "rewards/accuracies": 0.875, "rewards/chosen": -1.945838212966919, "rewards/margins": 5.222498416900635, "rewards/rejected": -7.168336868286133, "step": 2187 }, { "epoch": 0.46, "learning_rate": 1.089075630252101e-05, "logits/chosen": -2.0255091190338135, "logits/rejected": -1.7928221225738525, "logps/chosen": -273.7083435058594, "logps/rejected": -320.1885070800781, "loss": 0.2606, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7845582962036133, "rewards/margins": 4.29451322555542, "rewards/rejected": -7.079071998596191, "step": 2188 }, { "epoch": 0.46, "learning_rate": 1.088655462184874e-05, "logits/chosen": -1.6586649417877197, "logits/rejected": -1.6128149032592773, "logps/chosen": -319.1055908203125, "logps/rejected": -313.76007080078125, "loss": 0.1692, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4722862243652344, "rewards/margins": 3.634136199951172, "rewards/rejected": -6.106421947479248, "step": 2189 }, { "epoch": 0.46, "learning_rate": 1.0882352941176471e-05, "logits/chosen": -2.049567937850952, "logits/rejected": -1.8410358428955078, "logps/chosen": -372.403076171875, "logps/rejected": -391.11279296875, "loss": 0.2081, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9479005336761475, "rewards/margins": 4.206435680389404, "rewards/rejected": -7.154335975646973, "step": 2190 }, { "epoch": 0.46, "learning_rate": 1.0878151260504202e-05, "logits/chosen": -2.0285184383392334, "logits/rejected": -2.119401454925537, "logps/chosen": -291.40618896484375, "logps/rejected": -292.7174987792969, "loss": 0.1861, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1796467304229736, "rewards/margins": 4.327023029327393, "rewards/rejected": -6.506669521331787, "step": 2191 }, { "epoch": 0.46, "learning_rate": 1.0873949579831933e-05, "logits/chosen": -2.241891622543335, "logits/rejected": -2.0366880893707275, "logps/chosen": -371.3621826171875, "logps/rejected": -313.64239501953125, "loss": 0.3594, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0455706119537354, "rewards/margins": 2.867155075073242, "rewards/rejected": -4.912725448608398, "step": 2192 }, { "epoch": 0.46, "learning_rate": 1.0869747899159664e-05, "logits/chosen": -1.8171122074127197, "logits/rejected": -1.9362175464630127, "logps/chosen": -256.2909851074219, "logps/rejected": -359.748779296875, "loss": 0.0634, "rewards/accuracies": 1.0, "rewards/chosen": -2.8400087356567383, "rewards/margins": 4.862631797790527, "rewards/rejected": -7.702641010284424, "step": 2193 }, { "epoch": 0.46, "learning_rate": 1.0865546218487396e-05, "logits/chosen": -2.0894508361816406, "logits/rejected": -1.9648733139038086, "logps/chosen": -206.87841796875, "logps/rejected": -241.1136474609375, "loss": 0.3472, "rewards/accuracies": 0.8125, "rewards/chosen": -2.9194865226745605, "rewards/margins": 3.1334617137908936, "rewards/rejected": -6.052947998046875, "step": 2194 }, { "epoch": 0.46, "learning_rate": 1.0861344537815126e-05, "logits/chosen": -2.4774932861328125, "logits/rejected": -1.8363265991210938, "logps/chosen": -311.701171875, "logps/rejected": -288.99200439453125, "loss": 0.4517, "rewards/accuracies": 0.8125, "rewards/chosen": -1.968862771987915, "rewards/margins": 3.147794008255005, "rewards/rejected": -5.11665678024292, "step": 2195 }, { "epoch": 0.46, "learning_rate": 1.0857142857142858e-05, "logits/chosen": -2.261884927749634, "logits/rejected": -2.079307794570923, "logps/chosen": -354.0386657714844, "logps/rejected": -398.28839111328125, "loss": 0.5102, "rewards/accuracies": 0.8125, "rewards/chosen": -2.148468017578125, "rewards/margins": 3.487614631652832, "rewards/rejected": -5.636082649230957, "step": 2196 }, { "epoch": 0.46, "learning_rate": 1.0852941176470588e-05, "logits/chosen": -1.9121036529541016, "logits/rejected": -1.739865779876709, "logps/chosen": -323.443603515625, "logps/rejected": -320.7878723144531, "loss": 0.2256, "rewards/accuracies": 0.875, "rewards/chosen": -2.251253366470337, "rewards/margins": 3.9752492904663086, "rewards/rejected": -6.226502418518066, "step": 2197 }, { "epoch": 0.46, "learning_rate": 1.084873949579832e-05, "logits/chosen": -2.10477876663208, "logits/rejected": -1.7474911212921143, "logps/chosen": -327.603759765625, "logps/rejected": -298.4808349609375, "loss": 0.5288, "rewards/accuracies": 0.8125, "rewards/chosen": -3.091388702392578, "rewards/margins": 2.6992855072021484, "rewards/rejected": -5.790674209594727, "step": 2198 }, { "epoch": 0.46, "learning_rate": 1.084453781512605e-05, "logits/chosen": -2.3742029666900635, "logits/rejected": -2.1379635334014893, "logps/chosen": -365.9757385253906, "logps/rejected": -375.118408203125, "loss": 0.3775, "rewards/accuracies": 0.875, "rewards/chosen": -2.7214303016662598, "rewards/margins": 3.7734272480010986, "rewards/rejected": -6.4948577880859375, "step": 2199 }, { "epoch": 0.46, "learning_rate": 1.0840336134453782e-05, "logits/chosen": -2.286151885986328, "logits/rejected": -1.999983787536621, "logps/chosen": -374.40264892578125, "logps/rejected": -364.20947265625, "loss": 0.2444, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7902493476867676, "rewards/margins": 4.513697147369385, "rewards/rejected": -7.3039469718933105, "step": 2200 }, { "epoch": 0.46, "learning_rate": 1.0836134453781512e-05, "logits/chosen": -2.6414976119995117, "logits/rejected": -2.2949018478393555, "logps/chosen": -361.7509765625, "logps/rejected": -294.181396484375, "loss": 0.239, "rewards/accuracies": 0.875, "rewards/chosen": -2.162288188934326, "rewards/margins": 5.092597007751465, "rewards/rejected": -7.254885196685791, "step": 2201 }, { "epoch": 0.46, "learning_rate": 1.0831932773109244e-05, "logits/chosen": -2.229208469390869, "logits/rejected": -2.1986241340637207, "logps/chosen": -391.3697509765625, "logps/rejected": -414.3483581542969, "loss": 0.2296, "rewards/accuracies": 0.9375, "rewards/chosen": -2.487478733062744, "rewards/margins": 5.798541069030762, "rewards/rejected": -8.286020278930664, "step": 2202 }, { "epoch": 0.46, "learning_rate": 1.0827731092436974e-05, "logits/chosen": -1.7265875339508057, "logits/rejected": -1.5976886749267578, "logps/chosen": -319.3828125, "logps/rejected": -346.0447692871094, "loss": 0.2522, "rewards/accuracies": 0.875, "rewards/chosen": -1.9269204139709473, "rewards/margins": 4.979476451873779, "rewards/rejected": -6.906396865844727, "step": 2203 }, { "epoch": 0.46, "learning_rate": 1.0823529411764706e-05, "logits/chosen": -2.1532180309295654, "logits/rejected": -1.8384546041488647, "logps/chosen": -264.1273498535156, "logps/rejected": -322.74090576171875, "loss": 0.54, "rewards/accuracies": 0.6875, "rewards/chosen": -2.7662711143493652, "rewards/margins": 2.821484327316284, "rewards/rejected": -5.58775520324707, "step": 2204 }, { "epoch": 0.46, "learning_rate": 1.0819327731092437e-05, "logits/chosen": -2.2824459075927734, "logits/rejected": -1.5332520008087158, "logps/chosen": -346.97906494140625, "logps/rejected": -406.9164733886719, "loss": 0.2554, "rewards/accuracies": 0.875, "rewards/chosen": -2.8781042098999023, "rewards/margins": 4.692074298858643, "rewards/rejected": -7.570178985595703, "step": 2205 }, { "epoch": 0.46, "learning_rate": 1.0815126050420168e-05, "logits/chosen": -2.170711040496826, "logits/rejected": -1.7903850078582764, "logps/chosen": -383.2043151855469, "logps/rejected": -300.8239440917969, "loss": 0.0836, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8433815240859985, "rewards/margins": 5.589659690856934, "rewards/rejected": -7.433040618896484, "step": 2206 }, { "epoch": 0.46, "learning_rate": 1.0810924369747899e-05, "logits/chosen": -2.308537721633911, "logits/rejected": -2.3312244415283203, "logps/chosen": -395.81304931640625, "logps/rejected": -406.4383239746094, "loss": 0.6183, "rewards/accuracies": 0.6875, "rewards/chosen": -3.0024824142456055, "rewards/margins": 3.527528762817383, "rewards/rejected": -6.530011177062988, "step": 2207 }, { "epoch": 0.46, "learning_rate": 1.080672268907563e-05, "logits/chosen": -2.2998664379119873, "logits/rejected": -2.1448166370391846, "logps/chosen": -339.9493713378906, "logps/rejected": -372.6708679199219, "loss": 0.358, "rewards/accuracies": 0.875, "rewards/chosen": -2.8555407524108887, "rewards/margins": 4.349848747253418, "rewards/rejected": -7.205389022827148, "step": 2208 }, { "epoch": 0.46, "learning_rate": 1.080252100840336e-05, "logits/chosen": -1.6480600833892822, "logits/rejected": -1.9596545696258545, "logps/chosen": -192.30313110351562, "logps/rejected": -284.8935852050781, "loss": 0.209, "rewards/accuracies": 0.875, "rewards/chosen": -2.6758458614349365, "rewards/margins": 5.204977512359619, "rewards/rejected": -7.880823135375977, "step": 2209 }, { "epoch": 0.46, "learning_rate": 1.0798319327731093e-05, "logits/chosen": -2.2920610904693604, "logits/rejected": -2.1652326583862305, "logps/chosen": -325.49786376953125, "logps/rejected": -266.3533630371094, "loss": 0.7449, "rewards/accuracies": 0.625, "rewards/chosen": -3.4675817489624023, "rewards/margins": 1.6641329526901245, "rewards/rejected": -5.131714820861816, "step": 2210 }, { "epoch": 0.46, "learning_rate": 1.0794117647058825e-05, "logits/chosen": -1.9324177503585815, "logits/rejected": -2.0953521728515625, "logps/chosen": -338.3372497558594, "logps/rejected": -379.05084228515625, "loss": 0.0628, "rewards/accuracies": 1.0, "rewards/chosen": -2.318756341934204, "rewards/margins": 6.595605850219727, "rewards/rejected": -8.914361953735352, "step": 2211 }, { "epoch": 0.46, "learning_rate": 1.0789915966386555e-05, "logits/chosen": -2.408193826675415, "logits/rejected": -1.6430341005325317, "logps/chosen": -445.908447265625, "logps/rejected": -345.45330810546875, "loss": 0.1399, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8667926788330078, "rewards/margins": 4.218425750732422, "rewards/rejected": -6.0852179527282715, "step": 2212 }, { "epoch": 0.46, "learning_rate": 1.0785714285714287e-05, "logits/chosen": -1.7721405029296875, "logits/rejected": -1.875141978263855, "logps/chosen": -325.4342956542969, "logps/rejected": -332.0131530761719, "loss": 0.5432, "rewards/accuracies": 0.75, "rewards/chosen": -2.4487504959106445, "rewards/margins": 2.412158966064453, "rewards/rejected": -4.860909461975098, "step": 2213 }, { "epoch": 0.46, "learning_rate": 1.0781512605042017e-05, "logits/chosen": -2.221919059753418, "logits/rejected": -2.1236467361450195, "logps/chosen": -304.822021484375, "logps/rejected": -395.0116271972656, "loss": 0.2476, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7468748092651367, "rewards/margins": 4.721800804138184, "rewards/rejected": -7.4686760902404785, "step": 2214 }, { "epoch": 0.46, "learning_rate": 1.0777310924369749e-05, "logits/chosen": -1.943882942199707, "logits/rejected": -2.1289587020874023, "logps/chosen": -234.31790161132812, "logps/rejected": -312.1425476074219, "loss": 0.1867, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5907859802246094, "rewards/margins": 5.211655139923096, "rewards/rejected": -7.802441596984863, "step": 2215 }, { "epoch": 0.46, "learning_rate": 1.0773109243697479e-05, "logits/chosen": -2.232961893081665, "logits/rejected": -1.516900658607483, "logps/chosen": -334.7171630859375, "logps/rejected": -315.154296875, "loss": 0.1667, "rewards/accuracies": 0.875, "rewards/chosen": -2.770059823989868, "rewards/margins": 5.6397857666015625, "rewards/rejected": -8.409846305847168, "step": 2216 }, { "epoch": 0.46, "learning_rate": 1.0768907563025211e-05, "logits/chosen": -2.2113990783691406, "logits/rejected": -2.358501434326172, "logps/chosen": -217.718017578125, "logps/rejected": -260.5225830078125, "loss": 0.2355, "rewards/accuracies": 0.9375, "rewards/chosen": -2.661038875579834, "rewards/margins": 3.5129575729370117, "rewards/rejected": -6.1739959716796875, "step": 2217 }, { "epoch": 0.46, "learning_rate": 1.0764705882352941e-05, "logits/chosen": -2.2868616580963135, "logits/rejected": -1.994567632675171, "logps/chosen": -285.7127990722656, "logps/rejected": -286.7519226074219, "loss": 0.7166, "rewards/accuracies": 0.625, "rewards/chosen": -2.269653558731079, "rewards/margins": 2.8677520751953125, "rewards/rejected": -5.1374053955078125, "step": 2218 }, { "epoch": 0.46, "learning_rate": 1.0760504201680673e-05, "logits/chosen": -2.0874366760253906, "logits/rejected": -1.9938488006591797, "logps/chosen": -299.6470031738281, "logps/rejected": -344.7825012207031, "loss": 0.1614, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7262797355651855, "rewards/margins": 4.680932998657227, "rewards/rejected": -7.407212257385254, "step": 2219 }, { "epoch": 0.46, "learning_rate": 1.0756302521008403e-05, "logits/chosen": -2.219188690185547, "logits/rejected": -1.85585618019104, "logps/chosen": -277.2038269042969, "logps/rejected": -242.1912841796875, "loss": 0.3288, "rewards/accuracies": 0.8125, "rewards/chosen": -2.494776487350464, "rewards/margins": 3.3394877910614014, "rewards/rejected": -5.834264278411865, "step": 2220 }, { "epoch": 0.46, "learning_rate": 1.0752100840336135e-05, "logits/chosen": -2.302727460861206, "logits/rejected": -2.219780921936035, "logps/chosen": -341.3525695800781, "logps/rejected": -291.35687255859375, "loss": 0.2023, "rewards/accuracies": 0.875, "rewards/chosen": -1.839379072189331, "rewards/margins": 4.455135822296143, "rewards/rejected": -6.2945146560668945, "step": 2221 }, { "epoch": 0.46, "learning_rate": 1.0747899159663866e-05, "logits/chosen": -2.44423246383667, "logits/rejected": -2.164736032485962, "logps/chosen": -274.3758239746094, "logps/rejected": -284.9472351074219, "loss": 0.185, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9392091035842896, "rewards/margins": 3.931859016418457, "rewards/rejected": -5.871068477630615, "step": 2222 }, { "epoch": 0.47, "learning_rate": 1.0743697478991597e-05, "logits/chosen": -1.8586549758911133, "logits/rejected": -2.2672882080078125, "logps/chosen": -270.50628662109375, "logps/rejected": -377.39288330078125, "loss": 0.182, "rewards/accuracies": 0.875, "rewards/chosen": -2.9168264865875244, "rewards/margins": 4.387651443481445, "rewards/rejected": -7.304478645324707, "step": 2223 }, { "epoch": 0.47, "learning_rate": 1.0739495798319328e-05, "logits/chosen": -2.2375826835632324, "logits/rejected": -2.4133353233337402, "logps/chosen": -284.8385925292969, "logps/rejected": -383.3919982910156, "loss": 0.1722, "rewards/accuracies": 0.9375, "rewards/chosen": -2.103823661804199, "rewards/margins": 5.040059566497803, "rewards/rejected": -7.143883228302002, "step": 2224 }, { "epoch": 0.47, "learning_rate": 1.073529411764706e-05, "logits/chosen": -2.255084276199341, "logits/rejected": -1.869938611984253, "logps/chosen": -390.86376953125, "logps/rejected": -377.3240051269531, "loss": 0.702, "rewards/accuracies": 0.75, "rewards/chosen": -2.9561069011688232, "rewards/margins": 3.594660997390747, "rewards/rejected": -6.55076789855957, "step": 2225 }, { "epoch": 0.47, "learning_rate": 1.073109243697479e-05, "logits/chosen": -2.032472610473633, "logits/rejected": -2.1681413650512695, "logps/chosen": -328.66876220703125, "logps/rejected": -387.1632080078125, "loss": 0.5122, "rewards/accuracies": 0.75, "rewards/chosen": -2.000619649887085, "rewards/margins": 3.8319714069366455, "rewards/rejected": -5.8325910568237305, "step": 2226 }, { "epoch": 0.47, "learning_rate": 1.0726890756302522e-05, "logits/chosen": -2.299386978149414, "logits/rejected": -2.2176566123962402, "logps/chosen": -296.2229919433594, "logps/rejected": -400.525146484375, "loss": 0.1183, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6341265439987183, "rewards/margins": 5.189969539642334, "rewards/rejected": -6.824095726013184, "step": 2227 }, { "epoch": 0.47, "learning_rate": 1.0722689075630252e-05, "logits/chosen": -2.344362735748291, "logits/rejected": -2.1972527503967285, "logps/chosen": -254.92074584960938, "logps/rejected": -317.0208435058594, "loss": 0.3232, "rewards/accuracies": 0.8125, "rewards/chosen": -2.403212547302246, "rewards/margins": 3.340275287628174, "rewards/rejected": -5.743488311767578, "step": 2228 }, { "epoch": 0.47, "learning_rate": 1.0718487394957984e-05, "logits/chosen": -2.115281581878662, "logits/rejected": -2.0778818130493164, "logps/chosen": -308.91973876953125, "logps/rejected": -348.39447021484375, "loss": 0.3515, "rewards/accuracies": 0.875, "rewards/chosen": -2.801435947418213, "rewards/margins": 4.555243968963623, "rewards/rejected": -7.356679916381836, "step": 2229 }, { "epoch": 0.47, "learning_rate": 1.0714285714285714e-05, "logits/chosen": -1.9922337532043457, "logits/rejected": -1.5101429224014282, "logps/chosen": -339.193115234375, "logps/rejected": -348.9505310058594, "loss": 0.1001, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8807196617126465, "rewards/margins": 6.552649974822998, "rewards/rejected": -8.433370590209961, "step": 2230 }, { "epoch": 0.47, "learning_rate": 1.0710084033613446e-05, "logits/chosen": -2.1856231689453125, "logits/rejected": -1.764656662940979, "logps/chosen": -402.35906982421875, "logps/rejected": -446.05694580078125, "loss": 0.1637, "rewards/accuracies": 0.9375, "rewards/chosen": -1.4766392707824707, "rewards/margins": 5.052073955535889, "rewards/rejected": -6.528713226318359, "step": 2231 }, { "epoch": 0.47, "learning_rate": 1.0705882352941178e-05, "logits/chosen": -1.666250228881836, "logits/rejected": -1.4801517724990845, "logps/chosen": -264.9478759765625, "logps/rejected": -324.7733154296875, "loss": 0.4313, "rewards/accuracies": 0.8125, "rewards/chosen": -2.266223430633545, "rewards/margins": 3.1230216026306152, "rewards/rejected": -5.389245510101318, "step": 2232 }, { "epoch": 0.47, "learning_rate": 1.0701680672268908e-05, "logits/chosen": -2.4650676250457764, "logits/rejected": -2.1606171131134033, "logps/chosen": -390.0478210449219, "logps/rejected": -376.9784240722656, "loss": 0.4598, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0779905319213867, "rewards/margins": 3.78542423248291, "rewards/rejected": -5.863414764404297, "step": 2233 }, { "epoch": 0.47, "learning_rate": 1.069747899159664e-05, "logits/chosen": -2.3174917697906494, "logits/rejected": -1.8342410326004028, "logps/chosen": -340.0020446777344, "logps/rejected": -307.1953125, "loss": 0.2601, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5838121175765991, "rewards/margins": 3.620408296585083, "rewards/rejected": -5.204220771789551, "step": 2234 }, { "epoch": 0.47, "learning_rate": 1.069327731092437e-05, "logits/chosen": -2.17441463470459, "logits/rejected": -1.7299838066101074, "logps/chosen": -337.74639892578125, "logps/rejected": -295.0901794433594, "loss": 0.1725, "rewards/accuracies": 1.0, "rewards/chosen": -1.877786636352539, "rewards/margins": 3.0202202796936035, "rewards/rejected": -4.898006916046143, "step": 2235 }, { "epoch": 0.47, "learning_rate": 1.0689075630252102e-05, "logits/chosen": -2.270581007003784, "logits/rejected": -1.8749492168426514, "logps/chosen": -261.9243469238281, "logps/rejected": -203.61996459960938, "loss": 0.3716, "rewards/accuracies": 0.875, "rewards/chosen": -2.4220809936523438, "rewards/margins": 2.819855213165283, "rewards/rejected": -5.241936206817627, "step": 2236 }, { "epoch": 0.47, "learning_rate": 1.0684873949579832e-05, "logits/chosen": -2.2787320613861084, "logits/rejected": -1.8370237350463867, "logps/chosen": -217.92852783203125, "logps/rejected": -242.4041748046875, "loss": 0.3508, "rewards/accuracies": 0.875, "rewards/chosen": -1.757615089416504, "rewards/margins": 4.874331951141357, "rewards/rejected": -6.631947040557861, "step": 2237 }, { "epoch": 0.47, "learning_rate": 1.0680672268907564e-05, "logits/chosen": -2.1033692359924316, "logits/rejected": -2.1304478645324707, "logps/chosen": -297.3065185546875, "logps/rejected": -304.0707092285156, "loss": 0.3296, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0246024131774902, "rewards/margins": 5.272213459014893, "rewards/rejected": -7.296815395355225, "step": 2238 }, { "epoch": 0.47, "learning_rate": 1.0676470588235295e-05, "logits/chosen": -2.493102550506592, "logits/rejected": -1.6055775880813599, "logps/chosen": -343.25311279296875, "logps/rejected": -275.83184814453125, "loss": 0.5858, "rewards/accuracies": 0.8125, "rewards/chosen": -3.303953170776367, "rewards/margins": 2.8446543216705322, "rewards/rejected": -6.148608207702637, "step": 2239 }, { "epoch": 0.47, "learning_rate": 1.0672268907563026e-05, "logits/chosen": -2.3732991218566895, "logits/rejected": -1.3028373718261719, "logps/chosen": -336.52301025390625, "logps/rejected": -308.91583251953125, "loss": 0.1113, "rewards/accuracies": 1.0, "rewards/chosen": -1.7011866569519043, "rewards/margins": 3.7374966144561768, "rewards/rejected": -5.43868350982666, "step": 2240 }, { "epoch": 0.47, "learning_rate": 1.0668067226890757e-05, "logits/chosen": -2.541382312774658, "logits/rejected": -2.0083346366882324, "logps/chosen": -408.62957763671875, "logps/rejected": -319.6160888671875, "loss": 0.1543, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7813868522644043, "rewards/margins": 4.453334808349609, "rewards/rejected": -6.234721660614014, "step": 2241 }, { "epoch": 0.47, "learning_rate": 1.0663865546218489e-05, "logits/chosen": -2.182600498199463, "logits/rejected": -1.753218412399292, "logps/chosen": -276.21453857421875, "logps/rejected": -280.6688537597656, "loss": 0.1858, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0654513835906982, "rewards/margins": 5.518108367919922, "rewards/rejected": -7.583559989929199, "step": 2242 }, { "epoch": 0.47, "learning_rate": 1.0659663865546219e-05, "logits/chosen": -2.244385242462158, "logits/rejected": -1.3462674617767334, "logps/chosen": -372.945068359375, "logps/rejected": -268.8367004394531, "loss": 0.3019, "rewards/accuracies": 0.875, "rewards/chosen": -1.858535885810852, "rewards/margins": 4.242053985595703, "rewards/rejected": -6.100589752197266, "step": 2243 }, { "epoch": 0.47, "learning_rate": 1.065546218487395e-05, "logits/chosen": -1.829626441001892, "logits/rejected": -1.9983892440795898, "logps/chosen": -348.000732421875, "logps/rejected": -434.7879638671875, "loss": 0.3185, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6664745807647705, "rewards/margins": 4.671359062194824, "rewards/rejected": -7.337833881378174, "step": 2244 }, { "epoch": 0.47, "learning_rate": 1.0651260504201681e-05, "logits/chosen": -2.2860045433044434, "logits/rejected": -2.216571569442749, "logps/chosen": -295.8656005859375, "logps/rejected": -349.29376220703125, "loss": 0.1342, "rewards/accuracies": 0.9375, "rewards/chosen": -1.580683708190918, "rewards/margins": 5.6064300537109375, "rewards/rejected": -7.187114238739014, "step": 2245 }, { "epoch": 0.47, "learning_rate": 1.0647058823529413e-05, "logits/chosen": -1.977545976638794, "logits/rejected": -1.8383845090866089, "logps/chosen": -332.2418212890625, "logps/rejected": -304.9915771484375, "loss": 0.1582, "rewards/accuracies": 0.9375, "rewards/chosen": -1.0358564853668213, "rewards/margins": 4.698163986206055, "rewards/rejected": -5.734020709991455, "step": 2246 }, { "epoch": 0.47, "learning_rate": 1.0642857142857143e-05, "logits/chosen": -2.205399990081787, "logits/rejected": -1.9745917320251465, "logps/chosen": -348.6900634765625, "logps/rejected": -434.18096923828125, "loss": 0.4658, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9008054733276367, "rewards/margins": 3.648080587387085, "rewards/rejected": -6.548886299133301, "step": 2247 }, { "epoch": 0.47, "learning_rate": 1.0638655462184875e-05, "logits/chosen": -2.161336660385132, "logits/rejected": -1.6996766328811646, "logps/chosen": -307.3624267578125, "logps/rejected": -200.87753295898438, "loss": 0.3624, "rewards/accuracies": 0.8125, "rewards/chosen": -3.090386390686035, "rewards/margins": 1.5949962139129639, "rewards/rejected": -4.685382843017578, "step": 2248 }, { "epoch": 0.47, "learning_rate": 1.0634453781512605e-05, "logits/chosen": -2.21938419342041, "logits/rejected": -2.160126209259033, "logps/chosen": -458.0859375, "logps/rejected": -423.2908935546875, "loss": 0.4247, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2430121898651123, "rewards/margins": 3.230743885040283, "rewards/rejected": -5.473756313323975, "step": 2249 }, { "epoch": 0.47, "learning_rate": 1.0630252100840337e-05, "logits/chosen": -2.311984062194824, "logits/rejected": -2.030043840408325, "logps/chosen": -437.20074462890625, "logps/rejected": -395.2602233886719, "loss": 0.0833, "rewards/accuracies": 1.0, "rewards/chosen": -1.775640845298767, "rewards/margins": 5.377265453338623, "rewards/rejected": -7.15290641784668, "step": 2250 }, { "epoch": 0.47, "learning_rate": 1.0626050420168067e-05, "logits/chosen": -2.534961223602295, "logits/rejected": -2.0440006256103516, "logps/chosen": -432.7363586425781, "logps/rejected": -331.427001953125, "loss": 0.2031, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3942008018493652, "rewards/margins": 4.759222984313965, "rewards/rejected": -7.153424263000488, "step": 2251 }, { "epoch": 0.47, "learning_rate": 1.06218487394958e-05, "logits/chosen": -2.186769962310791, "logits/rejected": -1.577028512954712, "logps/chosen": -380.36651611328125, "logps/rejected": -367.24798583984375, "loss": 0.3819, "rewards/accuracies": 0.75, "rewards/chosen": -2.320749282836914, "rewards/margins": 3.0393753051757812, "rewards/rejected": -5.360124588012695, "step": 2252 }, { "epoch": 0.47, "learning_rate": 1.061764705882353e-05, "logits/chosen": -2.464982032775879, "logits/rejected": -2.277682304382324, "logps/chosen": -355.7694091796875, "logps/rejected": -355.5269775390625, "loss": 0.1861, "rewards/accuracies": 0.875, "rewards/chosen": -2.0717246532440186, "rewards/margins": 4.6460161209106445, "rewards/rejected": -6.717741012573242, "step": 2253 }, { "epoch": 0.47, "learning_rate": 1.0613445378151261e-05, "logits/chosen": -2.132046937942505, "logits/rejected": -2.1645894050598145, "logps/chosen": -279.29534912109375, "logps/rejected": -296.971435546875, "loss": 0.1427, "rewards/accuracies": 0.875, "rewards/chosen": -1.611100435256958, "rewards/margins": 5.796579837799072, "rewards/rejected": -7.407679557800293, "step": 2254 }, { "epoch": 0.47, "learning_rate": 1.0609243697478993e-05, "logits/chosen": -2.2670211791992188, "logits/rejected": -1.1424469947814941, "logps/chosen": -276.5291748046875, "logps/rejected": -190.7577362060547, "loss": 0.2272, "rewards/accuracies": 0.8125, "rewards/chosen": -2.65633487701416, "rewards/margins": 3.2629189491271973, "rewards/rejected": -5.919253349304199, "step": 2255 }, { "epoch": 0.47, "learning_rate": 1.0605042016806724e-05, "logits/chosen": -1.8606351613998413, "logits/rejected": -2.001354455947876, "logps/chosen": -291.7681884765625, "logps/rejected": -429.3544616699219, "loss": 0.0872, "rewards/accuracies": 1.0, "rewards/chosen": -1.9967080354690552, "rewards/margins": 6.338170528411865, "rewards/rejected": -8.334878921508789, "step": 2256 }, { "epoch": 0.47, "learning_rate": 1.0600840336134455e-05, "logits/chosen": -2.2156407833099365, "logits/rejected": -1.8583240509033203, "logps/chosen": -324.5140686035156, "logps/rejected": -345.49224853515625, "loss": 0.1138, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9272854328155518, "rewards/margins": 5.892843246459961, "rewards/rejected": -7.820127964019775, "step": 2257 }, { "epoch": 0.47, "learning_rate": 1.0596638655462186e-05, "logits/chosen": -2.350205898284912, "logits/rejected": -2.100562572479248, "logps/chosen": -430.767822265625, "logps/rejected": -432.44012451171875, "loss": 0.1411, "rewards/accuracies": 1.0, "rewards/chosen": -2.129133462905884, "rewards/margins": 3.9867444038391113, "rewards/rejected": -6.115878105163574, "step": 2258 }, { "epoch": 0.47, "learning_rate": 1.0592436974789918e-05, "logits/chosen": -2.123753070831299, "logits/rejected": -2.0963852405548096, "logps/chosen": -235.05751037597656, "logps/rejected": -232.68536376953125, "loss": 0.7072, "rewards/accuracies": 0.875, "rewards/chosen": -2.6822681427001953, "rewards/margins": 2.7440764904022217, "rewards/rejected": -5.426344871520996, "step": 2259 }, { "epoch": 0.47, "learning_rate": 1.0588235294117648e-05, "logits/chosen": -2.380059003829956, "logits/rejected": -1.995006799697876, "logps/chosen": -442.45294189453125, "logps/rejected": -366.01123046875, "loss": 0.3146, "rewards/accuracies": 0.8125, "rewards/chosen": -1.6446397304534912, "rewards/margins": 4.297112464904785, "rewards/rejected": -5.9417524337768555, "step": 2260 }, { "epoch": 0.47, "learning_rate": 1.058403361344538e-05, "logits/chosen": -2.071579933166504, "logits/rejected": -1.9076157808303833, "logps/chosen": -343.28558349609375, "logps/rejected": -337.1347961425781, "loss": 0.4269, "rewards/accuracies": 0.875, "rewards/chosen": -2.4804975986480713, "rewards/margins": 4.389636516571045, "rewards/rejected": -6.870134353637695, "step": 2261 }, { "epoch": 0.47, "learning_rate": 1.057983193277311e-05, "logits/chosen": -2.2891056537628174, "logits/rejected": -2.11515736579895, "logps/chosen": -401.09228515625, "logps/rejected": -378.7855224609375, "loss": 0.535, "rewards/accuracies": 0.6875, "rewards/chosen": -2.5840132236480713, "rewards/margins": 3.7881226539611816, "rewards/rejected": -6.372136116027832, "step": 2262 }, { "epoch": 0.47, "learning_rate": 1.0575630252100842e-05, "logits/chosen": -2.134396553039551, "logits/rejected": -2.165761709213257, "logps/chosen": -330.08551025390625, "logps/rejected": -335.84393310546875, "loss": 0.5337, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6672980785369873, "rewards/margins": 2.9391837120056152, "rewards/rejected": -5.606481552124023, "step": 2263 }, { "epoch": 0.47, "learning_rate": 1.0571428571428572e-05, "logits/chosen": -2.1118838787078857, "logits/rejected": -1.8191330432891846, "logps/chosen": -303.418701171875, "logps/rejected": -301.62945556640625, "loss": 0.252, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4390218257904053, "rewards/margins": 3.3422956466674805, "rewards/rejected": -5.781317234039307, "step": 2264 }, { "epoch": 0.47, "learning_rate": 1.0567226890756304e-05, "logits/chosen": -2.3714094161987305, "logits/rejected": -2.09450364112854, "logps/chosen": -351.881591796875, "logps/rejected": -346.2973937988281, "loss": 0.3416, "rewards/accuracies": 0.875, "rewards/chosen": -2.331536054611206, "rewards/margins": 3.77156138420105, "rewards/rejected": -6.103097438812256, "step": 2265 }, { "epoch": 0.47, "learning_rate": 1.0563025210084034e-05, "logits/chosen": -2.073634386062622, "logits/rejected": -2.2854981422424316, "logps/chosen": -390.7427978515625, "logps/rejected": -487.54473876953125, "loss": 0.4898, "rewards/accuracies": 0.75, "rewards/chosen": -2.2430243492126465, "rewards/margins": 1.8762409687042236, "rewards/rejected": -4.119265556335449, "step": 2266 }, { "epoch": 0.47, "learning_rate": 1.0558823529411766e-05, "logits/chosen": -2.2952065467834473, "logits/rejected": -2.1446497440338135, "logps/chosen": -439.07672119140625, "logps/rejected": -476.9337463378906, "loss": 0.1455, "rewards/accuracies": 0.9375, "rewards/chosen": -1.1975785493850708, "rewards/margins": 4.0154242515563965, "rewards/rejected": -5.213002681732178, "step": 2267 }, { "epoch": 0.47, "learning_rate": 1.0554621848739496e-05, "logits/chosen": -2.3860528469085693, "logits/rejected": -2.342975616455078, "logps/chosen": -361.2107238769531, "logps/rejected": -340.8471374511719, "loss": 0.2526, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7229912281036377, "rewards/margins": 4.171393394470215, "rewards/rejected": -5.894384384155273, "step": 2268 }, { "epoch": 0.47, "learning_rate": 1.0550420168067228e-05, "logits/chosen": -2.1936278343200684, "logits/rejected": -1.8721768856048584, "logps/chosen": -404.3787536621094, "logps/rejected": -323.47528076171875, "loss": 0.2161, "rewards/accuracies": 0.875, "rewards/chosen": -1.8592255115509033, "rewards/margins": 3.1862571239471436, "rewards/rejected": -5.045482635498047, "step": 2269 }, { "epoch": 0.47, "learning_rate": 1.0546218487394959e-05, "logits/chosen": -1.8568305969238281, "logits/rejected": -1.7172417640686035, "logps/chosen": -340.1731872558594, "logps/rejected": -378.71990966796875, "loss": 0.5118, "rewards/accuracies": 0.75, "rewards/chosen": -2.8410212993621826, "rewards/margins": 4.397149562835693, "rewards/rejected": -7.238170146942139, "step": 2270 }, { "epoch": 0.48, "learning_rate": 1.054201680672269e-05, "logits/chosen": -2.167987108230591, "logits/rejected": -2.2638425827026367, "logps/chosen": -234.21136474609375, "logps/rejected": -247.3729705810547, "loss": 0.7625, "rewards/accuracies": 0.625, "rewards/chosen": -2.6195812225341797, "rewards/margins": 2.501633882522583, "rewards/rejected": -5.121215343475342, "step": 2271 }, { "epoch": 0.48, "learning_rate": 1.053781512605042e-05, "logits/chosen": -2.143850803375244, "logits/rejected": -2.1145806312561035, "logps/chosen": -284.2950744628906, "logps/rejected": -351.94415283203125, "loss": 0.3488, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3442676067352295, "rewards/margins": 2.7296066284179688, "rewards/rejected": -5.073873996734619, "step": 2272 }, { "epoch": 0.48, "learning_rate": 1.0533613445378153e-05, "logits/chosen": -2.2186970710754395, "logits/rejected": -1.824692964553833, "logps/chosen": -403.21417236328125, "logps/rejected": -364.8682861328125, "loss": 0.17, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1549181938171387, "rewards/margins": 3.983293294906616, "rewards/rejected": -6.138211250305176, "step": 2273 }, { "epoch": 0.48, "learning_rate": 1.0529411764705883e-05, "logits/chosen": -2.2263786792755127, "logits/rejected": -1.9309293031692505, "logps/chosen": -377.0666198730469, "logps/rejected": -345.8014221191406, "loss": 0.2844, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4748728275299072, "rewards/margins": 2.6632609367370605, "rewards/rejected": -5.138134002685547, "step": 2274 }, { "epoch": 0.48, "learning_rate": 1.0525210084033615e-05, "logits/chosen": -2.2081804275512695, "logits/rejected": -2.1690778732299805, "logps/chosen": -405.8522033691406, "logps/rejected": -438.5234069824219, "loss": 0.0946, "rewards/accuracies": 1.0, "rewards/chosen": -2.0199661254882812, "rewards/margins": 4.896577835083008, "rewards/rejected": -6.916543006896973, "step": 2275 }, { "epoch": 0.48, "learning_rate": 1.0521008403361345e-05, "logits/chosen": -2.109779119491577, "logits/rejected": -1.8916078805923462, "logps/chosen": -412.5704040527344, "logps/rejected": -336.9224853515625, "loss": 0.273, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3690977096557617, "rewards/margins": 3.8368191719055176, "rewards/rejected": -7.2059173583984375, "step": 2276 }, { "epoch": 0.48, "learning_rate": 1.0516806722689077e-05, "logits/chosen": -2.049389600753784, "logits/rejected": -2.2946488857269287, "logps/chosen": -244.0751953125, "logps/rejected": -334.02740478515625, "loss": 0.1811, "rewards/accuracies": 0.875, "rewards/chosen": -2.221323251724243, "rewards/margins": 3.872422456741333, "rewards/rejected": -6.093745231628418, "step": 2277 }, { "epoch": 0.48, "learning_rate": 1.0512605042016809e-05, "logits/chosen": -1.6871998310089111, "logits/rejected": -2.065037727355957, "logps/chosen": -261.9024658203125, "logps/rejected": -354.5792236328125, "loss": 0.2743, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8833125829696655, "rewards/margins": 5.362640380859375, "rewards/rejected": -7.24595308303833, "step": 2278 }, { "epoch": 0.48, "learning_rate": 1.0508403361344539e-05, "logits/chosen": -2.1301095485687256, "logits/rejected": -1.8175932168960571, "logps/chosen": -403.0058288574219, "logps/rejected": -353.320068359375, "loss": 0.2916, "rewards/accuracies": 0.75, "rewards/chosen": -1.5765385627746582, "rewards/margins": 3.7885303497314453, "rewards/rejected": -5.3650689125061035, "step": 2279 }, { "epoch": 0.48, "learning_rate": 1.0504201680672271e-05, "logits/chosen": -2.170663833618164, "logits/rejected": -2.0763487815856934, "logps/chosen": -333.7818603515625, "logps/rejected": -419.4892883300781, "loss": 0.5893, "rewards/accuracies": 0.75, "rewards/chosen": -2.268490791320801, "rewards/margins": 3.5740537643432617, "rewards/rejected": -5.8425445556640625, "step": 2280 }, { "epoch": 0.48, "learning_rate": 1.0500000000000001e-05, "logits/chosen": -2.1273372173309326, "logits/rejected": -1.657572627067566, "logps/chosen": -311.7430725097656, "logps/rejected": -291.7752990722656, "loss": 0.2873, "rewards/accuracies": 0.9375, "rewards/chosen": -2.247150421142578, "rewards/margins": 3.382223606109619, "rewards/rejected": -5.629374027252197, "step": 2281 }, { "epoch": 0.48, "learning_rate": 1.0495798319327733e-05, "logits/chosen": -2.058392286300659, "logits/rejected": -1.9028072357177734, "logps/chosen": -284.7234802246094, "logps/rejected": -387.623046875, "loss": 0.1222, "rewards/accuracies": 1.0, "rewards/chosen": -2.0908210277557373, "rewards/margins": 4.051909446716309, "rewards/rejected": -6.142730712890625, "step": 2282 }, { "epoch": 0.48, "learning_rate": 1.0491596638655463e-05, "logits/chosen": -2.1329362392425537, "logits/rejected": -1.8955204486846924, "logps/chosen": -351.31396484375, "logps/rejected": -315.35736083984375, "loss": 0.3912, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5695321559906006, "rewards/margins": 3.1083269119262695, "rewards/rejected": -5.677859306335449, "step": 2283 }, { "epoch": 0.48, "learning_rate": 1.0487394957983195e-05, "logits/chosen": -2.144345998764038, "logits/rejected": -2.027606964111328, "logps/chosen": -379.93804931640625, "logps/rejected": -350.4498291015625, "loss": 0.4272, "rewards/accuracies": 0.75, "rewards/chosen": -2.058117389678955, "rewards/margins": 2.9958508014678955, "rewards/rejected": -5.05396842956543, "step": 2284 }, { "epoch": 0.48, "learning_rate": 1.0483193277310925e-05, "logits/chosen": -1.8589396476745605, "logits/rejected": -2.1651878356933594, "logps/chosen": -287.7435302734375, "logps/rejected": -346.69329833984375, "loss": 0.3065, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6636154651641846, "rewards/margins": 4.229780197143555, "rewards/rejected": -6.893395900726318, "step": 2285 }, { "epoch": 0.48, "learning_rate": 1.0478991596638657e-05, "logits/chosen": -2.327234983444214, "logits/rejected": -1.6147817373275757, "logps/chosen": -371.5098876953125, "logps/rejected": -308.7040100097656, "loss": 0.7319, "rewards/accuracies": 0.6875, "rewards/chosen": -1.9379379749298096, "rewards/margins": 2.8196470737457275, "rewards/rejected": -4.757585048675537, "step": 2286 }, { "epoch": 0.48, "learning_rate": 1.0474789915966388e-05, "logits/chosen": -2.1534690856933594, "logits/rejected": -1.8361022472381592, "logps/chosen": -385.3453063964844, "logps/rejected": -327.05804443359375, "loss": 0.2885, "rewards/accuracies": 0.9375, "rewards/chosen": -2.141204833984375, "rewards/margins": 2.60811710357666, "rewards/rejected": -4.749321937561035, "step": 2287 }, { "epoch": 0.48, "learning_rate": 1.047058823529412e-05, "logits/chosen": -2.4462528228759766, "logits/rejected": -1.8881711959838867, "logps/chosen": -389.78082275390625, "logps/rejected": -297.2864074707031, "loss": 0.06, "rewards/accuracies": 1.0, "rewards/chosen": -1.5844292640686035, "rewards/margins": 4.194100379943848, "rewards/rejected": -5.778530120849609, "step": 2288 }, { "epoch": 0.48, "learning_rate": 1.046638655462185e-05, "logits/chosen": -2.297259569168091, "logits/rejected": -2.227626085281372, "logps/chosen": -306.6974792480469, "logps/rejected": -374.4692077636719, "loss": 0.5069, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2834997177124023, "rewards/margins": 3.1043922901153564, "rewards/rejected": -5.38789176940918, "step": 2289 }, { "epoch": 0.48, "learning_rate": 1.0462184873949582e-05, "logits/chosen": -2.3599748611450195, "logits/rejected": -1.8623325824737549, "logps/chosen": -448.1184387207031, "logps/rejected": -416.02880859375, "loss": 0.1388, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6405494213104248, "rewards/margins": 3.9979772567749023, "rewards/rejected": -5.638526916503906, "step": 2290 }, { "epoch": 0.48, "learning_rate": 1.0457983193277312e-05, "logits/chosen": -2.436807632446289, "logits/rejected": -1.9875867366790771, "logps/chosen": -398.48077392578125, "logps/rejected": -370.5784606933594, "loss": 0.1847, "rewards/accuracies": 0.875, "rewards/chosen": -1.903550148010254, "rewards/margins": 5.235564231872559, "rewards/rejected": -7.1391143798828125, "step": 2291 }, { "epoch": 0.48, "learning_rate": 1.0453781512605044e-05, "logits/chosen": -2.027940511703491, "logits/rejected": -2.1264381408691406, "logps/chosen": -255.39329528808594, "logps/rejected": -269.36065673828125, "loss": 0.3458, "rewards/accuracies": 0.875, "rewards/chosen": -1.9228711128234863, "rewards/margins": 4.008758068084717, "rewards/rejected": -5.931629180908203, "step": 2292 }, { "epoch": 0.48, "learning_rate": 1.0449579831932774e-05, "logits/chosen": -2.075376272201538, "logits/rejected": -1.755548119544983, "logps/chosen": -257.39459228515625, "logps/rejected": -324.1755065917969, "loss": 0.5122, "rewards/accuracies": 0.8125, "rewards/chosen": -2.1123883724212646, "rewards/margins": 3.1925406455993652, "rewards/rejected": -5.304928779602051, "step": 2293 }, { "epoch": 0.48, "learning_rate": 1.0445378151260506e-05, "logits/chosen": -2.155927896499634, "logits/rejected": -1.897313117980957, "logps/chosen": -472.8931579589844, "logps/rejected": -356.0142517089844, "loss": 0.4165, "rewards/accuracies": 0.9375, "rewards/chosen": -2.771646022796631, "rewards/margins": 3.050260066986084, "rewards/rejected": -5.821905612945557, "step": 2294 }, { "epoch": 0.48, "learning_rate": 1.0441176470588236e-05, "logits/chosen": -2.031141757965088, "logits/rejected": -1.7571543455123901, "logps/chosen": -416.4038391113281, "logps/rejected": -377.3183288574219, "loss": 0.6872, "rewards/accuracies": 0.6875, "rewards/chosen": -3.065910577774048, "rewards/margins": 2.4119224548339844, "rewards/rejected": -5.477832794189453, "step": 2295 }, { "epoch": 0.48, "learning_rate": 1.0436974789915968e-05, "logits/chosen": -1.9982634782791138, "logits/rejected": -2.2248196601867676, "logps/chosen": -394.5055847167969, "logps/rejected": -366.26068115234375, "loss": 0.8573, "rewards/accuracies": 0.6875, "rewards/chosen": -2.1969375610351562, "rewards/margins": 1.4121136665344238, "rewards/rejected": -3.609051465988159, "step": 2296 }, { "epoch": 0.48, "learning_rate": 1.0432773109243698e-05, "logits/chosen": -2.087254524230957, "logits/rejected": -2.105206251144409, "logps/chosen": -257.530517578125, "logps/rejected": -392.151123046875, "loss": 0.5559, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3972482681274414, "rewards/margins": 3.5129787921905518, "rewards/rejected": -5.910226821899414, "step": 2297 }, { "epoch": 0.48, "learning_rate": 1.042857142857143e-05, "logits/chosen": -1.8594083786010742, "logits/rejected": -1.8784559965133667, "logps/chosen": -216.34341430664062, "logps/rejected": -239.94049072265625, "loss": 0.4017, "rewards/accuracies": 0.625, "rewards/chosen": -2.8850667476654053, "rewards/margins": 2.38800311088562, "rewards/rejected": -5.273069858551025, "step": 2298 }, { "epoch": 0.48, "learning_rate": 1.0424369747899162e-05, "logits/chosen": -2.3612325191497803, "logits/rejected": -1.8503270149230957, "logps/chosen": -351.280029296875, "logps/rejected": -323.8396301269531, "loss": 0.1901, "rewards/accuracies": 0.9375, "rewards/chosen": -2.224998950958252, "rewards/margins": 4.165541648864746, "rewards/rejected": -6.390540599822998, "step": 2299 }, { "epoch": 0.48, "learning_rate": 1.0420168067226892e-05, "logits/chosen": -2.148956060409546, "logits/rejected": -2.082418918609619, "logps/chosen": -266.72198486328125, "logps/rejected": -284.8290100097656, "loss": 0.6362, "rewards/accuracies": 0.8125, "rewards/chosen": -2.545670986175537, "rewards/margins": 2.190056800842285, "rewards/rejected": -4.735727310180664, "step": 2300 }, { "epoch": 0.48, "learning_rate": 1.0415966386554624e-05, "logits/chosen": -1.9479386806488037, "logits/rejected": -1.770588755607605, "logps/chosen": -258.39300537109375, "logps/rejected": -289.5184326171875, "loss": 0.3128, "rewards/accuracies": 0.8125, "rewards/chosen": -2.543332576751709, "rewards/margins": 4.3417229652404785, "rewards/rejected": -6.885054588317871, "step": 2301 }, { "epoch": 0.48, "learning_rate": 1.0411764705882354e-05, "logits/chosen": -2.016252279281616, "logits/rejected": -1.8760995864868164, "logps/chosen": -285.0805969238281, "logps/rejected": -336.7353515625, "loss": 0.622, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7174577713012695, "rewards/margins": 4.859967231750488, "rewards/rejected": -7.577424049377441, "step": 2302 }, { "epoch": 0.48, "learning_rate": 1.0407563025210086e-05, "logits/chosen": -2.206784248352051, "logits/rejected": -2.0356056690216064, "logps/chosen": -277.21630859375, "logps/rejected": -303.509521484375, "loss": 0.2285, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6532747745513916, "rewards/margins": 4.461917877197266, "rewards/rejected": -6.115192413330078, "step": 2303 }, { "epoch": 0.48, "learning_rate": 1.0403361344537817e-05, "logits/chosen": -1.9960033893585205, "logits/rejected": -2.025233745574951, "logps/chosen": -308.00921630859375, "logps/rejected": -373.82086181640625, "loss": 0.2264, "rewards/accuracies": 0.8125, "rewards/chosen": -1.995784044265747, "rewards/margins": 4.461019992828369, "rewards/rejected": -6.456804275512695, "step": 2304 }, { "epoch": 0.48, "learning_rate": 1.0399159663865548e-05, "logits/chosen": -2.2696568965911865, "logits/rejected": -1.901287317276001, "logps/chosen": -333.8496398925781, "logps/rejected": -328.36859130859375, "loss": 0.5239, "rewards/accuracies": 0.875, "rewards/chosen": -3.70107102394104, "rewards/margins": 3.6228084564208984, "rewards/rejected": -7.323879241943359, "step": 2305 }, { "epoch": 0.48, "learning_rate": 1.0394957983193279e-05, "logits/chosen": -2.204970598220825, "logits/rejected": -1.9253250360488892, "logps/chosen": -299.41058349609375, "logps/rejected": -332.7149963378906, "loss": 0.2594, "rewards/accuracies": 0.8125, "rewards/chosen": -1.5997343063354492, "rewards/margins": 4.068821907043457, "rewards/rejected": -5.668556213378906, "step": 2306 }, { "epoch": 0.48, "learning_rate": 1.039075630252101e-05, "logits/chosen": -2.3570010662078857, "logits/rejected": -2.1713829040527344, "logps/chosen": -420.0768127441406, "logps/rejected": -391.6488342285156, "loss": 0.5355, "rewards/accuracies": 0.875, "rewards/chosen": -2.025796890258789, "rewards/margins": 3.06980037689209, "rewards/rejected": -5.095596790313721, "step": 2307 }, { "epoch": 0.48, "learning_rate": 1.038655462184874e-05, "logits/chosen": -2.2435479164123535, "logits/rejected": -1.6122558116912842, "logps/chosen": -367.30340576171875, "logps/rejected": -310.99847412109375, "loss": 0.1822, "rewards/accuracies": 0.9375, "rewards/chosen": -2.503770351409912, "rewards/margins": 5.666388511657715, "rewards/rejected": -8.170159339904785, "step": 2308 }, { "epoch": 0.48, "learning_rate": 1.0382352941176473e-05, "logits/chosen": -2.30468487739563, "logits/rejected": -1.940298318862915, "logps/chosen": -279.4072570800781, "logps/rejected": -334.99066162109375, "loss": 0.3656, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3309428691864014, "rewards/margins": 2.286120653152466, "rewards/rejected": -4.617063522338867, "step": 2309 }, { "epoch": 0.48, "learning_rate": 1.0378151260504203e-05, "logits/chosen": -2.3532700538635254, "logits/rejected": -1.9994531869888306, "logps/chosen": -397.7611999511719, "logps/rejected": -353.3001708984375, "loss": 0.5001, "rewards/accuracies": 0.8125, "rewards/chosen": -2.361623764038086, "rewards/margins": 3.895838975906372, "rewards/rejected": -6.257462978363037, "step": 2310 }, { "epoch": 0.48, "learning_rate": 1.0373949579831935e-05, "logits/chosen": -1.7401371002197266, "logits/rejected": -1.6821308135986328, "logps/chosen": -262.25726318359375, "logps/rejected": -362.7687072753906, "loss": 0.5544, "rewards/accuracies": 0.8125, "rewards/chosen": -3.5420703887939453, "rewards/margins": 2.6672356128692627, "rewards/rejected": -6.209306240081787, "step": 2311 }, { "epoch": 0.48, "learning_rate": 1.0369747899159665e-05, "logits/chosen": -2.186805009841919, "logits/rejected": -1.8599371910095215, "logps/chosen": -311.45953369140625, "logps/rejected": -243.93914794921875, "loss": 0.0586, "rewards/accuracies": 1.0, "rewards/chosen": -1.5152578353881836, "rewards/margins": 4.713910102844238, "rewards/rejected": -6.229167938232422, "step": 2312 }, { "epoch": 0.48, "learning_rate": 1.0365546218487397e-05, "logits/chosen": -2.1323211193084717, "logits/rejected": -1.7854714393615723, "logps/chosen": -274.37762451171875, "logps/rejected": -263.7120361328125, "loss": 0.5824, "rewards/accuracies": 0.8125, "rewards/chosen": -2.362654209136963, "rewards/margins": 2.8587679862976074, "rewards/rejected": -5.22142219543457, "step": 2313 }, { "epoch": 0.48, "learning_rate": 1.0361344537815127e-05, "logits/chosen": -2.2310009002685547, "logits/rejected": -1.94380521774292, "logps/chosen": -296.125, "logps/rejected": -303.37823486328125, "loss": 0.1717, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1863696575164795, "rewards/margins": 4.0852813720703125, "rewards/rejected": -6.271651268005371, "step": 2314 }, { "epoch": 0.48, "learning_rate": 1.0357142857142859e-05, "logits/chosen": -2.3897182941436768, "logits/rejected": -2.312070369720459, "logps/chosen": -285.4508056640625, "logps/rejected": -342.91778564453125, "loss": 0.4388, "rewards/accuracies": 0.75, "rewards/chosen": -2.359724521636963, "rewards/margins": 2.9359967708587646, "rewards/rejected": -5.295721530914307, "step": 2315 }, { "epoch": 0.48, "learning_rate": 1.035294117647059e-05, "logits/chosen": -2.118828058242798, "logits/rejected": -1.9076753854751587, "logps/chosen": -442.66815185546875, "logps/rejected": -374.44476318359375, "loss": 0.4159, "rewards/accuracies": 0.875, "rewards/chosen": -1.5280684232711792, "rewards/margins": 4.460971832275391, "rewards/rejected": -5.989039897918701, "step": 2316 }, { "epoch": 0.48, "learning_rate": 1.0348739495798321e-05, "logits/chosen": -2.4222981929779053, "logits/rejected": -1.9140920639038086, "logps/chosen": -398.29254150390625, "logps/rejected": -323.85986328125, "loss": 0.214, "rewards/accuracies": 0.875, "rewards/chosen": -1.780364990234375, "rewards/margins": 4.140762805938721, "rewards/rejected": -5.921127796173096, "step": 2317 }, { "epoch": 0.48, "learning_rate": 1.0344537815126051e-05, "logits/chosen": -2.24556827545166, "logits/rejected": -2.1300268173217773, "logps/chosen": -282.3939208984375, "logps/rejected": -274.1551513671875, "loss": 0.7544, "rewards/accuracies": 0.625, "rewards/chosen": -2.9049735069274902, "rewards/margins": 1.473496437072754, "rewards/rejected": -4.378469467163086, "step": 2318 }, { "epoch": 0.49, "learning_rate": 1.0340336134453783e-05, "logits/chosen": -1.9689371585845947, "logits/rejected": -2.0177853107452393, "logps/chosen": -363.2492980957031, "logps/rejected": -509.2076416015625, "loss": 0.1934, "rewards/accuracies": 0.9375, "rewards/chosen": -2.043694019317627, "rewards/margins": 5.898947238922119, "rewards/rejected": -7.942641258239746, "step": 2319 }, { "epoch": 0.49, "learning_rate": 1.0336134453781514e-05, "logits/chosen": -2.285996913909912, "logits/rejected": -2.084526300430298, "logps/chosen": -283.24053955078125, "logps/rejected": -276.7225036621094, "loss": 0.3824, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2471442222595215, "rewards/margins": 3.2939157485961914, "rewards/rejected": -5.541059494018555, "step": 2320 }, { "epoch": 0.49, "learning_rate": 1.0331932773109246e-05, "logits/chosen": -2.0119547843933105, "logits/rejected": -2.0724170207977295, "logps/chosen": -323.9012145996094, "logps/rejected": -377.12176513671875, "loss": 0.1381, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7072409391403198, "rewards/margins": 5.127521991729736, "rewards/rejected": -6.834763050079346, "step": 2321 }, { "epoch": 0.49, "learning_rate": 1.0327731092436977e-05, "logits/chosen": -1.9772799015045166, "logits/rejected": -2.087378978729248, "logps/chosen": -306.2851257324219, "logps/rejected": -336.35595703125, "loss": 0.1832, "rewards/accuracies": 1.0, "rewards/chosen": -2.317422866821289, "rewards/margins": 5.268842697143555, "rewards/rejected": -7.586265563964844, "step": 2322 }, { "epoch": 0.49, "learning_rate": 1.0323529411764708e-05, "logits/chosen": -2.3413643836975098, "logits/rejected": -2.0181195735931396, "logps/chosen": -473.9027099609375, "logps/rejected": -438.0572814941406, "loss": 0.1525, "rewards/accuracies": 1.0, "rewards/chosen": -2.5069773197174072, "rewards/margins": 3.889072895050049, "rewards/rejected": -6.396050453186035, "step": 2323 }, { "epoch": 0.49, "learning_rate": 1.031932773109244e-05, "logits/chosen": -2.1342389583587646, "logits/rejected": -1.8100658655166626, "logps/chosen": -340.0462341308594, "logps/rejected": -333.6764831542969, "loss": 0.2793, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2039098739624023, "rewards/margins": 4.550719738006592, "rewards/rejected": -6.754630088806152, "step": 2324 }, { "epoch": 0.49, "learning_rate": 1.031512605042017e-05, "logits/chosen": -2.097410202026367, "logits/rejected": -1.6221643686294556, "logps/chosen": -324.7098388671875, "logps/rejected": -355.258056640625, "loss": 0.1827, "rewards/accuracies": 0.875, "rewards/chosen": -2.1750595569610596, "rewards/margins": 3.7759718894958496, "rewards/rejected": -5.951031684875488, "step": 2325 }, { "epoch": 0.49, "learning_rate": 1.0310924369747898e-05, "logits/chosen": -2.276090145111084, "logits/rejected": -1.5418732166290283, "logps/chosen": -333.2148742675781, "logps/rejected": -309.28741455078125, "loss": 0.5162, "rewards/accuracies": 0.625, "rewards/chosen": -2.39286470413208, "rewards/margins": 3.407494068145752, "rewards/rejected": -5.80035924911499, "step": 2326 }, { "epoch": 0.49, "learning_rate": 1.030672268907563e-05, "logits/chosen": -1.9552195072174072, "logits/rejected": -2.0105459690093994, "logps/chosen": -247.01927185058594, "logps/rejected": -273.3602294921875, "loss": 0.1229, "rewards/accuracies": 1.0, "rewards/chosen": -2.407646417617798, "rewards/margins": 3.6120429039001465, "rewards/rejected": -6.019689083099365, "step": 2327 }, { "epoch": 0.49, "learning_rate": 1.030252100840336e-05, "logits/chosen": -2.1548757553100586, "logits/rejected": -1.6838428974151611, "logps/chosen": -279.60491943359375, "logps/rejected": -316.785400390625, "loss": 0.1947, "rewards/accuracies": 0.875, "rewards/chosen": -2.594866991043091, "rewards/margins": 4.489614009857178, "rewards/rejected": -7.0844807624816895, "step": 2328 }, { "epoch": 0.49, "learning_rate": 1.0298319327731092e-05, "logits/chosen": -2.328587532043457, "logits/rejected": -1.908862829208374, "logps/chosen": -465.23431396484375, "logps/rejected": -378.6368408203125, "loss": 0.2821, "rewards/accuracies": 0.875, "rewards/chosen": -1.7628517150878906, "rewards/margins": 3.3254811763763428, "rewards/rejected": -5.0883331298828125, "step": 2329 }, { "epoch": 0.49, "learning_rate": 1.0294117647058823e-05, "logits/chosen": -1.8744580745697021, "logits/rejected": -1.6908175945281982, "logps/chosen": -259.7429504394531, "logps/rejected": -254.2803955078125, "loss": 0.1529, "rewards/accuracies": 0.9375, "rewards/chosen": -1.686598777770996, "rewards/margins": 4.597958087921143, "rewards/rejected": -6.284556865692139, "step": 2330 }, { "epoch": 0.49, "learning_rate": 1.0289915966386555e-05, "logits/chosen": -2.311937093734741, "logits/rejected": -1.9439003467559814, "logps/chosen": -519.1214599609375, "logps/rejected": -501.08642578125, "loss": 0.2057, "rewards/accuracies": 0.8125, "rewards/chosen": -1.2204725742340088, "rewards/margins": 5.467904567718506, "rewards/rejected": -6.688377380371094, "step": 2331 }, { "epoch": 0.49, "learning_rate": 1.0285714285714285e-05, "logits/chosen": -2.255858898162842, "logits/rejected": -1.896178960800171, "logps/chosen": -387.4921569824219, "logps/rejected": -328.13751220703125, "loss": 0.0929, "rewards/accuracies": 1.0, "rewards/chosen": -1.8484928607940674, "rewards/margins": 5.108133316040039, "rewards/rejected": -6.956626892089844, "step": 2332 }, { "epoch": 0.49, "learning_rate": 1.0281512605042017e-05, "logits/chosen": -2.0751709938049316, "logits/rejected": -2.004098892211914, "logps/chosen": -294.5953063964844, "logps/rejected": -336.47259521484375, "loss": 0.2154, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6762757301330566, "rewards/margins": 5.432332515716553, "rewards/rejected": -8.10860824584961, "step": 2333 }, { "epoch": 0.49, "learning_rate": 1.0277310924369749e-05, "logits/chosen": -2.1748366355895996, "logits/rejected": -2.142876148223877, "logps/chosen": -260.00775146484375, "logps/rejected": -341.19873046875, "loss": 0.1294, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1166110038757324, "rewards/margins": 3.6506690979003906, "rewards/rejected": -5.767280101776123, "step": 2334 }, { "epoch": 0.49, "learning_rate": 1.0273109243697479e-05, "logits/chosen": -2.229928731918335, "logits/rejected": -2.0559520721435547, "logps/chosen": -413.1923828125, "logps/rejected": -398.8578186035156, "loss": 0.0944, "rewards/accuracies": 1.0, "rewards/chosen": -1.347983479499817, "rewards/margins": 3.7781519889831543, "rewards/rejected": -5.126135349273682, "step": 2335 }, { "epoch": 0.49, "learning_rate": 1.026890756302521e-05, "logits/chosen": -2.4388957023620605, "logits/rejected": -2.0972840785980225, "logps/chosen": -280.9982604980469, "logps/rejected": -329.42138671875, "loss": 0.1405, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2944209575653076, "rewards/margins": 4.757044792175293, "rewards/rejected": -7.05146598815918, "step": 2336 }, { "epoch": 0.49, "learning_rate": 1.0264705882352941e-05, "logits/chosen": -2.203840732574463, "logits/rejected": -1.7752699851989746, "logps/chosen": -269.9710998535156, "logps/rejected": -273.66229248046875, "loss": 0.212, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0475072860717773, "rewards/margins": 4.2801594734191895, "rewards/rejected": -6.327666759490967, "step": 2337 }, { "epoch": 0.49, "learning_rate": 1.0260504201680673e-05, "logits/chosen": -2.216181755065918, "logits/rejected": -2.0790791511535645, "logps/chosen": -254.6668701171875, "logps/rejected": -300.34228515625, "loss": 0.1406, "rewards/accuracies": 1.0, "rewards/chosen": -2.469614028930664, "rewards/margins": 4.0897650718688965, "rewards/rejected": -6.5593791007995605, "step": 2338 }, { "epoch": 0.49, "learning_rate": 1.0256302521008403e-05, "logits/chosen": -2.079301595687866, "logits/rejected": -1.7495542764663696, "logps/chosen": -327.1336669921875, "logps/rejected": -342.5215148925781, "loss": 0.263, "rewards/accuracies": 0.875, "rewards/chosen": -2.7037224769592285, "rewards/margins": 4.537293910980225, "rewards/rejected": -7.241016387939453, "step": 2339 }, { "epoch": 0.49, "learning_rate": 1.0252100840336135e-05, "logits/chosen": -1.8507493734359741, "logits/rejected": -2.2964813709259033, "logps/chosen": -217.77003479003906, "logps/rejected": -343.0801086425781, "loss": 0.383, "rewards/accuracies": 0.8125, "rewards/chosen": -3.268247604370117, "rewards/margins": 4.268898010253906, "rewards/rejected": -7.537145137786865, "step": 2340 }, { "epoch": 0.49, "learning_rate": 1.0247899159663865e-05, "logits/chosen": -2.00114369392395, "logits/rejected": -2.044537305831909, "logps/chosen": -411.5748291015625, "logps/rejected": -463.3763427734375, "loss": 0.7643, "rewards/accuracies": 0.75, "rewards/chosen": -3.153486967086792, "rewards/margins": 2.3865609169006348, "rewards/rejected": -5.540047645568848, "step": 2341 }, { "epoch": 0.49, "learning_rate": 1.0243697478991597e-05, "logits/chosen": -2.0533995628356934, "logits/rejected": -1.9718629121780396, "logps/chosen": -292.7917785644531, "logps/rejected": -277.1968994140625, "loss": 0.1032, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5260417461395264, "rewards/margins": 5.376851558685303, "rewards/rejected": -7.902894020080566, "step": 2342 }, { "epoch": 0.49, "learning_rate": 1.0239495798319327e-05, "logits/chosen": -2.1429636478424072, "logits/rejected": -1.9796146154403687, "logps/chosen": -263.4094543457031, "logps/rejected": -260.13861083984375, "loss": 0.2047, "rewards/accuracies": 0.9375, "rewards/chosen": -3.481532096862793, "rewards/margins": 3.8358161449432373, "rewards/rejected": -7.317348480224609, "step": 2343 }, { "epoch": 0.49, "learning_rate": 1.023529411764706e-05, "logits/chosen": -2.1428537368774414, "logits/rejected": -2.1778624057769775, "logps/chosen": -208.9241943359375, "logps/rejected": -296.50360107421875, "loss": 0.213, "rewards/accuracies": 0.8125, "rewards/chosen": -2.914829730987549, "rewards/margins": 3.4774906635284424, "rewards/rejected": -6.392320156097412, "step": 2344 }, { "epoch": 0.49, "learning_rate": 1.023109243697479e-05, "logits/chosen": -2.118650197982788, "logits/rejected": -1.8937420845031738, "logps/chosen": -427.9129638671875, "logps/rejected": -392.86444091796875, "loss": 0.1552, "rewards/accuracies": 0.9375, "rewards/chosen": -2.171908378601074, "rewards/margins": 4.405630588531494, "rewards/rejected": -6.57753849029541, "step": 2345 }, { "epoch": 0.49, "learning_rate": 1.0226890756302521e-05, "logits/chosen": -2.2222931385040283, "logits/rejected": -2.1161084175109863, "logps/chosen": -354.2533874511719, "logps/rejected": -398.53717041015625, "loss": 0.3485, "rewards/accuracies": 0.875, "rewards/chosen": -2.480360984802246, "rewards/margins": 4.525232315063477, "rewards/rejected": -7.005593299865723, "step": 2346 }, { "epoch": 0.49, "learning_rate": 1.0222689075630252e-05, "logits/chosen": -2.2023239135742188, "logits/rejected": -1.9762842655181885, "logps/chosen": -365.4632873535156, "logps/rejected": -401.91949462890625, "loss": 0.2193, "rewards/accuracies": 0.9375, "rewards/chosen": -2.385115385055542, "rewards/margins": 5.419905662536621, "rewards/rejected": -7.805021286010742, "step": 2347 }, { "epoch": 0.49, "learning_rate": 1.0218487394957984e-05, "logits/chosen": -2.197899580001831, "logits/rejected": -1.7755745649337769, "logps/chosen": -318.3700256347656, "logps/rejected": -301.67254638671875, "loss": 0.2818, "rewards/accuracies": 0.875, "rewards/chosen": -2.0880095958709717, "rewards/margins": 4.677926540374756, "rewards/rejected": -6.765935897827148, "step": 2348 }, { "epoch": 0.49, "learning_rate": 1.0214285714285714e-05, "logits/chosen": -2.0928783416748047, "logits/rejected": -2.080960750579834, "logps/chosen": -324.05609130859375, "logps/rejected": -317.9915771484375, "loss": 0.2068, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9530079364776611, "rewards/margins": 3.729494094848633, "rewards/rejected": -5.682502269744873, "step": 2349 }, { "epoch": 0.49, "learning_rate": 1.0210084033613446e-05, "logits/chosen": -2.520092487335205, "logits/rejected": -1.9951176643371582, "logps/chosen": -390.976806640625, "logps/rejected": -354.14837646484375, "loss": 0.2037, "rewards/accuracies": 0.875, "rewards/chosen": -2.3681273460388184, "rewards/margins": 4.0330810546875, "rewards/rejected": -6.401208877563477, "step": 2350 }, { "epoch": 0.49, "learning_rate": 1.0205882352941176e-05, "logits/chosen": -2.048600673675537, "logits/rejected": -2.1942248344421387, "logps/chosen": -257.93670654296875, "logps/rejected": -339.9472351074219, "loss": 0.1832, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5696909427642822, "rewards/margins": 4.439303398132324, "rewards/rejected": -7.008994102478027, "step": 2351 }, { "epoch": 0.49, "learning_rate": 1.0201680672268908e-05, "logits/chosen": -2.0361030101776123, "logits/rejected": -1.8283741474151611, "logps/chosen": -344.7386474609375, "logps/rejected": -320.3345642089844, "loss": 0.3664, "rewards/accuracies": 0.75, "rewards/chosen": -3.239349603652954, "rewards/margins": 3.6690733432769775, "rewards/rejected": -6.908422470092773, "step": 2352 }, { "epoch": 0.49, "learning_rate": 1.0197478991596638e-05, "logits/chosen": -2.393639087677002, "logits/rejected": -2.444275140762329, "logps/chosen": -348.50897216796875, "logps/rejected": -350.375732421875, "loss": 0.4295, "rewards/accuracies": 0.8125, "rewards/chosen": -2.299044370651245, "rewards/margins": 4.70559549331665, "rewards/rejected": -7.004640579223633, "step": 2353 }, { "epoch": 0.49, "learning_rate": 1.019327731092437e-05, "logits/chosen": -2.3311803340911865, "logits/rejected": -2.0447452068328857, "logps/chosen": -217.54718017578125, "logps/rejected": -220.82308959960938, "loss": 0.3582, "rewards/accuracies": 0.8125, "rewards/chosen": -2.995664119720459, "rewards/margins": 2.9903500080108643, "rewards/rejected": -5.986014366149902, "step": 2354 }, { "epoch": 0.49, "learning_rate": 1.01890756302521e-05, "logits/chosen": -2.2401108741760254, "logits/rejected": -1.6294881105422974, "logps/chosen": -312.89501953125, "logps/rejected": -274.7790222167969, "loss": 0.3417, "rewards/accuracies": 0.875, "rewards/chosen": -2.852980375289917, "rewards/margins": 4.437831401824951, "rewards/rejected": -7.2908124923706055, "step": 2355 }, { "epoch": 0.49, "learning_rate": 1.0184873949579832e-05, "logits/chosen": -2.128582239151001, "logits/rejected": -1.9758517742156982, "logps/chosen": -340.9329528808594, "logps/rejected": -414.89337158203125, "loss": 0.1756, "rewards/accuracies": 0.9375, "rewards/chosen": -3.3178863525390625, "rewards/margins": 4.384424209594727, "rewards/rejected": -7.702310562133789, "step": 2356 }, { "epoch": 0.49, "learning_rate": 1.0180672268907564e-05, "logits/chosen": -2.375614643096924, "logits/rejected": -1.9644136428833008, "logps/chosen": -381.69610595703125, "logps/rejected": -390.6536865234375, "loss": 0.2455, "rewards/accuracies": 0.875, "rewards/chosen": -2.174661159515381, "rewards/margins": 5.049365520477295, "rewards/rejected": -7.224026679992676, "step": 2357 }, { "epoch": 0.49, "learning_rate": 1.0176470588235294e-05, "logits/chosen": -2.0989181995391846, "logits/rejected": -2.069923162460327, "logps/chosen": -377.942138671875, "logps/rejected": -343.534423828125, "loss": 0.4286, "rewards/accuracies": 0.75, "rewards/chosen": -3.2267632484436035, "rewards/margins": 3.6504411697387695, "rewards/rejected": -6.877203941345215, "step": 2358 }, { "epoch": 0.49, "learning_rate": 1.0172268907563026e-05, "logits/chosen": -2.230153799057007, "logits/rejected": -1.7753021717071533, "logps/chosen": -451.0483093261719, "logps/rejected": -409.6219482421875, "loss": 0.2368, "rewards/accuracies": 0.875, "rewards/chosen": -2.4202916622161865, "rewards/margins": 4.739964485168457, "rewards/rejected": -7.160256385803223, "step": 2359 }, { "epoch": 0.49, "learning_rate": 1.0168067226890756e-05, "logits/chosen": -2.0516109466552734, "logits/rejected": -2.4862051010131836, "logps/chosen": -180.43333435058594, "logps/rejected": -306.3184814453125, "loss": 0.1284, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2275238037109375, "rewards/margins": 6.064441680908203, "rewards/rejected": -9.29196548461914, "step": 2360 }, { "epoch": 0.49, "learning_rate": 1.0163865546218488e-05, "logits/chosen": -2.2121572494506836, "logits/rejected": -2.1386008262634277, "logps/chosen": -429.6708984375, "logps/rejected": -409.91845703125, "loss": 0.1757, "rewards/accuracies": 0.9375, "rewards/chosen": -2.672609329223633, "rewards/margins": 2.929409980773926, "rewards/rejected": -5.602019309997559, "step": 2361 }, { "epoch": 0.49, "learning_rate": 1.0159663865546219e-05, "logits/chosen": -2.1723341941833496, "logits/rejected": -1.3779733180999756, "logps/chosen": -353.0877685546875, "logps/rejected": -269.4619445800781, "loss": 0.2653, "rewards/accuracies": 0.875, "rewards/chosen": -3.0441975593566895, "rewards/margins": 2.9195477962493896, "rewards/rejected": -5.9637451171875, "step": 2362 }, { "epoch": 0.49, "learning_rate": 1.015546218487395e-05, "logits/chosen": -1.7376939058303833, "logits/rejected": -1.6737254858016968, "logps/chosen": -310.61724853515625, "logps/rejected": -336.97760009765625, "loss": 0.5457, "rewards/accuracies": 0.8125, "rewards/chosen": -4.106523513793945, "rewards/margins": 3.6526708602905273, "rewards/rejected": -7.759194850921631, "step": 2363 }, { "epoch": 0.49, "learning_rate": 1.015126050420168e-05, "logits/chosen": -2.2258591651916504, "logits/rejected": -1.9771018028259277, "logps/chosen": -266.7381591796875, "logps/rejected": -343.21588134765625, "loss": 0.4157, "rewards/accuracies": 0.875, "rewards/chosen": -3.445730209350586, "rewards/margins": 4.628357887268066, "rewards/rejected": -8.074089050292969, "step": 2364 }, { "epoch": 0.49, "learning_rate": 1.0147058823529413e-05, "logits/chosen": -1.917832851409912, "logits/rejected": -1.7805628776550293, "logps/chosen": -422.8464660644531, "logps/rejected": -349.28375244140625, "loss": 0.1705, "rewards/accuracies": 0.875, "rewards/chosen": -2.3043696880340576, "rewards/margins": 3.801083564758301, "rewards/rejected": -6.1054534912109375, "step": 2365 }, { "epoch": 0.49, "learning_rate": 1.0142857142857143e-05, "logits/chosen": -1.8564013242721558, "logits/rejected": -2.225888729095459, "logps/chosen": -391.9482421875, "logps/rejected": -558.7158813476562, "loss": 0.1541, "rewards/accuracies": 0.9375, "rewards/chosen": -2.211973190307617, "rewards/margins": 4.239522933959961, "rewards/rejected": -6.451496124267578, "step": 2366 }, { "epoch": 0.5, "learning_rate": 1.0138655462184875e-05, "logits/chosen": -1.974426031112671, "logits/rejected": -2.100161552429199, "logps/chosen": -262.3293151855469, "logps/rejected": -331.90679931640625, "loss": 0.6789, "rewards/accuracies": 0.8125, "rewards/chosen": -3.795732259750366, "rewards/margins": 2.055051326751709, "rewards/rejected": -5.850783348083496, "step": 2367 }, { "epoch": 0.5, "learning_rate": 1.0134453781512605e-05, "logits/chosen": -2.2483503818511963, "logits/rejected": -2.062711000442505, "logps/chosen": -377.90899658203125, "logps/rejected": -348.4140625, "loss": 0.2545, "rewards/accuracies": 0.875, "rewards/chosen": -2.9506871700286865, "rewards/margins": 4.971905708312988, "rewards/rejected": -7.922593116760254, "step": 2368 }, { "epoch": 0.5, "learning_rate": 1.0130252100840337e-05, "logits/chosen": -2.264641284942627, "logits/rejected": -1.7713286876678467, "logps/chosen": -318.753662109375, "logps/rejected": -387.6985168457031, "loss": 0.2859, "rewards/accuracies": 0.875, "rewards/chosen": -3.798366069793701, "rewards/margins": 3.808821201324463, "rewards/rejected": -7.607187271118164, "step": 2369 }, { "epoch": 0.5, "learning_rate": 1.0126050420168067e-05, "logits/chosen": -2.0879833698272705, "logits/rejected": -2.022408962249756, "logps/chosen": -465.34576416015625, "logps/rejected": -435.58941650390625, "loss": 0.3542, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2982735633850098, "rewards/margins": 4.443699836730957, "rewards/rejected": -6.74197244644165, "step": 2370 }, { "epoch": 0.5, "learning_rate": 1.0121848739495799e-05, "logits/chosen": -2.0189285278320312, "logits/rejected": -1.9815878868103027, "logps/chosen": -272.43585205078125, "logps/rejected": -390.76904296875, "loss": 0.7259, "rewards/accuracies": 0.6875, "rewards/chosen": -3.639523506164551, "rewards/margins": 3.0126352310180664, "rewards/rejected": -6.652159214019775, "step": 2371 }, { "epoch": 0.5, "learning_rate": 1.011764705882353e-05, "logits/chosen": -2.331763744354248, "logits/rejected": -2.1646156311035156, "logps/chosen": -442.7057800292969, "logps/rejected": -377.1627502441406, "loss": 0.1682, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6277990341186523, "rewards/margins": 4.27970027923584, "rewards/rejected": -6.907499313354492, "step": 2372 }, { "epoch": 0.5, "learning_rate": 1.0113445378151261e-05, "logits/chosen": -2.412088394165039, "logits/rejected": -1.8948307037353516, "logps/chosen": -382.93829345703125, "logps/rejected": -317.02227783203125, "loss": 0.3936, "rewards/accuracies": 0.8125, "rewards/chosen": -2.954725742340088, "rewards/margins": 2.828511953353882, "rewards/rejected": -5.783237457275391, "step": 2373 }, { "epoch": 0.5, "learning_rate": 1.0109243697478991e-05, "logits/chosen": -2.207195281982422, "logits/rejected": -1.9755818843841553, "logps/chosen": -375.7664794921875, "logps/rejected": -380.745849609375, "loss": 0.2122, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8473095893859863, "rewards/margins": 4.760699272155762, "rewards/rejected": -7.60800838470459, "step": 2374 }, { "epoch": 0.5, "learning_rate": 1.0105042016806723e-05, "logits/chosen": -2.118767023086548, "logits/rejected": -1.8216348886489868, "logps/chosen": -339.97943115234375, "logps/rejected": -304.74688720703125, "loss": 0.1393, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7438158988952637, "rewards/margins": 4.913661479949951, "rewards/rejected": -7.657476425170898, "step": 2375 }, { "epoch": 0.5, "learning_rate": 1.0100840336134453e-05, "logits/chosen": -2.537324905395508, "logits/rejected": -2.022660732269287, "logps/chosen": -383.07586669921875, "logps/rejected": -332.684814453125, "loss": 0.1091, "rewards/accuracies": 1.0, "rewards/chosen": -3.3256542682647705, "rewards/margins": 4.027649879455566, "rewards/rejected": -7.353304386138916, "step": 2376 }, { "epoch": 0.5, "learning_rate": 1.0096638655462185e-05, "logits/chosen": -2.3195064067840576, "logits/rejected": -2.185446262359619, "logps/chosen": -424.9019775390625, "logps/rejected": -502.19598388671875, "loss": 0.3324, "rewards/accuracies": 0.875, "rewards/chosen": -3.273162841796875, "rewards/margins": 3.496093511581421, "rewards/rejected": -6.769256591796875, "step": 2377 }, { "epoch": 0.5, "learning_rate": 1.0092436974789917e-05, "logits/chosen": -2.441375970840454, "logits/rejected": -1.9123210906982422, "logps/chosen": -344.749755859375, "logps/rejected": -268.8642883300781, "loss": 0.0874, "rewards/accuracies": 1.0, "rewards/chosen": -2.3219001293182373, "rewards/margins": 4.846903324127197, "rewards/rejected": -7.168804168701172, "step": 2378 }, { "epoch": 0.5, "learning_rate": 1.0088235294117648e-05, "logits/chosen": -1.9964544773101807, "logits/rejected": -1.843702793121338, "logps/chosen": -328.4271545410156, "logps/rejected": -388.16998291015625, "loss": 0.3413, "rewards/accuracies": 0.875, "rewards/chosen": -3.2889223098754883, "rewards/margins": 4.009631156921387, "rewards/rejected": -7.298553466796875, "step": 2379 }, { "epoch": 0.5, "learning_rate": 1.008403361344538e-05, "logits/chosen": -1.9196803569793701, "logits/rejected": -1.5918848514556885, "logps/chosen": -411.455322265625, "logps/rejected": -426.3360595703125, "loss": 0.1504, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9419288635253906, "rewards/margins": 4.285278797149658, "rewards/rejected": -7.227207660675049, "step": 2380 }, { "epoch": 0.5, "learning_rate": 1.007983193277311e-05, "logits/chosen": -2.0860230922698975, "logits/rejected": -2.2899765968322754, "logps/chosen": -255.71780395507812, "logps/rejected": -313.4264831542969, "loss": 0.2851, "rewards/accuracies": 0.875, "rewards/chosen": -2.593266248703003, "rewards/margins": 4.350472450256348, "rewards/rejected": -6.943737983703613, "step": 2381 }, { "epoch": 0.5, "learning_rate": 1.0075630252100842e-05, "logits/chosen": -2.2322916984558105, "logits/rejected": -1.991105079650879, "logps/chosen": -347.2756652832031, "logps/rejected": -334.885498046875, "loss": 0.3435, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1120400428771973, "rewards/margins": 3.6266214847564697, "rewards/rejected": -6.738661766052246, "step": 2382 }, { "epoch": 0.5, "learning_rate": 1.0071428571428572e-05, "logits/chosen": -2.3625547885894775, "logits/rejected": -2.2327795028686523, "logps/chosen": -297.80084228515625, "logps/rejected": -400.97515869140625, "loss": 0.3024, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8820149898529053, "rewards/margins": 4.671796798706055, "rewards/rejected": -7.553812026977539, "step": 2383 }, { "epoch": 0.5, "learning_rate": 1.0067226890756304e-05, "logits/chosen": -2.1526286602020264, "logits/rejected": -2.2628276348114014, "logps/chosen": -390.17755126953125, "logps/rejected": -359.5980224609375, "loss": 0.1104, "rewards/accuracies": 1.0, "rewards/chosen": -2.8839187622070312, "rewards/margins": 3.8925013542175293, "rewards/rejected": -6.776420593261719, "step": 2384 }, { "epoch": 0.5, "learning_rate": 1.0063025210084034e-05, "logits/chosen": -1.9154771566390991, "logits/rejected": -2.020354747772217, "logps/chosen": -423.9452209472656, "logps/rejected": -391.2864074707031, "loss": 0.2739, "rewards/accuracies": 0.8125, "rewards/chosen": -2.87423038482666, "rewards/margins": 3.822540760040283, "rewards/rejected": -6.696771144866943, "step": 2385 }, { "epoch": 0.5, "learning_rate": 1.0058823529411766e-05, "logits/chosen": -2.248162269592285, "logits/rejected": -2.29250431060791, "logps/chosen": -445.37939453125, "logps/rejected": -330.8786315917969, "loss": 0.3405, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6439990997314453, "rewards/margins": 2.292757272720337, "rewards/rejected": -4.936756134033203, "step": 2386 }, { "epoch": 0.5, "learning_rate": 1.0054621848739496e-05, "logits/chosen": -2.0872318744659424, "logits/rejected": -1.9483592510223389, "logps/chosen": -286.3751525878906, "logps/rejected": -371.61944580078125, "loss": 0.5192, "rewards/accuracies": 0.75, "rewards/chosen": -3.1115846633911133, "rewards/margins": 2.079066276550293, "rewards/rejected": -5.190650939941406, "step": 2387 }, { "epoch": 0.5, "learning_rate": 1.0050420168067228e-05, "logits/chosen": -2.1926615238189697, "logits/rejected": -1.941779613494873, "logps/chosen": -453.847900390625, "logps/rejected": -412.57366943359375, "loss": 0.1812, "rewards/accuracies": 0.875, "rewards/chosen": -2.1723692417144775, "rewards/margins": 5.949557304382324, "rewards/rejected": -8.121927261352539, "step": 2388 }, { "epoch": 0.5, "learning_rate": 1.0046218487394958e-05, "logits/chosen": -2.1235480308532715, "logits/rejected": -2.3440585136413574, "logps/chosen": -293.1241455078125, "logps/rejected": -330.4990234375, "loss": 0.5469, "rewards/accuracies": 0.75, "rewards/chosen": -3.06968355178833, "rewards/margins": 3.635479211807251, "rewards/rejected": -6.705162048339844, "step": 2389 }, { "epoch": 0.5, "learning_rate": 1.004201680672269e-05, "logits/chosen": -2.2626047134399414, "logits/rejected": -2.227721691131592, "logps/chosen": -328.4931335449219, "logps/rejected": -292.5007629394531, "loss": 0.3164, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4760754108428955, "rewards/margins": 4.064693450927734, "rewards/rejected": -6.540769100189209, "step": 2390 }, { "epoch": 0.5, "learning_rate": 1.003781512605042e-05, "logits/chosen": -2.139137029647827, "logits/rejected": -1.7435845136642456, "logps/chosen": -321.35418701171875, "logps/rejected": -360.59027099609375, "loss": 0.4074, "rewards/accuracies": 0.875, "rewards/chosen": -3.110934257507324, "rewards/margins": 3.720695972442627, "rewards/rejected": -6.831630229949951, "step": 2391 }, { "epoch": 0.5, "learning_rate": 1.0033613445378152e-05, "logits/chosen": -2.0980777740478516, "logits/rejected": -1.829169750213623, "logps/chosen": -381.323486328125, "logps/rejected": -390.6148681640625, "loss": 0.3501, "rewards/accuracies": 0.8125, "rewards/chosen": -2.991558790206909, "rewards/margins": 4.191066265106201, "rewards/rejected": -7.182624816894531, "step": 2392 }, { "epoch": 0.5, "learning_rate": 1.0029411764705882e-05, "logits/chosen": -2.022017002105713, "logits/rejected": -2.1261677742004395, "logps/chosen": -255.502685546875, "logps/rejected": -324.99169921875, "loss": 0.4771, "rewards/accuracies": 0.875, "rewards/chosen": -3.4784703254699707, "rewards/margins": 4.7589263916015625, "rewards/rejected": -8.237397193908691, "step": 2393 }, { "epoch": 0.5, "learning_rate": 1.0025210084033614e-05, "logits/chosen": -2.0819475650787354, "logits/rejected": -2.2279903888702393, "logps/chosen": -365.35235595703125, "logps/rejected": -371.3768615722656, "loss": 0.4438, "rewards/accuracies": 0.875, "rewards/chosen": -2.9094648361206055, "rewards/margins": 4.8693647384643555, "rewards/rejected": -7.778829574584961, "step": 2394 }, { "epoch": 0.5, "learning_rate": 1.0021008403361345e-05, "logits/chosen": -1.7972275018692017, "logits/rejected": -1.6436253786087036, "logps/chosen": -291.5543212890625, "logps/rejected": -303.94696044921875, "loss": 0.5153, "rewards/accuracies": 0.8125, "rewards/chosen": -2.981525421142578, "rewards/margins": 3.397333860397339, "rewards/rejected": -6.378859519958496, "step": 2395 }, { "epoch": 0.5, "learning_rate": 1.0016806722689077e-05, "logits/chosen": -1.8907406330108643, "logits/rejected": -2.097783088684082, "logps/chosen": -311.48687744140625, "logps/rejected": -380.88336181640625, "loss": 0.516, "rewards/accuracies": 0.6875, "rewards/chosen": -2.7880444526672363, "rewards/margins": 2.915473699569702, "rewards/rejected": -5.703518390655518, "step": 2396 }, { "epoch": 0.5, "learning_rate": 1.0012605042016807e-05, "logits/chosen": -2.2271320819854736, "logits/rejected": -1.7776563167572021, "logps/chosen": -403.1595153808594, "logps/rejected": -340.1800842285156, "loss": 0.143, "rewards/accuracies": 0.9375, "rewards/chosen": -2.421895742416382, "rewards/margins": 4.355343818664551, "rewards/rejected": -6.777239799499512, "step": 2397 }, { "epoch": 0.5, "learning_rate": 1.0008403361344539e-05, "logits/chosen": -2.1426753997802734, "logits/rejected": -2.140594005584717, "logps/chosen": -326.5360412597656, "logps/rejected": -398.43597412109375, "loss": 0.2813, "rewards/accuracies": 0.8125, "rewards/chosen": -2.651247024536133, "rewards/margins": 4.19482421875, "rewards/rejected": -6.846071720123291, "step": 2398 }, { "epoch": 0.5, "learning_rate": 1.0004201680672269e-05, "logits/chosen": -2.3763651847839355, "logits/rejected": -2.3359007835388184, "logps/chosen": -330.80255126953125, "logps/rejected": -355.82623291015625, "loss": 0.1755, "rewards/accuracies": 0.875, "rewards/chosen": -2.269780158996582, "rewards/margins": 5.421090126037598, "rewards/rejected": -7.69087028503418, "step": 2399 }, { "epoch": 0.5, "learning_rate": 1e-05, "logits/chosen": -2.2974443435668945, "logits/rejected": -2.065579414367676, "logps/chosen": -308.79461669921875, "logps/rejected": -320.31781005859375, "loss": 0.3388, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2970945835113525, "rewards/margins": 4.837871074676514, "rewards/rejected": -7.134965896606445, "step": 2400 }, { "epoch": 0.5, "learning_rate": 9.995798319327733e-06, "logits/chosen": -1.9178187847137451, "logits/rejected": -2.0462570190429688, "logps/chosen": -279.6192626953125, "logps/rejected": -320.6001281738281, "loss": 0.4874, "rewards/accuracies": 0.75, "rewards/chosen": -2.510626792907715, "rewards/margins": 4.299708366394043, "rewards/rejected": -6.8103346824646, "step": 2401 }, { "epoch": 0.5, "learning_rate": 9.991596638655463e-06, "logits/chosen": -2.2906880378723145, "logits/rejected": -2.1577656269073486, "logps/chosen": -263.5411376953125, "logps/rejected": -277.16473388671875, "loss": 0.3356, "rewards/accuracies": 0.8125, "rewards/chosen": -2.759248971939087, "rewards/margins": 2.738969564437866, "rewards/rejected": -5.498218536376953, "step": 2402 }, { "epoch": 0.5, "learning_rate": 9.987394957983195e-06, "logits/chosen": -1.7807637453079224, "logits/rejected": -1.669394612312317, "logps/chosen": -473.79345703125, "logps/rejected": -479.2176208496094, "loss": 0.3746, "rewards/accuracies": 0.75, "rewards/chosen": -2.7221126556396484, "rewards/margins": 3.74810791015625, "rewards/rejected": -6.470220565795898, "step": 2403 }, { "epoch": 0.5, "learning_rate": 9.983193277310925e-06, "logits/chosen": -2.2767786979675293, "logits/rejected": -1.8739500045776367, "logps/chosen": -317.3830871582031, "logps/rejected": -330.66082763671875, "loss": 0.2303, "rewards/accuracies": 0.875, "rewards/chosen": -2.860841989517212, "rewards/margins": 3.7483394145965576, "rewards/rejected": -6.6091814041137695, "step": 2404 }, { "epoch": 0.5, "learning_rate": 9.978991596638657e-06, "logits/chosen": -2.418147087097168, "logits/rejected": -1.9186406135559082, "logps/chosen": -273.658447265625, "logps/rejected": -317.94830322265625, "loss": 0.3404, "rewards/accuracies": 0.8125, "rewards/chosen": -3.732921600341797, "rewards/margins": 4.473215579986572, "rewards/rejected": -8.206136703491211, "step": 2405 }, { "epoch": 0.5, "learning_rate": 9.974789915966387e-06, "logits/chosen": -2.348971366882324, "logits/rejected": -2.004465103149414, "logps/chosen": -362.5961608886719, "logps/rejected": -457.1365051269531, "loss": 0.3792, "rewards/accuracies": 0.875, "rewards/chosen": -2.5905508995056152, "rewards/margins": 3.9804835319519043, "rewards/rejected": -6.5710344314575195, "step": 2406 }, { "epoch": 0.5, "learning_rate": 9.970588235294119e-06, "logits/chosen": -2.070108413696289, "logits/rejected": -1.8491915464401245, "logps/chosen": -252.86036682128906, "logps/rejected": -273.4906005859375, "loss": 0.3498, "rewards/accuracies": 0.8125, "rewards/chosen": -2.429840564727783, "rewards/margins": 3.3223671913146973, "rewards/rejected": -5.752207279205322, "step": 2407 }, { "epoch": 0.5, "learning_rate": 9.96638655462185e-06, "logits/chosen": -2.3902580738067627, "logits/rejected": -2.2644615173339844, "logps/chosen": -339.4729309082031, "logps/rejected": -313.0079040527344, "loss": 0.2017, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4026505947113037, "rewards/margins": 3.845741033554077, "rewards/rejected": -6.248391151428223, "step": 2408 }, { "epoch": 0.5, "learning_rate": 9.962184873949581e-06, "logits/chosen": -2.1466519832611084, "logits/rejected": -2.0018019676208496, "logps/chosen": -330.3734130859375, "logps/rejected": -274.92901611328125, "loss": 0.2013, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0059640407562256, "rewards/margins": 2.92734694480896, "rewards/rejected": -4.933310508728027, "step": 2409 }, { "epoch": 0.5, "learning_rate": 9.957983193277312e-06, "logits/chosen": -1.5954811573028564, "logits/rejected": -1.7179388999938965, "logps/chosen": -322.6182861328125, "logps/rejected": -384.58233642578125, "loss": 0.1033, "rewards/accuracies": 1.0, "rewards/chosen": -2.76421856880188, "rewards/margins": 4.421438217163086, "rewards/rejected": -7.185657024383545, "step": 2410 }, { "epoch": 0.5, "learning_rate": 9.953781512605043e-06, "logits/chosen": -1.8747562170028687, "logits/rejected": -1.8973026275634766, "logps/chosen": -237.95907592773438, "logps/rejected": -290.9625549316406, "loss": 0.6665, "rewards/accuracies": 0.6875, "rewards/chosen": -3.492603302001953, "rewards/margins": 1.7184562683105469, "rewards/rejected": -5.211059093475342, "step": 2411 }, { "epoch": 0.5, "learning_rate": 9.949579831932774e-06, "logits/chosen": -2.222580909729004, "logits/rejected": -1.9016811847686768, "logps/chosen": -459.78436279296875, "logps/rejected": -373.4814758300781, "loss": 0.0571, "rewards/accuracies": 1.0, "rewards/chosen": -2.050839900970459, "rewards/margins": 6.049892425537109, "rewards/rejected": -8.100732803344727, "step": 2412 }, { "epoch": 0.5, "learning_rate": 9.945378151260506e-06, "logits/chosen": -2.470627784729004, "logits/rejected": -2.0995800495147705, "logps/chosen": -447.10845947265625, "logps/rejected": -454.21258544921875, "loss": 0.2402, "rewards/accuracies": 0.875, "rewards/chosen": -2.3339078426361084, "rewards/margins": 4.221175193786621, "rewards/rejected": -6.555083274841309, "step": 2413 }, { "epoch": 0.51, "learning_rate": 9.941176470588236e-06, "logits/chosen": -2.3919808864593506, "logits/rejected": -2.0468430519104004, "logps/chosen": -371.371826171875, "logps/rejected": -364.661376953125, "loss": 0.313, "rewards/accuracies": 0.75, "rewards/chosen": -1.9488651752471924, "rewards/margins": 2.873868227005005, "rewards/rejected": -4.822733402252197, "step": 2414 }, { "epoch": 0.51, "learning_rate": 9.936974789915968e-06, "logits/chosen": -2.2457568645477295, "logits/rejected": -2.1089138984680176, "logps/chosen": -354.2779235839844, "logps/rejected": -371.3050537109375, "loss": 0.5444, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6733100414276123, "rewards/margins": 3.0175583362579346, "rewards/rejected": -5.690868377685547, "step": 2415 }, { "epoch": 0.51, "learning_rate": 9.932773109243698e-06, "logits/chosen": -2.256075382232666, "logits/rejected": -2.0453333854675293, "logps/chosen": -458.46246337890625, "logps/rejected": -434.7677307128906, "loss": 0.1436, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0981826782226562, "rewards/margins": 4.070631504058838, "rewards/rejected": -6.168813705444336, "step": 2416 }, { "epoch": 0.51, "learning_rate": 9.92857142857143e-06, "logits/chosen": -1.832474946975708, "logits/rejected": -1.751023292541504, "logps/chosen": -319.69476318359375, "logps/rejected": -306.7323913574219, "loss": 0.2366, "rewards/accuracies": 0.8125, "rewards/chosen": -2.0703225135803223, "rewards/margins": 4.127175807952881, "rewards/rejected": -6.197497844696045, "step": 2417 }, { "epoch": 0.51, "learning_rate": 9.92436974789916e-06, "logits/chosen": -2.2268459796905518, "logits/rejected": -1.9217164516448975, "logps/chosen": -467.34344482421875, "logps/rejected": -364.5041809082031, "loss": 0.296, "rewards/accuracies": 0.875, "rewards/chosen": -2.536264657974243, "rewards/margins": 4.76158332824707, "rewards/rejected": -7.297848224639893, "step": 2418 }, { "epoch": 0.51, "learning_rate": 9.920168067226892e-06, "logits/chosen": -2.0816447734832764, "logits/rejected": -2.1499948501586914, "logps/chosen": -249.6778564453125, "logps/rejected": -323.81109619140625, "loss": 0.5068, "rewards/accuracies": 0.8125, "rewards/chosen": -3.7292394638061523, "rewards/margins": 3.4589426517486572, "rewards/rejected": -7.1881818771362305, "step": 2419 }, { "epoch": 0.51, "learning_rate": 9.915966386554622e-06, "logits/chosen": -2.0341103076934814, "logits/rejected": -2.121586799621582, "logps/chosen": -341.7618408203125, "logps/rejected": -384.7890625, "loss": 0.1978, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6588406562805176, "rewards/margins": 4.138150691986084, "rewards/rejected": -6.796991348266602, "step": 2420 }, { "epoch": 0.51, "learning_rate": 9.911764705882354e-06, "logits/chosen": -2.099214792251587, "logits/rejected": -1.7684375047683716, "logps/chosen": -242.392822265625, "logps/rejected": -238.7559051513672, "loss": 0.4174, "rewards/accuracies": 0.75, "rewards/chosen": -3.2388548851013184, "rewards/margins": 2.9281370639801025, "rewards/rejected": -6.1669921875, "step": 2421 }, { "epoch": 0.51, "learning_rate": 9.907563025210084e-06, "logits/chosen": -2.173182964324951, "logits/rejected": -1.9689698219299316, "logps/chosen": -383.5847473144531, "logps/rejected": -328.8962707519531, "loss": 0.4517, "rewards/accuracies": 0.75, "rewards/chosen": -2.834446907043457, "rewards/margins": 3.36008358001709, "rewards/rejected": -6.194530487060547, "step": 2422 }, { "epoch": 0.51, "learning_rate": 9.903361344537816e-06, "logits/chosen": -2.096619129180908, "logits/rejected": -2.059051036834717, "logps/chosen": -283.3281555175781, "logps/rejected": -340.39385986328125, "loss": 0.3868, "rewards/accuracies": 0.9375, "rewards/chosen": -3.169192314147949, "rewards/margins": 3.4856536388397217, "rewards/rejected": -6.654845714569092, "step": 2423 }, { "epoch": 0.51, "learning_rate": 9.899159663865548e-06, "logits/chosen": -2.155031681060791, "logits/rejected": -1.4631612300872803, "logps/chosen": -359.2854309082031, "logps/rejected": -303.6133728027344, "loss": 0.5978, "rewards/accuracies": 0.75, "rewards/chosen": -3.564146041870117, "rewards/margins": 1.9146242141723633, "rewards/rejected": -5.4787702560424805, "step": 2424 }, { "epoch": 0.51, "learning_rate": 9.894957983193278e-06, "logits/chosen": -2.281752824783325, "logits/rejected": -2.141045093536377, "logps/chosen": -218.8125762939453, "logps/rejected": -289.11956787109375, "loss": 0.221, "rewards/accuracies": 0.875, "rewards/chosen": -3.1416196823120117, "rewards/margins": 4.813127517700195, "rewards/rejected": -7.954747200012207, "step": 2425 }, { "epoch": 0.51, "learning_rate": 9.89075630252101e-06, "logits/chosen": -1.8661407232284546, "logits/rejected": -1.8895190954208374, "logps/chosen": -270.0294494628906, "logps/rejected": -388.3438720703125, "loss": 0.4309, "rewards/accuracies": 0.75, "rewards/chosen": -2.1605801582336426, "rewards/margins": 5.889432907104492, "rewards/rejected": -8.050013542175293, "step": 2426 }, { "epoch": 0.51, "learning_rate": 9.88655462184874e-06, "logits/chosen": -2.2457845211029053, "logits/rejected": -1.9798784255981445, "logps/chosen": -413.43975830078125, "logps/rejected": -605.51611328125, "loss": 0.101, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1516494750976562, "rewards/margins": 4.583868026733398, "rewards/rejected": -6.735517501831055, "step": 2427 }, { "epoch": 0.51, "learning_rate": 9.882352941176472e-06, "logits/chosen": -2.276337146759033, "logits/rejected": -2.0764236450195312, "logps/chosen": -362.8184814453125, "logps/rejected": -392.36920166015625, "loss": 0.483, "rewards/accuracies": 0.875, "rewards/chosen": -2.7045369148254395, "rewards/margins": 2.9930124282836914, "rewards/rejected": -5.697549819946289, "step": 2428 }, { "epoch": 0.51, "learning_rate": 9.878151260504203e-06, "logits/chosen": -2.273655414581299, "logits/rejected": -2.3905067443847656, "logps/chosen": -328.6600646972656, "logps/rejected": -447.10546875, "loss": 0.3746, "rewards/accuracies": 0.8125, "rewards/chosen": -1.7527495622634888, "rewards/margins": 3.2043819427490234, "rewards/rejected": -4.957131385803223, "step": 2429 }, { "epoch": 0.51, "learning_rate": 9.873949579831935e-06, "logits/chosen": -2.25905442237854, "logits/rejected": -1.72170889377594, "logps/chosen": -392.6197509765625, "logps/rejected": -355.248291015625, "loss": 0.2448, "rewards/accuracies": 0.875, "rewards/chosen": -2.880793571472168, "rewards/margins": 4.852039337158203, "rewards/rejected": -7.732832908630371, "step": 2430 }, { "epoch": 0.51, "learning_rate": 9.869747899159665e-06, "logits/chosen": -2.402531623840332, "logits/rejected": -2.236377716064453, "logps/chosen": -266.3785400390625, "logps/rejected": -251.65174865722656, "loss": 0.1707, "rewards/accuracies": 0.875, "rewards/chosen": -3.1692585945129395, "rewards/margins": 3.8515825271606445, "rewards/rejected": -7.020841598510742, "step": 2431 }, { "epoch": 0.51, "learning_rate": 9.865546218487397e-06, "logits/chosen": -2.138195753097534, "logits/rejected": -1.9108774662017822, "logps/chosen": -423.58221435546875, "logps/rejected": -396.9621276855469, "loss": 0.352, "rewards/accuracies": 0.75, "rewards/chosen": -2.975484848022461, "rewards/margins": 4.312573432922363, "rewards/rejected": -7.288058280944824, "step": 2432 }, { "epoch": 0.51, "learning_rate": 9.861344537815127e-06, "logits/chosen": -2.290330648422241, "logits/rejected": -2.1706066131591797, "logps/chosen": -307.01837158203125, "logps/rejected": -342.9692687988281, "loss": 0.5941, "rewards/accuracies": 0.875, "rewards/chosen": -3.3528783321380615, "rewards/margins": 2.6740097999572754, "rewards/rejected": -6.026887893676758, "step": 2433 }, { "epoch": 0.51, "learning_rate": 9.857142857142859e-06, "logits/chosen": -2.1459391117095947, "logits/rejected": -1.9871735572814941, "logps/chosen": -356.656982421875, "logps/rejected": -378.06744384765625, "loss": 0.2507, "rewards/accuracies": 0.875, "rewards/chosen": -2.0185086727142334, "rewards/margins": 3.6278114318847656, "rewards/rejected": -5.646320343017578, "step": 2434 }, { "epoch": 0.51, "learning_rate": 9.852941176470589e-06, "logits/chosen": -2.2394466400146484, "logits/rejected": -2.0912163257598877, "logps/chosen": -408.5823059082031, "logps/rejected": -428.4944763183594, "loss": 0.3981, "rewards/accuracies": 0.8125, "rewards/chosen": -3.22701096534729, "rewards/margins": 2.8428955078125, "rewards/rejected": -6.069906234741211, "step": 2435 }, { "epoch": 0.51, "learning_rate": 9.848739495798321e-06, "logits/chosen": -2.0506677627563477, "logits/rejected": -2.067673683166504, "logps/chosen": -326.36077880859375, "logps/rejected": -365.61651611328125, "loss": 0.7727, "rewards/accuracies": 0.625, "rewards/chosen": -3.611337184906006, "rewards/margins": 3.0270886421203613, "rewards/rejected": -6.638426303863525, "step": 2436 }, { "epoch": 0.51, "learning_rate": 9.844537815126051e-06, "logits/chosen": -1.8925117254257202, "logits/rejected": -2.0888848304748535, "logps/chosen": -271.73089599609375, "logps/rejected": -324.39678955078125, "loss": 0.3358, "rewards/accuracies": 0.875, "rewards/chosen": -2.850221633911133, "rewards/margins": 4.2946953773498535, "rewards/rejected": -7.1449174880981445, "step": 2437 }, { "epoch": 0.51, "learning_rate": 9.840336134453781e-06, "logits/chosen": -1.892627239227295, "logits/rejected": -1.7988214492797852, "logps/chosen": -171.96270751953125, "logps/rejected": -243.96498107910156, "loss": 0.3838, "rewards/accuracies": 0.8125, "rewards/chosen": -2.9922971725463867, "rewards/margins": 3.47005033493042, "rewards/rejected": -6.462347507476807, "step": 2438 }, { "epoch": 0.51, "learning_rate": 9.836134453781513e-06, "logits/chosen": -2.303834915161133, "logits/rejected": -1.8925604820251465, "logps/chosen": -377.1575012207031, "logps/rejected": -363.5316162109375, "loss": 0.8032, "rewards/accuracies": 0.75, "rewards/chosen": -3.1350483894348145, "rewards/margins": 2.6547577381134033, "rewards/rejected": -5.789806365966797, "step": 2439 }, { "epoch": 0.51, "learning_rate": 9.831932773109244e-06, "logits/chosen": -1.9305191040039062, "logits/rejected": -1.6555087566375732, "logps/chosen": -329.98370361328125, "logps/rejected": -361.0148620605469, "loss": 0.6618, "rewards/accuracies": 0.75, "rewards/chosen": -2.3163788318634033, "rewards/margins": 4.039077281951904, "rewards/rejected": -6.355456352233887, "step": 2440 }, { "epoch": 0.51, "learning_rate": 9.827731092436975e-06, "logits/chosen": -2.544266700744629, "logits/rejected": -2.2024903297424316, "logps/chosen": -339.9169616699219, "logps/rejected": -298.70294189453125, "loss": 0.4582, "rewards/accuracies": 0.8125, "rewards/chosen": -3.0411252975463867, "rewards/margins": 2.788961887359619, "rewards/rejected": -5.830087184906006, "step": 2441 }, { "epoch": 0.51, "learning_rate": 9.823529411764706e-06, "logits/chosen": -2.1996612548828125, "logits/rejected": -1.7889305353164673, "logps/chosen": -346.45660400390625, "logps/rejected": -335.2572021484375, "loss": 0.143, "rewards/accuracies": 1.0, "rewards/chosen": -2.0869550704956055, "rewards/margins": 6.196144104003906, "rewards/rejected": -8.283100128173828, "step": 2442 }, { "epoch": 0.51, "learning_rate": 9.819327731092438e-06, "logits/chosen": -2.2793169021606445, "logits/rejected": -2.0673391819000244, "logps/chosen": -366.3721008300781, "logps/rejected": -389.57647705078125, "loss": 0.3627, "rewards/accuracies": 0.75, "rewards/chosen": -2.7804150581359863, "rewards/margins": 4.010008811950684, "rewards/rejected": -6.790424346923828, "step": 2443 }, { "epoch": 0.51, "learning_rate": 9.815126050420168e-06, "logits/chosen": -2.2194337844848633, "logits/rejected": -1.9418132305145264, "logps/chosen": -289.8243408203125, "logps/rejected": -254.2372283935547, "loss": 0.5406, "rewards/accuracies": 0.8125, "rewards/chosen": -3.4012985229492188, "rewards/margins": 2.4547157287597656, "rewards/rejected": -5.856014251708984, "step": 2444 }, { "epoch": 0.51, "learning_rate": 9.8109243697479e-06, "logits/chosen": -2.2629475593566895, "logits/rejected": -1.9769794940948486, "logps/chosen": -294.1553039550781, "logps/rejected": -311.6893615722656, "loss": 0.2439, "rewards/accuracies": 0.9375, "rewards/chosen": -2.181826114654541, "rewards/margins": 3.803929328918457, "rewards/rejected": -5.985755920410156, "step": 2445 }, { "epoch": 0.51, "learning_rate": 9.80672268907563e-06, "logits/chosen": -2.369563102722168, "logits/rejected": -2.0142669677734375, "logps/chosen": -324.5804443359375, "logps/rejected": -452.1680908203125, "loss": 0.268, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2360963821411133, "rewards/margins": 3.114483118057251, "rewards/rejected": -6.350579738616943, "step": 2446 }, { "epoch": 0.51, "learning_rate": 9.802521008403362e-06, "logits/chosen": -1.959319829940796, "logits/rejected": -2.3647871017456055, "logps/chosen": -275.794677734375, "logps/rejected": -394.02490234375, "loss": 0.4332, "rewards/accuracies": 0.8125, "rewards/chosen": -2.740354061126709, "rewards/margins": 3.1907262802124023, "rewards/rejected": -5.9310808181762695, "step": 2447 }, { "epoch": 0.51, "learning_rate": 9.798319327731092e-06, "logits/chosen": -2.1668601036071777, "logits/rejected": -2.0067825317382812, "logps/chosen": -332.92303466796875, "logps/rejected": -349.88104248046875, "loss": 0.2853, "rewards/accuracies": 0.8125, "rewards/chosen": -2.146078109741211, "rewards/margins": 4.368680000305176, "rewards/rejected": -6.514758110046387, "step": 2448 }, { "epoch": 0.51, "learning_rate": 9.794117647058824e-06, "logits/chosen": -2.097109079360962, "logits/rejected": -1.8445606231689453, "logps/chosen": -287.12579345703125, "logps/rejected": -224.05560302734375, "loss": 0.3181, "rewards/accuracies": 0.75, "rewards/chosen": -3.3764634132385254, "rewards/margins": 3.5817036628723145, "rewards/rejected": -6.958167552947998, "step": 2449 }, { "epoch": 0.51, "learning_rate": 9.789915966386554e-06, "logits/chosen": -1.8995410203933716, "logits/rejected": -1.6100553274154663, "logps/chosen": -328.62054443359375, "logps/rejected": -350.05615234375, "loss": 0.3648, "rewards/accuracies": 0.75, "rewards/chosen": -2.5372843742370605, "rewards/margins": 2.7280852794647217, "rewards/rejected": -5.265369415283203, "step": 2450 }, { "epoch": 0.51, "learning_rate": 9.785714285714286e-06, "logits/chosen": -1.7688593864440918, "logits/rejected": -1.6556869745254517, "logps/chosen": -329.97637939453125, "logps/rejected": -363.11309814453125, "loss": 0.4768, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1256723403930664, "rewards/margins": 2.586132764816284, "rewards/rejected": -5.711804389953613, "step": 2451 }, { "epoch": 0.51, "learning_rate": 9.781512605042018e-06, "logits/chosen": -2.0984439849853516, "logits/rejected": -2.225480079650879, "logps/chosen": -303.2366943359375, "logps/rejected": -337.7622985839844, "loss": 0.3028, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2887487411499023, "rewards/margins": 3.055553913116455, "rewards/rejected": -6.344302654266357, "step": 2452 }, { "epoch": 0.51, "learning_rate": 9.777310924369748e-06, "logits/chosen": -2.042961597442627, "logits/rejected": -2.081300735473633, "logps/chosen": -350.177001953125, "logps/rejected": -369.05224609375, "loss": 0.221, "rewards/accuracies": 0.875, "rewards/chosen": -2.5011491775512695, "rewards/margins": 3.2595925331115723, "rewards/rejected": -5.760741233825684, "step": 2453 }, { "epoch": 0.51, "learning_rate": 9.77310924369748e-06, "logits/chosen": -2.1722545623779297, "logits/rejected": -1.8652329444885254, "logps/chosen": -246.10723876953125, "logps/rejected": -290.38055419921875, "loss": 0.2364, "rewards/accuracies": 0.875, "rewards/chosen": -2.429633617401123, "rewards/margins": 4.2516632080078125, "rewards/rejected": -6.6812968254089355, "step": 2454 }, { "epoch": 0.51, "learning_rate": 9.76890756302521e-06, "logits/chosen": -2.1108481884002686, "logits/rejected": -1.9407432079315186, "logps/chosen": -301.5434265136719, "logps/rejected": -341.26556396484375, "loss": 0.2757, "rewards/accuracies": 0.875, "rewards/chosen": -2.6759743690490723, "rewards/margins": 3.18408465385437, "rewards/rejected": -5.860058784484863, "step": 2455 }, { "epoch": 0.51, "learning_rate": 9.764705882352942e-06, "logits/chosen": -1.7556816339492798, "logits/rejected": -1.8322665691375732, "logps/chosen": -225.53726196289062, "logps/rejected": -284.1981201171875, "loss": 0.3382, "rewards/accuracies": 0.75, "rewards/chosen": -3.129570245742798, "rewards/margins": 3.1095287799835205, "rewards/rejected": -6.239099502563477, "step": 2456 }, { "epoch": 0.51, "learning_rate": 9.760504201680673e-06, "logits/chosen": -2.4053800106048584, "logits/rejected": -1.751323938369751, "logps/chosen": -290.16680908203125, "logps/rejected": -318.2878112792969, "loss": 0.1977, "rewards/accuracies": 0.875, "rewards/chosen": -2.7551960945129395, "rewards/margins": 3.770480155944824, "rewards/rejected": -6.525676250457764, "step": 2457 }, { "epoch": 0.51, "learning_rate": 9.756302521008404e-06, "logits/chosen": -2.2911484241485596, "logits/rejected": -2.147085666656494, "logps/chosen": -267.47674560546875, "logps/rejected": -262.7044982910156, "loss": 0.2244, "rewards/accuracies": 0.875, "rewards/chosen": -1.8555421829223633, "rewards/margins": 3.6998133659362793, "rewards/rejected": -5.555356025695801, "step": 2458 }, { "epoch": 0.51, "learning_rate": 9.752100840336135e-06, "logits/chosen": -1.7450745105743408, "logits/rejected": -1.8867847919464111, "logps/chosen": -265.0449523925781, "logps/rejected": -268.73773193359375, "loss": 0.2069, "rewards/accuracies": 0.875, "rewards/chosen": -3.2022788524627686, "rewards/margins": 3.4072115421295166, "rewards/rejected": -6.609490394592285, "step": 2459 }, { "epoch": 0.51, "learning_rate": 9.747899159663867e-06, "logits/chosen": -2.0996358394622803, "logits/rejected": -2.2437381744384766, "logps/chosen": -238.03005981445312, "logps/rejected": -359.89202880859375, "loss": 0.1595, "rewards/accuracies": 1.0, "rewards/chosen": -1.8500618934631348, "rewards/margins": 3.956296443939209, "rewards/rejected": -5.806358337402344, "step": 2460 }, { "epoch": 0.51, "learning_rate": 9.743697478991597e-06, "logits/chosen": -2.2528650760650635, "logits/rejected": -2.020031452178955, "logps/chosen": -397.2727966308594, "logps/rejected": -303.8723449707031, "loss": 0.2884, "rewards/accuracies": 0.875, "rewards/chosen": -1.974136233329773, "rewards/margins": 3.6831774711608887, "rewards/rejected": -5.657313823699951, "step": 2461 }, { "epoch": 0.52, "learning_rate": 9.739495798319329e-06, "logits/chosen": -2.1542177200317383, "logits/rejected": -2.0130393505096436, "logps/chosen": -416.8458251953125, "logps/rejected": -399.8071594238281, "loss": 0.2241, "rewards/accuracies": 0.875, "rewards/chosen": -2.472114086151123, "rewards/margins": 3.7513413429260254, "rewards/rejected": -6.223455905914307, "step": 2462 }, { "epoch": 0.52, "learning_rate": 9.735294117647059e-06, "logits/chosen": -2.1159396171569824, "logits/rejected": -2.096940279006958, "logps/chosen": -392.9292907714844, "logps/rejected": -406.60382080078125, "loss": 0.1088, "rewards/accuracies": 1.0, "rewards/chosen": -2.2731056213378906, "rewards/margins": 5.1493120193481445, "rewards/rejected": -7.422417640686035, "step": 2463 }, { "epoch": 0.52, "learning_rate": 9.731092436974791e-06, "logits/chosen": -2.410989284515381, "logits/rejected": -2.2994556427001953, "logps/chosen": -355.16552734375, "logps/rejected": -338.94647216796875, "loss": 0.3553, "rewards/accuracies": 0.6875, "rewards/chosen": -3.0237691402435303, "rewards/margins": 3.965956211090088, "rewards/rejected": -6.989725112915039, "step": 2464 }, { "epoch": 0.52, "learning_rate": 9.726890756302521e-06, "logits/chosen": -2.0583953857421875, "logits/rejected": -1.7986162900924683, "logps/chosen": -287.96136474609375, "logps/rejected": -308.815185546875, "loss": 0.1745, "rewards/accuracies": 0.9375, "rewards/chosen": -2.40604829788208, "rewards/margins": 4.625705718994141, "rewards/rejected": -7.031754016876221, "step": 2465 }, { "epoch": 0.52, "learning_rate": 9.722689075630253e-06, "logits/chosen": -2.1780307292938232, "logits/rejected": -2.0415542125701904, "logps/chosen": -256.6803894042969, "logps/rejected": -253.82778930664062, "loss": 0.3768, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1402909755706787, "rewards/margins": 3.306736946105957, "rewards/rejected": -6.447027206420898, "step": 2466 }, { "epoch": 0.52, "learning_rate": 9.718487394957983e-06, "logits/chosen": -2.262104034423828, "logits/rejected": -2.003505229949951, "logps/chosen": -424.62738037109375, "logps/rejected": -354.9641418457031, "loss": 0.8189, "rewards/accuracies": 0.625, "rewards/chosen": -3.7076375484466553, "rewards/margins": 1.9948680400848389, "rewards/rejected": -5.702506065368652, "step": 2467 }, { "epoch": 0.52, "learning_rate": 9.714285714285715e-06, "logits/chosen": -2.070359468460083, "logits/rejected": -2.31072735786438, "logps/chosen": -263.1658935546875, "logps/rejected": -297.6817321777344, "loss": 0.251, "rewards/accuracies": 0.9375, "rewards/chosen": -2.321218729019165, "rewards/margins": 5.258805751800537, "rewards/rejected": -7.580024719238281, "step": 2468 }, { "epoch": 0.52, "learning_rate": 9.710084033613445e-06, "logits/chosen": -2.224356174468994, "logits/rejected": -1.8325464725494385, "logps/chosen": -322.0057678222656, "logps/rejected": -346.1938171386719, "loss": 0.161, "rewards/accuracies": 0.9375, "rewards/chosen": -2.505855083465576, "rewards/margins": 4.101947784423828, "rewards/rejected": -6.6078033447265625, "step": 2469 }, { "epoch": 0.52, "learning_rate": 9.705882352941177e-06, "logits/chosen": -2.0635294914245605, "logits/rejected": -2.2067527770996094, "logps/chosen": -216.92633056640625, "logps/rejected": -264.6098937988281, "loss": 0.2893, "rewards/accuracies": 0.875, "rewards/chosen": -2.2246415615081787, "rewards/margins": 3.6159889698028564, "rewards/rejected": -5.840631008148193, "step": 2470 }, { "epoch": 0.52, "learning_rate": 9.701680672268908e-06, "logits/chosen": -2.1970443725585938, "logits/rejected": -1.5998048782348633, "logps/chosen": -349.5067138671875, "logps/rejected": -348.56768798828125, "loss": 0.2665, "rewards/accuracies": 0.875, "rewards/chosen": -2.670179843902588, "rewards/margins": 3.6221532821655273, "rewards/rejected": -6.292333126068115, "step": 2471 }, { "epoch": 0.52, "learning_rate": 9.69747899159664e-06, "logits/chosen": -2.314962863922119, "logits/rejected": -1.5359362363815308, "logps/chosen": -375.8669128417969, "logps/rejected": -272.38360595703125, "loss": 0.2938, "rewards/accuracies": 0.8125, "rewards/chosen": -2.750781774520874, "rewards/margins": 3.455026626586914, "rewards/rejected": -6.205808639526367, "step": 2472 }, { "epoch": 0.52, "learning_rate": 9.693277310924371e-06, "logits/chosen": -1.9978368282318115, "logits/rejected": -1.9219955205917358, "logps/chosen": -291.6861877441406, "logps/rejected": -295.47235107421875, "loss": 0.5612, "rewards/accuracies": 0.875, "rewards/chosen": -2.6381125450134277, "rewards/margins": 2.6202173233032227, "rewards/rejected": -5.25832986831665, "step": 2473 }, { "epoch": 0.52, "learning_rate": 9.689075630252102e-06, "logits/chosen": -1.9236409664154053, "logits/rejected": -1.6505722999572754, "logps/chosen": -319.92327880859375, "logps/rejected": -256.6158142089844, "loss": 0.3467, "rewards/accuracies": 0.875, "rewards/chosen": -2.785343885421753, "rewards/margins": 3.375783920288086, "rewards/rejected": -6.161128044128418, "step": 2474 }, { "epoch": 0.52, "learning_rate": 9.684873949579834e-06, "logits/chosen": -2.1630935668945312, "logits/rejected": -1.744858980178833, "logps/chosen": -374.8528747558594, "logps/rejected": -353.58740234375, "loss": 0.6678, "rewards/accuracies": 0.75, "rewards/chosen": -3.1705503463745117, "rewards/margins": 3.0533413887023926, "rewards/rejected": -6.223891258239746, "step": 2475 }, { "epoch": 0.52, "learning_rate": 9.680672268907564e-06, "logits/chosen": -1.9848287105560303, "logits/rejected": -1.430161476135254, "logps/chosen": -381.407958984375, "logps/rejected": -290.5611267089844, "loss": 0.1705, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1242594718933105, "rewards/margins": 3.6427717208862305, "rewards/rejected": -6.767031669616699, "step": 2476 }, { "epoch": 0.52, "learning_rate": 9.676470588235296e-06, "logits/chosen": -2.0961620807647705, "logits/rejected": -1.940819501876831, "logps/chosen": -348.21270751953125, "logps/rejected": -337.52410888671875, "loss": 0.3089, "rewards/accuracies": 0.875, "rewards/chosen": -2.33778715133667, "rewards/margins": 4.036999225616455, "rewards/rejected": -6.374786376953125, "step": 2477 }, { "epoch": 0.52, "learning_rate": 9.672268907563026e-06, "logits/chosen": -2.1644392013549805, "logits/rejected": -1.6128807067871094, "logps/chosen": -461.83074951171875, "logps/rejected": -383.1968994140625, "loss": 0.245, "rewards/accuracies": 1.0, "rewards/chosen": -2.3617360591888428, "rewards/margins": 2.6947131156921387, "rewards/rejected": -5.056448936462402, "step": 2478 }, { "epoch": 0.52, "learning_rate": 9.668067226890758e-06, "logits/chosen": -1.943839430809021, "logits/rejected": -1.903981328010559, "logps/chosen": -378.90447998046875, "logps/rejected": -447.855224609375, "loss": 0.259, "rewards/accuracies": 0.875, "rewards/chosen": -2.8508312702178955, "rewards/margins": 3.9421370029449463, "rewards/rejected": -6.792968273162842, "step": 2479 }, { "epoch": 0.52, "learning_rate": 9.663865546218488e-06, "logits/chosen": -2.2937865257263184, "logits/rejected": -2.1340508460998535, "logps/chosen": -401.56903076171875, "logps/rejected": -331.7358093261719, "loss": 0.1174, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3423702716827393, "rewards/margins": 4.279393196105957, "rewards/rejected": -6.621763229370117, "step": 2480 }, { "epoch": 0.52, "learning_rate": 9.65966386554622e-06, "logits/chosen": -2.031053066253662, "logits/rejected": -1.5821595191955566, "logps/chosen": -347.1388854980469, "logps/rejected": -314.3232421875, "loss": 0.1265, "rewards/accuracies": 0.9375, "rewards/chosen": -2.601536750793457, "rewards/margins": 4.141270637512207, "rewards/rejected": -6.742807388305664, "step": 2481 }, { "epoch": 0.52, "learning_rate": 9.65546218487395e-06, "logits/chosen": -2.2985172271728516, "logits/rejected": -2.019052028656006, "logps/chosen": -431.980712890625, "logps/rejected": -340.51171875, "loss": 0.1518, "rewards/accuracies": 1.0, "rewards/chosen": -2.3010196685791016, "rewards/margins": 3.675325870513916, "rewards/rejected": -5.976345539093018, "step": 2482 }, { "epoch": 0.52, "learning_rate": 9.651260504201682e-06, "logits/chosen": -1.8462460041046143, "logits/rejected": -2.1538021564483643, "logps/chosen": -192.8182830810547, "logps/rejected": -307.1131591796875, "loss": 0.2123, "rewards/accuracies": 0.9375, "rewards/chosen": -2.861116886138916, "rewards/margins": 3.683250904083252, "rewards/rejected": -6.544367790222168, "step": 2483 }, { "epoch": 0.52, "learning_rate": 9.647058823529412e-06, "logits/chosen": -2.170926570892334, "logits/rejected": -2.0191023349761963, "logps/chosen": -426.21856689453125, "logps/rejected": -409.97418212890625, "loss": 0.6966, "rewards/accuracies": 0.75, "rewards/chosen": -3.1404061317443848, "rewards/margins": 2.4268946647644043, "rewards/rejected": -5.567300796508789, "step": 2484 }, { "epoch": 0.52, "learning_rate": 9.642857142857144e-06, "logits/chosen": -2.3636703491210938, "logits/rejected": -2.213817834854126, "logps/chosen": -505.3054504394531, "logps/rejected": -388.56536865234375, "loss": 0.2744, "rewards/accuracies": 0.8125, "rewards/chosen": -1.8712806701660156, "rewards/margins": 3.5582900047302246, "rewards/rejected": -5.429571151733398, "step": 2485 }, { "epoch": 0.52, "learning_rate": 9.638655462184874e-06, "logits/chosen": -2.345541000366211, "logits/rejected": -2.1253554821014404, "logps/chosen": -460.6683044433594, "logps/rejected": -458.9995422363281, "loss": 0.5745, "rewards/accuracies": 0.6875, "rewards/chosen": -3.807734251022339, "rewards/margins": 1.8625736236572266, "rewards/rejected": -5.670307636260986, "step": 2486 }, { "epoch": 0.52, "learning_rate": 9.634453781512606e-06, "logits/chosen": -2.132876396179199, "logits/rejected": -2.0411922931671143, "logps/chosen": -315.79840087890625, "logps/rejected": -297.36163330078125, "loss": 0.1376, "rewards/accuracies": 0.9375, "rewards/chosen": -1.6191868782043457, "rewards/margins": 3.9339449405670166, "rewards/rejected": -5.553132057189941, "step": 2487 }, { "epoch": 0.52, "learning_rate": 9.630252100840337e-06, "logits/chosen": -2.327385187149048, "logits/rejected": -1.8433102369308472, "logps/chosen": -357.0150146484375, "logps/rejected": -343.45794677734375, "loss": 0.2, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4687323570251465, "rewards/margins": 3.935394287109375, "rewards/rejected": -6.40412712097168, "step": 2488 }, { "epoch": 0.52, "learning_rate": 9.626050420168068e-06, "logits/chosen": -2.3436050415039062, "logits/rejected": -1.8886823654174805, "logps/chosen": -376.2164306640625, "logps/rejected": -400.1048583984375, "loss": 0.1636, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5092034339904785, "rewards/margins": 4.286349296569824, "rewards/rejected": -6.795552730560303, "step": 2489 }, { "epoch": 0.52, "learning_rate": 9.621848739495799e-06, "logits/chosen": -2.188086748123169, "logits/rejected": -1.9260979890823364, "logps/chosen": -425.3425598144531, "logps/rejected": -377.23675537109375, "loss": 0.4469, "rewards/accuracies": 0.8125, "rewards/chosen": -2.410393476486206, "rewards/margins": 3.28798508644104, "rewards/rejected": -5.698378562927246, "step": 2490 }, { "epoch": 0.52, "learning_rate": 9.61764705882353e-06, "logits/chosen": -2.1028683185577393, "logits/rejected": -1.5959678888320923, "logps/chosen": -350.1116027832031, "logps/rejected": -383.0577392578125, "loss": 0.2312, "rewards/accuracies": 0.875, "rewards/chosen": -2.581228733062744, "rewards/margins": 4.88942813873291, "rewards/rejected": -7.470656871795654, "step": 2491 }, { "epoch": 0.52, "learning_rate": 9.61344537815126e-06, "logits/chosen": -2.0289759635925293, "logits/rejected": -2.2293808460235596, "logps/chosen": -261.5367736816406, "logps/rejected": -360.9933776855469, "loss": 0.1301, "rewards/accuracies": 0.9375, "rewards/chosen": -1.9632079601287842, "rewards/margins": 5.21551513671875, "rewards/rejected": -7.178723335266113, "step": 2492 }, { "epoch": 0.52, "learning_rate": 9.609243697478993e-06, "logits/chosen": -2.3193390369415283, "logits/rejected": -2.1731221675872803, "logps/chosen": -246.1613311767578, "logps/rejected": -234.50965881347656, "loss": 0.4198, "rewards/accuracies": 0.6875, "rewards/chosen": -2.6315531730651855, "rewards/margins": 3.3104610443115234, "rewards/rejected": -5.942013740539551, "step": 2493 }, { "epoch": 0.52, "learning_rate": 9.605042016806723e-06, "logits/chosen": -1.9105511903762817, "logits/rejected": -2.056098461151123, "logps/chosen": -377.2398986816406, "logps/rejected": -371.1231384277344, "loss": 0.4123, "rewards/accuracies": 0.8125, "rewards/chosen": -1.841477394104004, "rewards/margins": 2.861736297607422, "rewards/rejected": -4.703213691711426, "step": 2494 }, { "epoch": 0.52, "learning_rate": 9.600840336134455e-06, "logits/chosen": -2.2975151538848877, "logits/rejected": -2.175659418106079, "logps/chosen": -319.239990234375, "logps/rejected": -306.5977783203125, "loss": 0.4597, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4825851917266846, "rewards/margins": 2.254281997680664, "rewards/rejected": -4.7368669509887695, "step": 2495 }, { "epoch": 0.52, "learning_rate": 9.596638655462187e-06, "logits/chosen": -2.369351387023926, "logits/rejected": -2.026219606399536, "logps/chosen": -431.4033508300781, "logps/rejected": -322.7351379394531, "loss": 0.1122, "rewards/accuracies": 0.9375, "rewards/chosen": -1.7852988243103027, "rewards/margins": 4.616962432861328, "rewards/rejected": -6.402260780334473, "step": 2496 }, { "epoch": 0.52, "learning_rate": 9.592436974789917e-06, "logits/chosen": -2.0997745990753174, "logits/rejected": -1.7317702770233154, "logps/chosen": -433.49139404296875, "logps/rejected": -342.0841064453125, "loss": 0.2304, "rewards/accuracies": 0.875, "rewards/chosen": -2.3035593032836914, "rewards/margins": 3.2437310218811035, "rewards/rejected": -5.547290325164795, "step": 2497 }, { "epoch": 0.52, "learning_rate": 9.588235294117649e-06, "logits/chosen": -2.0126049518585205, "logits/rejected": -2.014080047607422, "logps/chosen": -277.796142578125, "logps/rejected": -271.4325256347656, "loss": 0.4414, "rewards/accuracies": 0.6875, "rewards/chosen": -2.6962857246398926, "rewards/margins": 3.405818462371826, "rewards/rejected": -6.102104187011719, "step": 2498 }, { "epoch": 0.52, "learning_rate": 9.584033613445379e-06, "logits/chosen": -2.1265807151794434, "logits/rejected": -2.022434711456299, "logps/chosen": -316.7300109863281, "logps/rejected": -390.12530517578125, "loss": 0.1293, "rewards/accuracies": 0.9375, "rewards/chosen": -2.51875638961792, "rewards/margins": 5.055368900299072, "rewards/rejected": -7.574125289916992, "step": 2499 }, { "epoch": 0.52, "learning_rate": 9.579831932773111e-06, "logits/chosen": -2.3787965774536133, "logits/rejected": -1.7455025911331177, "logps/chosen": -407.04150390625, "logps/rejected": -344.09625244140625, "loss": 0.3443, "rewards/accuracies": 0.8125, "rewards/chosen": -2.2838099002838135, "rewards/margins": 3.2583203315734863, "rewards/rejected": -5.542130470275879, "step": 2500 }, { "epoch": 0.52, "learning_rate": 9.575630252100841e-06, "logits/chosen": -2.4119229316711426, "logits/rejected": -2.168670177459717, "logps/chosen": -293.9447937011719, "logps/rejected": -282.108642578125, "loss": 0.1043, "rewards/accuracies": 0.9375, "rewards/chosen": -2.0820729732513428, "rewards/margins": 4.6340250968933105, "rewards/rejected": -6.716097831726074, "step": 2501 }, { "epoch": 0.52, "learning_rate": 9.571428571428573e-06, "logits/chosen": -2.0610527992248535, "logits/rejected": -2.028764486312866, "logps/chosen": -392.11962890625, "logps/rejected": -350.20660400390625, "loss": 0.4263, "rewards/accuracies": 0.75, "rewards/chosen": -2.5539908409118652, "rewards/margins": 2.3492860794067383, "rewards/rejected": -4.9032769203186035, "step": 2502 }, { "epoch": 0.52, "learning_rate": 9.567226890756303e-06, "logits/chosen": -2.3153438568115234, "logits/rejected": -2.0979604721069336, "logps/chosen": -268.85833740234375, "logps/rejected": -279.8310852050781, "loss": 0.103, "rewards/accuracies": 1.0, "rewards/chosen": -2.7873706817626953, "rewards/margins": 5.002776622772217, "rewards/rejected": -7.79014778137207, "step": 2503 }, { "epoch": 0.52, "learning_rate": 9.563025210084035e-06, "logits/chosen": -1.9433951377868652, "logits/rejected": -1.827419400215149, "logps/chosen": -290.7541198730469, "logps/rejected": -373.6928405761719, "loss": 0.406, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4459328651428223, "rewards/margins": 4.008621692657471, "rewards/rejected": -7.454555034637451, "step": 2504 }, { "epoch": 0.52, "learning_rate": 9.558823529411766e-06, "logits/chosen": -2.309584140777588, "logits/rejected": -1.8415096998214722, "logps/chosen": -335.1734924316406, "logps/rejected": -389.43701171875, "loss": 0.3345, "rewards/accuracies": 0.875, "rewards/chosen": -2.6883304119110107, "rewards/margins": 3.4640326499938965, "rewards/rejected": -6.152363300323486, "step": 2505 }, { "epoch": 0.52, "learning_rate": 9.554621848739497e-06, "logits/chosen": -2.209505558013916, "logits/rejected": -1.7459771633148193, "logps/chosen": -386.4450988769531, "logps/rejected": -244.0200958251953, "loss": 0.6366, "rewards/accuracies": 0.75, "rewards/chosen": -2.361931085586548, "rewards/margins": 2.834162712097168, "rewards/rejected": -5.196094036102295, "step": 2506 }, { "epoch": 0.52, "learning_rate": 9.550420168067228e-06, "logits/chosen": -2.232476234436035, "logits/rejected": -2.1178627014160156, "logps/chosen": -414.0107421875, "logps/rejected": -382.9124755859375, "loss": 0.2299, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2530810832977295, "rewards/margins": 4.126534461975098, "rewards/rejected": -6.379615783691406, "step": 2507 }, { "epoch": 0.52, "learning_rate": 9.54621848739496e-06, "logits/chosen": -2.2799813747406006, "logits/rejected": -1.9662835597991943, "logps/chosen": -266.8333435058594, "logps/rejected": -234.3785400390625, "loss": 0.2949, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3078579902648926, "rewards/margins": 3.4136760234832764, "rewards/rejected": -5.721534252166748, "step": 2508 }, { "epoch": 0.52, "learning_rate": 9.54201680672269e-06, "logits/chosen": -2.110401153564453, "logits/rejected": -1.8340482711791992, "logps/chosen": -352.2585754394531, "logps/rejected": -404.18145751953125, "loss": 0.2744, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9760286808013916, "rewards/margins": 3.059877634048462, "rewards/rejected": -5.0359063148498535, "step": 2509 }, { "epoch": 0.53, "learning_rate": 9.537815126050422e-06, "logits/chosen": -2.256673574447632, "logits/rejected": -1.7906372547149658, "logps/chosen": -450.4818420410156, "logps/rejected": -350.98626708984375, "loss": 0.4995, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6848156452178955, "rewards/margins": 3.273085594177246, "rewards/rejected": -5.957901954650879, "step": 2510 }, { "epoch": 0.53, "learning_rate": 9.533613445378152e-06, "logits/chosen": -2.168178081512451, "logits/rejected": -1.7355785369873047, "logps/chosen": -388.55828857421875, "logps/rejected": -332.3291320800781, "loss": 0.0916, "rewards/accuracies": 1.0, "rewards/chosen": -1.4317495822906494, "rewards/margins": 4.800617218017578, "rewards/rejected": -6.232367038726807, "step": 2511 }, { "epoch": 0.53, "learning_rate": 9.529411764705882e-06, "logits/chosen": -2.5559027194976807, "logits/rejected": -2.1007933616638184, "logps/chosen": -310.33428955078125, "logps/rejected": -284.7672424316406, "loss": 0.3639, "rewards/accuracies": 0.75, "rewards/chosen": -2.927351236343384, "rewards/margins": 2.579256296157837, "rewards/rejected": -5.506607532501221, "step": 2512 }, { "epoch": 0.53, "learning_rate": 9.525210084033614e-06, "logits/chosen": -2.0332939624786377, "logits/rejected": -1.690711259841919, "logps/chosen": -327.4412536621094, "logps/rejected": -327.41357421875, "loss": 0.3181, "rewards/accuracies": 0.75, "rewards/chosen": -3.3663837909698486, "rewards/margins": 3.2649383544921875, "rewards/rejected": -6.631321907043457, "step": 2513 }, { "epoch": 0.53, "learning_rate": 9.521008403361344e-06, "logits/chosen": -1.9128518104553223, "logits/rejected": -2.0001280307769775, "logps/chosen": -430.49176025390625, "logps/rejected": -585.4301147460938, "loss": 0.1853, "rewards/accuracies": 0.9375, "rewards/chosen": -2.566788673400879, "rewards/margins": 3.837230682373047, "rewards/rejected": -6.404019355773926, "step": 2514 }, { "epoch": 0.53, "learning_rate": 9.516806722689076e-06, "logits/chosen": -2.2985007762908936, "logits/rejected": -2.203998565673828, "logps/chosen": -226.50936889648438, "logps/rejected": -280.779296875, "loss": 0.3349, "rewards/accuracies": 0.75, "rewards/chosen": -3.4485068321228027, "rewards/margins": 2.9634742736816406, "rewards/rejected": -6.411981582641602, "step": 2515 }, { "epoch": 0.53, "learning_rate": 9.512605042016806e-06, "logits/chosen": -1.8921406269073486, "logits/rejected": -2.1687142848968506, "logps/chosen": -306.92303466796875, "logps/rejected": -409.34918212890625, "loss": 0.2676, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7105960845947266, "rewards/margins": 4.616537094116211, "rewards/rejected": -7.327133655548096, "step": 2516 }, { "epoch": 0.53, "learning_rate": 9.508403361344538e-06, "logits/chosen": -2.0976529121398926, "logits/rejected": -1.7766838073730469, "logps/chosen": -280.36126708984375, "logps/rejected": -257.9566345214844, "loss": 0.1702, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2920262813568115, "rewards/margins": 3.9331674575805664, "rewards/rejected": -6.225193977355957, "step": 2517 }, { "epoch": 0.53, "learning_rate": 9.504201680672269e-06, "logits/chosen": -2.11857008934021, "logits/rejected": -1.8373630046844482, "logps/chosen": -387.91180419921875, "logps/rejected": -446.870849609375, "loss": 0.0336, "rewards/accuracies": 1.0, "rewards/chosen": -1.7399072647094727, "rewards/margins": 5.450682163238525, "rewards/rejected": -7.190589904785156, "step": 2518 }, { "epoch": 0.53, "learning_rate": 9.5e-06, "logits/chosen": -1.8230106830596924, "logits/rejected": -1.8141568899154663, "logps/chosen": -303.5601806640625, "logps/rejected": -322.4443359375, "loss": 0.2569, "rewards/accuracies": 0.875, "rewards/chosen": -2.457347869873047, "rewards/margins": 4.088535785675049, "rewards/rejected": -6.545883655548096, "step": 2519 }, { "epoch": 0.53, "learning_rate": 9.49579831932773e-06, "logits/chosen": -2.058150291442871, "logits/rejected": -2.3269007205963135, "logps/chosen": -196.10536193847656, "logps/rejected": -246.83120727539062, "loss": 0.4088, "rewards/accuracies": 0.75, "rewards/chosen": -3.1146841049194336, "rewards/margins": 3.557880401611328, "rewards/rejected": -6.672564506530762, "step": 2520 }, { "epoch": 0.53, "learning_rate": 9.491596638655463e-06, "logits/chosen": -2.2051827907562256, "logits/rejected": -1.6986697912216187, "logps/chosen": -355.34686279296875, "logps/rejected": -291.80450439453125, "loss": 0.108, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5224666595458984, "rewards/margins": 5.002082347869873, "rewards/rejected": -7.5245490074157715, "step": 2521 }, { "epoch": 0.53, "learning_rate": 9.487394957983193e-06, "logits/chosen": -2.323888063430786, "logits/rejected": -2.148203134536743, "logps/chosen": -284.5354919433594, "logps/rejected": -283.0790710449219, "loss": 0.2816, "rewards/accuracies": 0.875, "rewards/chosen": -2.6046156883239746, "rewards/margins": 3.734713077545166, "rewards/rejected": -6.339328765869141, "step": 2522 }, { "epoch": 0.53, "learning_rate": 9.483193277310925e-06, "logits/chosen": -2.112504720687866, "logits/rejected": -1.9059298038482666, "logps/chosen": -387.5921630859375, "logps/rejected": -358.6956787109375, "loss": 0.227, "rewards/accuracies": 0.8125, "rewards/chosen": -2.621278762817383, "rewards/margins": 4.938642501831055, "rewards/rejected": -7.5599212646484375, "step": 2523 }, { "epoch": 0.53, "learning_rate": 9.478991596638657e-06, "logits/chosen": -2.171133518218994, "logits/rejected": -1.8824927806854248, "logps/chosen": -400.78460693359375, "logps/rejected": -373.2786865234375, "loss": 0.1441, "rewards/accuracies": 1.0, "rewards/chosen": -2.8059349060058594, "rewards/margins": 4.005174160003662, "rewards/rejected": -6.81110954284668, "step": 2524 }, { "epoch": 0.53, "learning_rate": 9.474789915966387e-06, "logits/chosen": -2.111607074737549, "logits/rejected": -1.998233437538147, "logps/chosen": -319.780029296875, "logps/rejected": -297.7987976074219, "loss": 0.4915, "rewards/accuracies": 0.8125, "rewards/chosen": -3.224074363708496, "rewards/margins": 3.958573818206787, "rewards/rejected": -7.182647705078125, "step": 2525 }, { "epoch": 0.53, "learning_rate": 9.470588235294119e-06, "logits/chosen": -2.301401376724243, "logits/rejected": -2.1560215950012207, "logps/chosen": -325.9587707519531, "logps/rejected": -343.59716796875, "loss": 0.1024, "rewards/accuracies": 1.0, "rewards/chosen": -2.68658447265625, "rewards/margins": 5.390464782714844, "rewards/rejected": -8.077049255371094, "step": 2526 }, { "epoch": 0.53, "learning_rate": 9.466386554621849e-06, "logits/chosen": -2.146578311920166, "logits/rejected": -1.9742950201034546, "logps/chosen": -465.35198974609375, "logps/rejected": -409.40875244140625, "loss": 0.232, "rewards/accuracies": 0.875, "rewards/chosen": -3.1047110557556152, "rewards/margins": 3.148139476776123, "rewards/rejected": -6.252850532531738, "step": 2527 }, { "epoch": 0.53, "learning_rate": 9.462184873949581e-06, "logits/chosen": -2.2264466285705566, "logits/rejected": -1.9232317209243774, "logps/chosen": -436.72650146484375, "logps/rejected": -376.5250244140625, "loss": 0.3923, "rewards/accuracies": 0.875, "rewards/chosen": -3.417790651321411, "rewards/margins": 3.1051182746887207, "rewards/rejected": -6.522909164428711, "step": 2528 }, { "epoch": 0.53, "learning_rate": 9.457983193277311e-06, "logits/chosen": -2.1570754051208496, "logits/rejected": -1.8366494178771973, "logps/chosen": -342.090576171875, "logps/rejected": -323.77850341796875, "loss": 0.4182, "rewards/accuracies": 0.8125, "rewards/chosen": -3.4511475563049316, "rewards/margins": 3.0063416957855225, "rewards/rejected": -6.457489013671875, "step": 2529 }, { "epoch": 0.53, "learning_rate": 9.453781512605043e-06, "logits/chosen": -2.2689995765686035, "logits/rejected": -2.2033443450927734, "logps/chosen": -342.376953125, "logps/rejected": -363.0454406738281, "loss": 0.3438, "rewards/accuracies": 0.875, "rewards/chosen": -3.188932418823242, "rewards/margins": 4.162549018859863, "rewards/rejected": -7.3514814376831055, "step": 2530 }, { "epoch": 0.53, "learning_rate": 9.449579831932773e-06, "logits/chosen": -2.2346906661987305, "logits/rejected": -2.1344492435455322, "logps/chosen": -438.34271240234375, "logps/rejected": -363.677734375, "loss": 0.1747, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8356618881225586, "rewards/margins": 3.488903522491455, "rewards/rejected": -6.324565410614014, "step": 2531 }, { "epoch": 0.53, "learning_rate": 9.445378151260505e-06, "logits/chosen": -2.234792470932007, "logits/rejected": -2.0919010639190674, "logps/chosen": -281.65472412109375, "logps/rejected": -278.4136657714844, "loss": 0.17, "rewards/accuracies": 0.9375, "rewards/chosen": -2.791999340057373, "rewards/margins": 4.423157691955566, "rewards/rejected": -7.2151570320129395, "step": 2532 }, { "epoch": 0.53, "learning_rate": 9.441176470588235e-06, "logits/chosen": -2.1052494049072266, "logits/rejected": -2.055840492248535, "logps/chosen": -285.5604248046875, "logps/rejected": -355.980224609375, "loss": 0.1288, "rewards/accuracies": 0.9375, "rewards/chosen": -3.395705461502075, "rewards/margins": 4.4319915771484375, "rewards/rejected": -7.827697277069092, "step": 2533 }, { "epoch": 0.53, "learning_rate": 9.436974789915967e-06, "logits/chosen": -2.3428354263305664, "logits/rejected": -2.0979807376861572, "logps/chosen": -394.2915344238281, "logps/rejected": -431.5267028808594, "loss": 0.4827, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2202205657958984, "rewards/margins": 4.644374370574951, "rewards/rejected": -7.864595413208008, "step": 2534 }, { "epoch": 0.53, "learning_rate": 9.432773109243698e-06, "logits/chosen": -2.5848116874694824, "logits/rejected": -2.0564680099487305, "logps/chosen": -375.3398742675781, "logps/rejected": -338.7669677734375, "loss": 0.337, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3333778381347656, "rewards/margins": 3.6800408363342285, "rewards/rejected": -6.013419151306152, "step": 2535 }, { "epoch": 0.53, "learning_rate": 9.42857142857143e-06, "logits/chosen": -2.174773931503296, "logits/rejected": -1.9295463562011719, "logps/chosen": -380.8409729003906, "logps/rejected": -344.0479736328125, "loss": 0.0953, "rewards/accuracies": 1.0, "rewards/chosen": -2.2969679832458496, "rewards/margins": 4.161410331726074, "rewards/rejected": -6.458378314971924, "step": 2536 }, { "epoch": 0.53, "learning_rate": 9.42436974789916e-06, "logits/chosen": -2.273197889328003, "logits/rejected": -2.0794014930725098, "logps/chosen": -382.24139404296875, "logps/rejected": -367.83453369140625, "loss": 0.1858, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1326944828033447, "rewards/margins": 3.704744338989258, "rewards/rejected": -6.837438583374023, "step": 2537 }, { "epoch": 0.53, "learning_rate": 9.420168067226892e-06, "logits/chosen": -2.1383862495422363, "logits/rejected": -1.583884835243225, "logps/chosen": -423.390380859375, "logps/rejected": -338.5177917480469, "loss": 0.647, "rewards/accuracies": 0.8125, "rewards/chosen": -3.6184678077697754, "rewards/margins": 3.1192736625671387, "rewards/rejected": -6.737740993499756, "step": 2538 }, { "epoch": 0.53, "learning_rate": 9.415966386554622e-06, "logits/chosen": -2.1102585792541504, "logits/rejected": -2.008113384246826, "logps/chosen": -236.2758331298828, "logps/rejected": -438.6126708984375, "loss": 0.1283, "rewards/accuracies": 0.875, "rewards/chosen": -3.6426756381988525, "rewards/margins": 4.893115043640137, "rewards/rejected": -8.53579044342041, "step": 2539 }, { "epoch": 0.53, "learning_rate": 9.411764705882354e-06, "logits/chosen": -1.9974117279052734, "logits/rejected": -1.9260965585708618, "logps/chosen": -332.7476501464844, "logps/rejected": -316.2168273925781, "loss": 0.2161, "rewards/accuracies": 0.9375, "rewards/chosen": -3.078606605529785, "rewards/margins": 3.4742813110351562, "rewards/rejected": -6.552887916564941, "step": 2540 }, { "epoch": 0.53, "learning_rate": 9.407563025210084e-06, "logits/chosen": -2.1985411643981934, "logits/rejected": -2.1689884662628174, "logps/chosen": -473.10382080078125, "logps/rejected": -372.552001953125, "loss": 0.1044, "rewards/accuracies": 1.0, "rewards/chosen": -2.685713768005371, "rewards/margins": 5.397963047027588, "rewards/rejected": -8.083677291870117, "step": 2541 }, { "epoch": 0.53, "learning_rate": 9.403361344537816e-06, "logits/chosen": -2.0937719345092773, "logits/rejected": -2.144117593765259, "logps/chosen": -257.310546875, "logps/rejected": -354.2978210449219, "loss": 0.1695, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1185336112976074, "rewards/margins": 4.972678184509277, "rewards/rejected": -8.091211318969727, "step": 2542 }, { "epoch": 0.53, "learning_rate": 9.399159663865546e-06, "logits/chosen": -1.9987530708312988, "logits/rejected": -2.2044856548309326, "logps/chosen": -243.2696533203125, "logps/rejected": -358.2451171875, "loss": 0.594, "rewards/accuracies": 0.75, "rewards/chosen": -3.61677885055542, "rewards/margins": 3.6245479583740234, "rewards/rejected": -7.241326332092285, "step": 2543 }, { "epoch": 0.53, "learning_rate": 9.394957983193278e-06, "logits/chosen": -2.6127560138702393, "logits/rejected": -2.3447189331054688, "logps/chosen": -400.9631652832031, "logps/rejected": -371.0506896972656, "loss": 0.1134, "rewards/accuracies": 1.0, "rewards/chosen": -1.8211510181427002, "rewards/margins": 4.983794689178467, "rewards/rejected": -6.804945945739746, "step": 2544 }, { "epoch": 0.53, "learning_rate": 9.390756302521008e-06, "logits/chosen": -2.37842059135437, "logits/rejected": -2.18770432472229, "logps/chosen": -374.48046875, "logps/rejected": -347.82623291015625, "loss": 0.2057, "rewards/accuracies": 0.875, "rewards/chosen": -1.1693285703659058, "rewards/margins": 5.296117782592773, "rewards/rejected": -6.465446472167969, "step": 2545 }, { "epoch": 0.53, "learning_rate": 9.38655462184874e-06, "logits/chosen": -2.119645833969116, "logits/rejected": -2.2463600635528564, "logps/chosen": -312.02764892578125, "logps/rejected": -404.00872802734375, "loss": 0.1784, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5017592906951904, "rewards/margins": 5.538944244384766, "rewards/rejected": -8.040704727172852, "step": 2546 }, { "epoch": 0.53, "learning_rate": 9.382352941176472e-06, "logits/chosen": -2.4114084243774414, "logits/rejected": -1.9574097394943237, "logps/chosen": -474.704345703125, "logps/rejected": -385.8339538574219, "loss": 0.2403, "rewards/accuracies": 0.9375, "rewards/chosen": -2.181457996368408, "rewards/margins": 4.81538200378418, "rewards/rejected": -6.996840476989746, "step": 2547 }, { "epoch": 0.53, "learning_rate": 9.378151260504202e-06, "logits/chosen": -2.1346471309661865, "logits/rejected": -1.9008607864379883, "logps/chosen": -258.4367370605469, "logps/rejected": -241.75396728515625, "loss": 0.6987, "rewards/accuracies": 0.8125, "rewards/chosen": -3.449197769165039, "rewards/margins": 2.8112971782684326, "rewards/rejected": -6.260494709014893, "step": 2548 }, { "epoch": 0.53, "learning_rate": 9.373949579831934e-06, "logits/chosen": -2.092656373977661, "logits/rejected": -1.788853406906128, "logps/chosen": -433.49664306640625, "logps/rejected": -292.1119384765625, "loss": 0.2048, "rewards/accuracies": 0.875, "rewards/chosen": -1.957458257675171, "rewards/margins": 4.3302531242370605, "rewards/rejected": -6.287711143493652, "step": 2549 }, { "epoch": 0.53, "learning_rate": 9.369747899159664e-06, "logits/chosen": -2.3631653785705566, "logits/rejected": -1.8547563552856445, "logps/chosen": -397.82257080078125, "logps/rejected": -366.6249694824219, "loss": 0.2385, "rewards/accuracies": 0.9375, "rewards/chosen": -3.6198830604553223, "rewards/margins": 3.241133689880371, "rewards/rejected": -6.861016273498535, "step": 2550 }, { "epoch": 0.53, "learning_rate": 9.365546218487396e-06, "logits/chosen": -2.1993772983551025, "logits/rejected": -1.9715027809143066, "logps/chosen": -357.59527587890625, "logps/rejected": -387.57830810546875, "loss": 0.7124, "rewards/accuracies": 0.6875, "rewards/chosen": -2.9526937007904053, "rewards/margins": 3.0046770572662354, "rewards/rejected": -5.957370281219482, "step": 2551 }, { "epoch": 0.53, "learning_rate": 9.361344537815127e-06, "logits/chosen": -2.4792256355285645, "logits/rejected": -2.3154115676879883, "logps/chosen": -255.52005004882812, "logps/rejected": -288.7085266113281, "loss": 0.4241, "rewards/accuracies": 0.8125, "rewards/chosen": -2.933387279510498, "rewards/margins": 3.574796438217163, "rewards/rejected": -6.508183479309082, "step": 2552 }, { "epoch": 0.53, "learning_rate": 9.357142857142859e-06, "logits/chosen": -2.463165760040283, "logits/rejected": -1.7777312994003296, "logps/chosen": -375.28375244140625, "logps/rejected": -367.20050048828125, "loss": 0.2777, "rewards/accuracies": 0.875, "rewards/chosen": -2.9922327995300293, "rewards/margins": 4.594165802001953, "rewards/rejected": -7.586398124694824, "step": 2553 }, { "epoch": 0.53, "learning_rate": 9.352941176470589e-06, "logits/chosen": -2.365229368209839, "logits/rejected": -2.02795147895813, "logps/chosen": -575.577880859375, "logps/rejected": -464.572998046875, "loss": 0.3341, "rewards/accuracies": 0.875, "rewards/chosen": -2.211085796356201, "rewards/margins": 3.429306745529175, "rewards/rejected": -5.640392780303955, "step": 2554 }, { "epoch": 0.53, "learning_rate": 9.34873949579832e-06, "logits/chosen": -2.081172466278076, "logits/rejected": -1.6347401142120361, "logps/chosen": -390.55810546875, "logps/rejected": -368.6292724609375, "loss": 0.1993, "rewards/accuracies": 0.9375, "rewards/chosen": -3.218534231185913, "rewards/margins": 4.430109024047852, "rewards/rejected": -7.6486430168151855, "step": 2555 }, { "epoch": 0.53, "learning_rate": 9.344537815126051e-06, "logits/chosen": -2.2170302867889404, "logits/rejected": -1.7161248922348022, "logps/chosen": -303.3514404296875, "logps/rejected": -304.1554260253906, "loss": 0.3066, "rewards/accuracies": 0.875, "rewards/chosen": -2.3877222537994385, "rewards/margins": 4.722976207733154, "rewards/rejected": -7.1106977462768555, "step": 2556 }, { "epoch": 0.53, "learning_rate": 9.340336134453783e-06, "logits/chosen": -2.178788661956787, "logits/rejected": -1.8989672660827637, "logps/chosen": -298.6821594238281, "logps/rejected": -332.08087158203125, "loss": 0.3726, "rewards/accuracies": 0.8125, "rewards/chosen": -3.8281090259552, "rewards/margins": 3.697291374206543, "rewards/rejected": -7.525400161743164, "step": 2557 }, { "epoch": 0.54, "learning_rate": 9.336134453781513e-06, "logits/chosen": -2.3828001022338867, "logits/rejected": -1.632203459739685, "logps/chosen": -344.9515686035156, "logps/rejected": -344.1308898925781, "loss": 0.387, "rewards/accuracies": 0.875, "rewards/chosen": -1.9686893224716187, "rewards/margins": 4.584531784057617, "rewards/rejected": -6.553220748901367, "step": 2558 }, { "epoch": 0.54, "learning_rate": 9.331932773109245e-06, "logits/chosen": -2.446682929992676, "logits/rejected": -2.491478443145752, "logps/chosen": -300.871826171875, "logps/rejected": -419.3265380859375, "loss": 0.0867, "rewards/accuracies": 1.0, "rewards/chosen": -2.8470587730407715, "rewards/margins": 5.517639636993408, "rewards/rejected": -8.36469841003418, "step": 2559 }, { "epoch": 0.54, "learning_rate": 9.327731092436975e-06, "logits/chosen": -2.0064337253570557, "logits/rejected": -2.1416516304016113, "logps/chosen": -247.630615234375, "logps/rejected": -385.41314697265625, "loss": 0.2106, "rewards/accuracies": 0.8125, "rewards/chosen": -2.85239839553833, "rewards/margins": 4.437196731567383, "rewards/rejected": -7.289595603942871, "step": 2560 }, { "epoch": 0.54, "learning_rate": 9.323529411764707e-06, "logits/chosen": -2.3393592834472656, "logits/rejected": -1.9963867664337158, "logps/chosen": -242.6816864013672, "logps/rejected": -270.532470703125, "loss": 0.1097, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4430789947509766, "rewards/margins": 4.7362236976623535, "rewards/rejected": -7.17930269241333, "step": 2561 }, { "epoch": 0.54, "learning_rate": 9.319327731092437e-06, "logits/chosen": -2.2107598781585693, "logits/rejected": -1.7756901979446411, "logps/chosen": -347.0983581542969, "logps/rejected": -295.2074890136719, "loss": 0.1596, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4553799629211426, "rewards/margins": 4.009431838989258, "rewards/rejected": -6.464812278747559, "step": 2562 }, { "epoch": 0.54, "learning_rate": 9.31512605042017e-06, "logits/chosen": -2.297487735748291, "logits/rejected": -1.8098678588867188, "logps/chosen": -341.30523681640625, "logps/rejected": -402.459716796875, "loss": 0.3214, "rewards/accuracies": 0.875, "rewards/chosen": -2.6589231491088867, "rewards/margins": 5.263858795166016, "rewards/rejected": -7.922781944274902, "step": 2563 }, { "epoch": 0.54, "learning_rate": 9.3109243697479e-06, "logits/chosen": -2.148975372314453, "logits/rejected": -2.149716854095459, "logps/chosen": -339.5742492675781, "logps/rejected": -288.9598693847656, "loss": 0.3535, "rewards/accuracies": 0.875, "rewards/chosen": -2.34553861618042, "rewards/margins": 4.230324745178223, "rewards/rejected": -6.575863838195801, "step": 2564 }, { "epoch": 0.54, "learning_rate": 9.306722689075631e-06, "logits/chosen": -2.296535015106201, "logits/rejected": -1.8774789571762085, "logps/chosen": -302.58282470703125, "logps/rejected": -257.0469055175781, "loss": 0.2817, "rewards/accuracies": 0.875, "rewards/chosen": -2.46828556060791, "rewards/margins": 3.013158082962036, "rewards/rejected": -5.481443405151367, "step": 2565 }, { "epoch": 0.54, "learning_rate": 9.302521008403362e-06, "logits/chosen": -2.0437076091766357, "logits/rejected": -1.8062479496002197, "logps/chosen": -354.3843994140625, "logps/rejected": -416.3578186035156, "loss": 0.3051, "rewards/accuracies": 0.875, "rewards/chosen": -2.923539876937866, "rewards/margins": 5.1094160079956055, "rewards/rejected": -8.032955169677734, "step": 2566 }, { "epoch": 0.54, "learning_rate": 9.298319327731094e-06, "logits/chosen": -2.1177310943603516, "logits/rejected": -1.8696045875549316, "logps/chosen": -283.68768310546875, "logps/rejected": -254.80938720703125, "loss": 0.1382, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4072346687316895, "rewards/margins": 4.758561611175537, "rewards/rejected": -7.165797233581543, "step": 2567 }, { "epoch": 0.54, "learning_rate": 9.294117647058824e-06, "logits/chosen": -2.1093902587890625, "logits/rejected": -1.8260833024978638, "logps/chosen": -341.4385681152344, "logps/rejected": -343.77423095703125, "loss": 0.6989, "rewards/accuracies": 0.6875, "rewards/chosen": -3.6635022163391113, "rewards/margins": 2.3317060470581055, "rewards/rejected": -5.995208263397217, "step": 2568 }, { "epoch": 0.54, "learning_rate": 9.289915966386556e-06, "logits/chosen": -2.2569215297698975, "logits/rejected": -2.148149013519287, "logps/chosen": -300.59686279296875, "logps/rejected": -294.5800476074219, "loss": 0.2444, "rewards/accuracies": 0.75, "rewards/chosen": -3.311414957046509, "rewards/margins": 3.8160738945007324, "rewards/rejected": -7.12748908996582, "step": 2569 }, { "epoch": 0.54, "learning_rate": 9.285714285714288e-06, "logits/chosen": -2.2656607627868652, "logits/rejected": -1.7776451110839844, "logps/chosen": -371.0494384765625, "logps/rejected": -386.62384033203125, "loss": 0.119, "rewards/accuracies": 0.9375, "rewards/chosen": -2.2209994792938232, "rewards/margins": 5.91341495513916, "rewards/rejected": -8.134414672851562, "step": 2570 }, { "epoch": 0.54, "learning_rate": 9.281512605042018e-06, "logits/chosen": -2.3822526931762695, "logits/rejected": -2.024885654449463, "logps/chosen": -341.25274658203125, "logps/rejected": -340.01080322265625, "loss": 0.2822, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7695624828338623, "rewards/margins": 4.532607078552246, "rewards/rejected": -7.302169322967529, "step": 2571 }, { "epoch": 0.54, "learning_rate": 9.27731092436975e-06, "logits/chosen": -2.1967086791992188, "logits/rejected": -1.936452865600586, "logps/chosen": -220.71705627441406, "logps/rejected": -289.6693115234375, "loss": 0.2589, "rewards/accuracies": 0.875, "rewards/chosen": -2.8824918270111084, "rewards/margins": 4.057369232177734, "rewards/rejected": -6.939861297607422, "step": 2572 }, { "epoch": 0.54, "learning_rate": 9.27310924369748e-06, "logits/chosen": -2.1816163063049316, "logits/rejected": -2.0887491703033447, "logps/chosen": -378.16412353515625, "logps/rejected": -364.8260498046875, "loss": 0.4068, "rewards/accuracies": 0.75, "rewards/chosen": -2.208500385284424, "rewards/margins": 3.5802958011627197, "rewards/rejected": -5.7887959480285645, "step": 2573 }, { "epoch": 0.54, "learning_rate": 9.268907563025212e-06, "logits/chosen": -2.514922618865967, "logits/rejected": -2.0112464427948, "logps/chosen": -447.1953125, "logps/rejected": -442.54864501953125, "loss": 0.1463, "rewards/accuracies": 0.9375, "rewards/chosen": -2.279115676879883, "rewards/margins": 4.6169867515563965, "rewards/rejected": -6.8961029052734375, "step": 2574 }, { "epoch": 0.54, "learning_rate": 9.264705882352942e-06, "logits/chosen": -2.353874683380127, "logits/rejected": -2.1691641807556152, "logps/chosen": -294.71722412109375, "logps/rejected": -317.8603515625, "loss": 0.1574, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7350270748138428, "rewards/margins": 3.9950876235961914, "rewards/rejected": -6.730114459991455, "step": 2575 }, { "epoch": 0.54, "learning_rate": 9.260504201680674e-06, "logits/chosen": -2.1018621921539307, "logits/rejected": -1.7307868003845215, "logps/chosen": -295.99041748046875, "logps/rejected": -355.86468505859375, "loss": 0.2477, "rewards/accuracies": 0.875, "rewards/chosen": -2.4273812770843506, "rewards/margins": 4.7862548828125, "rewards/rejected": -7.21363639831543, "step": 2576 }, { "epoch": 0.54, "learning_rate": 9.256302521008404e-06, "logits/chosen": -1.9776158332824707, "logits/rejected": -2.1583383083343506, "logps/chosen": -424.2509765625, "logps/rejected": -420.47650146484375, "loss": 0.1513, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9167046546936035, "rewards/margins": 5.897480010986328, "rewards/rejected": -8.81418514251709, "step": 2577 }, { "epoch": 0.54, "learning_rate": 9.252100840336136e-06, "logits/chosen": -2.0331006050109863, "logits/rejected": -1.9048519134521484, "logps/chosen": -388.6837158203125, "logps/rejected": -312.158203125, "loss": 0.5198, "rewards/accuracies": 0.75, "rewards/chosen": -2.9923412799835205, "rewards/margins": 2.4050796031951904, "rewards/rejected": -5.397420883178711, "step": 2578 }, { "epoch": 0.54, "learning_rate": 9.247899159663866e-06, "logits/chosen": -2.337507724761963, "logits/rejected": -2.2936506271362305, "logps/chosen": -328.03741455078125, "logps/rejected": -372.49237060546875, "loss": 0.1646, "rewards/accuracies": 0.9375, "rewards/chosen": -1.964887022972107, "rewards/margins": 5.622317314147949, "rewards/rejected": -7.5872039794921875, "step": 2579 }, { "epoch": 0.54, "learning_rate": 9.243697478991598e-06, "logits/chosen": -1.9623291492462158, "logits/rejected": -2.17836332321167, "logps/chosen": -340.5430908203125, "logps/rejected": -392.6394958496094, "loss": 0.1688, "rewards/accuracies": 0.875, "rewards/chosen": -2.9706039428710938, "rewards/margins": 3.4422855377197266, "rewards/rejected": -6.412889003753662, "step": 2580 }, { "epoch": 0.54, "learning_rate": 9.239495798319328e-06, "logits/chosen": -2.2881360054016113, "logits/rejected": -1.9698160886764526, "logps/chosen": -564.8394165039062, "logps/rejected": -408.98504638671875, "loss": 0.4924, "rewards/accuracies": 0.75, "rewards/chosen": -2.5167877674102783, "rewards/margins": 3.31068754196167, "rewards/rejected": -5.827474594116211, "step": 2581 }, { "epoch": 0.54, "learning_rate": 9.23529411764706e-06, "logits/chosen": -2.0503244400024414, "logits/rejected": -2.069507122039795, "logps/chosen": -292.56622314453125, "logps/rejected": -419.79974365234375, "loss": 0.2276, "rewards/accuracies": 0.875, "rewards/chosen": -2.765777826309204, "rewards/margins": 4.641334533691406, "rewards/rejected": -7.4071125984191895, "step": 2582 }, { "epoch": 0.54, "learning_rate": 9.23109243697479e-06, "logits/chosen": -2.0481579303741455, "logits/rejected": -2.0596923828125, "logps/chosen": -262.9170837402344, "logps/rejected": -304.87054443359375, "loss": 0.2464, "rewards/accuracies": 0.875, "rewards/chosen": -2.7521820068359375, "rewards/margins": 3.893430709838867, "rewards/rejected": -6.645613193511963, "step": 2583 }, { "epoch": 0.54, "learning_rate": 9.226890756302523e-06, "logits/chosen": -2.185044288635254, "logits/rejected": -1.5094817876815796, "logps/chosen": -346.2208557128906, "logps/rejected": -355.1290283203125, "loss": 0.1542, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6816561222076416, "rewards/margins": 4.00274658203125, "rewards/rejected": -6.6844024658203125, "step": 2584 }, { "epoch": 0.54, "learning_rate": 9.222689075630253e-06, "logits/chosen": -2.153420925140381, "logits/rejected": -1.7344470024108887, "logps/chosen": -374.46807861328125, "logps/rejected": -261.6391296386719, "loss": 0.5289, "rewards/accuracies": 0.6875, "rewards/chosen": -2.0606772899627686, "rewards/margins": 2.821770429611206, "rewards/rejected": -4.882447719573975, "step": 2585 }, { "epoch": 0.54, "learning_rate": 9.218487394957983e-06, "logits/chosen": -2.0616300106048584, "logits/rejected": -1.7246735095977783, "logps/chosen": -324.0895080566406, "logps/rejected": -334.64532470703125, "loss": 0.143, "rewards/accuracies": 0.9375, "rewards/chosen": -2.841618537902832, "rewards/margins": 5.997417449951172, "rewards/rejected": -8.839035987854004, "step": 2586 }, { "epoch": 0.54, "learning_rate": 9.214285714285715e-06, "logits/chosen": -2.087186813354492, "logits/rejected": -1.5766535997390747, "logps/chosen": -353.9708557128906, "logps/rejected": -271.87548828125, "loss": 0.4163, "rewards/accuracies": 0.8125, "rewards/chosen": -2.5630550384521484, "rewards/margins": 4.894275188446045, "rewards/rejected": -7.457330226898193, "step": 2587 }, { "epoch": 0.54, "learning_rate": 9.210084033613445e-06, "logits/chosen": -2.0295844078063965, "logits/rejected": -2.0846285820007324, "logps/chosen": -248.7470245361328, "logps/rejected": -300.9535217285156, "loss": 0.2358, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9439549446105957, "rewards/margins": 4.458446502685547, "rewards/rejected": -7.402401924133301, "step": 2588 }, { "epoch": 0.54, "learning_rate": 9.205882352941177e-06, "logits/chosen": -2.112820625305176, "logits/rejected": -2.02443790435791, "logps/chosen": -345.8756408691406, "logps/rejected": -354.57257080078125, "loss": 0.1852, "rewards/accuracies": 0.8125, "rewards/chosen": -2.617520809173584, "rewards/margins": 4.981188774108887, "rewards/rejected": -7.598709583282471, "step": 2589 }, { "epoch": 0.54, "learning_rate": 9.201680672268907e-06, "logits/chosen": -2.3481132984161377, "logits/rejected": -2.384918451309204, "logps/chosen": -258.334228515625, "logps/rejected": -276.2754211425781, "loss": 0.0383, "rewards/accuracies": 1.0, "rewards/chosen": -1.7667877674102783, "rewards/margins": 6.250648498535156, "rewards/rejected": -8.017436027526855, "step": 2590 }, { "epoch": 0.54, "learning_rate": 9.19747899159664e-06, "logits/chosen": -2.3376073837280273, "logits/rejected": -1.988825798034668, "logps/chosen": -393.5895080566406, "logps/rejected": -325.53167724609375, "loss": 0.064, "rewards/accuracies": 1.0, "rewards/chosen": -2.9253807067871094, "rewards/margins": 5.674491882324219, "rewards/rejected": -8.599872589111328, "step": 2591 }, { "epoch": 0.54, "learning_rate": 9.19327731092437e-06, "logits/chosen": -2.304267406463623, "logits/rejected": -1.6741276979446411, "logps/chosen": -367.6330871582031, "logps/rejected": -323.0014953613281, "loss": 0.2516, "rewards/accuracies": 0.875, "rewards/chosen": -1.7452031373977661, "rewards/margins": 5.045428276062012, "rewards/rejected": -6.790631294250488, "step": 2592 }, { "epoch": 0.54, "learning_rate": 9.189075630252101e-06, "logits/chosen": -2.062570571899414, "logits/rejected": -2.148332118988037, "logps/chosen": -272.76373291015625, "logps/rejected": -308.37774658203125, "loss": 0.3286, "rewards/accuracies": 0.8125, "rewards/chosen": -2.939138889312744, "rewards/margins": 3.520430088043213, "rewards/rejected": -6.459568977355957, "step": 2593 }, { "epoch": 0.54, "learning_rate": 9.184873949579832e-06, "logits/chosen": -2.1087734699249268, "logits/rejected": -2.1768598556518555, "logps/chosen": -341.25469970703125, "logps/rejected": -380.8143310546875, "loss": 0.4837, "rewards/accuracies": 0.875, "rewards/chosen": -2.4032437801361084, "rewards/margins": 3.6222915649414062, "rewards/rejected": -6.025535583496094, "step": 2594 }, { "epoch": 0.54, "learning_rate": 9.180672268907563e-06, "logits/chosen": -1.9333209991455078, "logits/rejected": -2.0346693992614746, "logps/chosen": -347.0496826171875, "logps/rejected": -307.85882568359375, "loss": 0.2877, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5659470558166504, "rewards/margins": 3.8817567825317383, "rewards/rejected": -6.447703838348389, "step": 2595 }, { "epoch": 0.54, "learning_rate": 9.176470588235294e-06, "logits/chosen": -2.4154295921325684, "logits/rejected": -1.8625118732452393, "logps/chosen": -350.6398620605469, "logps/rejected": -308.16326904296875, "loss": 0.1595, "rewards/accuracies": 0.9375, "rewards/chosen": -2.232438087463379, "rewards/margins": 4.106030464172363, "rewards/rejected": -6.338468551635742, "step": 2596 }, { "epoch": 0.54, "learning_rate": 9.172268907563026e-06, "logits/chosen": -2.2323338985443115, "logits/rejected": -1.9212353229522705, "logps/chosen": -318.3753662109375, "logps/rejected": -365.13555908203125, "loss": 0.2857, "rewards/accuracies": 0.875, "rewards/chosen": -3.0739917755126953, "rewards/margins": 2.8681516647338867, "rewards/rejected": -5.942143440246582, "step": 2597 }, { "epoch": 0.54, "learning_rate": 9.168067226890757e-06, "logits/chosen": -1.9061769247055054, "logits/rejected": -1.8680685758590698, "logps/chosen": -361.2035827636719, "logps/rejected": -349.549072265625, "loss": 0.1701, "rewards/accuracies": 0.9375, "rewards/chosen": -2.1295711994171143, "rewards/margins": 4.801074981689453, "rewards/rejected": -6.930645942687988, "step": 2598 }, { "epoch": 0.54, "learning_rate": 9.163865546218488e-06, "logits/chosen": -2.3487040996551514, "logits/rejected": -1.9796628952026367, "logps/chosen": -289.9422912597656, "logps/rejected": -346.2483825683594, "loss": 0.6777, "rewards/accuracies": 0.75, "rewards/chosen": -3.595804214477539, "rewards/margins": 2.2968692779541016, "rewards/rejected": -5.892673492431641, "step": 2599 }, { "epoch": 0.54, "learning_rate": 9.15966386554622e-06, "logits/chosen": -2.0097591876983643, "logits/rejected": -1.485403299331665, "logps/chosen": -274.05865478515625, "logps/rejected": -259.80865478515625, "loss": 0.0951, "rewards/accuracies": 1.0, "rewards/chosen": -3.5306198596954346, "rewards/margins": 4.995075225830078, "rewards/rejected": -8.525693893432617, "step": 2600 }, { "epoch": 0.54, "learning_rate": 9.15546218487395e-06, "logits/chosen": -2.0895721912384033, "logits/rejected": -2.200493335723877, "logps/chosen": -331.234375, "logps/rejected": -339.5184326171875, "loss": 0.1062, "rewards/accuracies": 1.0, "rewards/chosen": -2.4182701110839844, "rewards/margins": 3.144052028656006, "rewards/rejected": -5.562321662902832, "step": 2601 }, { "epoch": 0.54, "learning_rate": 9.151260504201682e-06, "logits/chosen": -1.5985027551651, "logits/rejected": -1.690771222114563, "logps/chosen": -299.86248779296875, "logps/rejected": -331.09771728515625, "loss": 0.2417, "rewards/accuracies": 0.875, "rewards/chosen": -3.1488723754882812, "rewards/margins": 5.041675090789795, "rewards/rejected": -8.190546989440918, "step": 2602 }, { "epoch": 0.54, "learning_rate": 9.147058823529412e-06, "logits/chosen": -2.359854221343994, "logits/rejected": -2.2595980167388916, "logps/chosen": -401.08905029296875, "logps/rejected": -358.1273193359375, "loss": 0.3294, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7206554412841797, "rewards/margins": 3.2542667388916016, "rewards/rejected": -5.974922180175781, "step": 2603 }, { "epoch": 0.54, "learning_rate": 9.142857142857144e-06, "logits/chosen": -2.379088878631592, "logits/rejected": -1.968593955039978, "logps/chosen": -386.78558349609375, "logps/rejected": -320.34033203125, "loss": 0.0766, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5262081623077393, "rewards/margins": 6.539731979370117, "rewards/rejected": -9.065940856933594, "step": 2604 }, { "epoch": 0.54, "learning_rate": 9.138655462184874e-06, "logits/chosen": -1.9846725463867188, "logits/rejected": -2.355806350708008, "logps/chosen": -353.7377014160156, "logps/rejected": -346.3930969238281, "loss": 0.2074, "rewards/accuracies": 0.875, "rewards/chosen": -1.883202314376831, "rewards/margins": 5.493682384490967, "rewards/rejected": -7.376884460449219, "step": 2605 }, { "epoch": 0.55, "learning_rate": 9.134453781512606e-06, "logits/chosen": -2.172497510910034, "logits/rejected": -2.4107048511505127, "logps/chosen": -319.21929931640625, "logps/rejected": -395.21673583984375, "loss": 0.4582, "rewards/accuracies": 0.6875, "rewards/chosen": -2.57279109954834, "rewards/margins": 3.936209201812744, "rewards/rejected": -6.509000778198242, "step": 2606 }, { "epoch": 0.55, "learning_rate": 9.130252100840336e-06, "logits/chosen": -2.122621536254883, "logits/rejected": -2.1791207790374756, "logps/chosen": -293.9839782714844, "logps/rejected": -413.198974609375, "loss": 0.1166, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3903279304504395, "rewards/margins": 5.709153652191162, "rewards/rejected": -8.099481582641602, "step": 2607 }, { "epoch": 0.55, "learning_rate": 9.126050420168068e-06, "logits/chosen": -2.069037437438965, "logits/rejected": -2.413374423980713, "logps/chosen": -286.9517822265625, "logps/rejected": -393.9789123535156, "loss": 0.1748, "rewards/accuracies": 0.875, "rewards/chosen": -2.3851985931396484, "rewards/margins": 4.1733717918396, "rewards/rejected": -6.558570384979248, "step": 2608 }, { "epoch": 0.55, "learning_rate": 9.121848739495798e-06, "logits/chosen": -2.1006903648376465, "logits/rejected": -1.9250049591064453, "logps/chosen": -314.93084716796875, "logps/rejected": -347.17388916015625, "loss": 0.0984, "rewards/accuracies": 0.9375, "rewards/chosen": -2.137950897216797, "rewards/margins": 5.0205488204956055, "rewards/rejected": -7.158499717712402, "step": 2609 }, { "epoch": 0.55, "learning_rate": 9.11764705882353e-06, "logits/chosen": -1.9699406623840332, "logits/rejected": -1.966078758239746, "logps/chosen": -252.5470428466797, "logps/rejected": -331.76568603515625, "loss": 0.25, "rewards/accuracies": 0.875, "rewards/chosen": -3.402479648590088, "rewards/margins": 3.463578224182129, "rewards/rejected": -6.866057395935059, "step": 2610 }, { "epoch": 0.55, "learning_rate": 9.11344537815126e-06, "logits/chosen": -2.2181761264801025, "logits/rejected": -1.8519394397735596, "logps/chosen": -367.24224853515625, "logps/rejected": -299.8636779785156, "loss": 0.2962, "rewards/accuracies": 0.8125, "rewards/chosen": -2.5875656604766846, "rewards/margins": 4.67170524597168, "rewards/rejected": -7.259271621704102, "step": 2611 }, { "epoch": 0.55, "learning_rate": 9.109243697478992e-06, "logits/chosen": -2.304582118988037, "logits/rejected": -1.9881919622421265, "logps/chosen": -297.2028503417969, "logps/rejected": -350.5053405761719, "loss": 0.2676, "rewards/accuracies": 0.875, "rewards/chosen": -3.0449090003967285, "rewards/margins": 3.859830856323242, "rewards/rejected": -6.904739856719971, "step": 2612 }, { "epoch": 0.55, "learning_rate": 9.105042016806723e-06, "logits/chosen": -2.447077989578247, "logits/rejected": -1.5826685428619385, "logps/chosen": -364.481689453125, "logps/rejected": -272.84552001953125, "loss": 0.2932, "rewards/accuracies": 0.875, "rewards/chosen": -2.888453722000122, "rewards/margins": 4.477534294128418, "rewards/rejected": -7.365988254547119, "step": 2613 }, { "epoch": 0.55, "learning_rate": 9.100840336134455e-06, "logits/chosen": -2.1794469356536865, "logits/rejected": -1.8452938795089722, "logps/chosen": -383.5345153808594, "logps/rejected": -301.6110534667969, "loss": 0.2406, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8869540691375732, "rewards/margins": 3.0920464992523193, "rewards/rejected": -5.979000568389893, "step": 2614 }, { "epoch": 0.55, "learning_rate": 9.096638655462185e-06, "logits/chosen": -2.2501351833343506, "logits/rejected": -1.8008877038955688, "logps/chosen": -286.72064208984375, "logps/rejected": -423.5406799316406, "loss": 0.2278, "rewards/accuracies": 0.875, "rewards/chosen": -2.5307388305664062, "rewards/margins": 4.129040241241455, "rewards/rejected": -6.659779071807861, "step": 2615 }, { "epoch": 0.55, "learning_rate": 9.092436974789917e-06, "logits/chosen": -2.246356725692749, "logits/rejected": -2.2131903171539307, "logps/chosen": -428.2558898925781, "logps/rejected": -399.03460693359375, "loss": 0.1047, "rewards/accuracies": 1.0, "rewards/chosen": -1.9555671215057373, "rewards/margins": 6.324339866638184, "rewards/rejected": -8.2799072265625, "step": 2616 }, { "epoch": 0.55, "learning_rate": 9.088235294117647e-06, "logits/chosen": -2.4375953674316406, "logits/rejected": -2.1075685024261475, "logps/chosen": -414.30987548828125, "logps/rejected": -336.76202392578125, "loss": 0.4661, "rewards/accuracies": 0.6875, "rewards/chosen": -2.8523030281066895, "rewards/margins": 3.1354942321777344, "rewards/rejected": -5.987796783447266, "step": 2617 }, { "epoch": 0.55, "learning_rate": 9.084033613445379e-06, "logits/chosen": -2.1061532497406006, "logits/rejected": -1.478698492050171, "logps/chosen": -422.8421630859375, "logps/rejected": -333.0334167480469, "loss": 0.0569, "rewards/accuracies": 1.0, "rewards/chosen": -3.028029680252075, "rewards/margins": 6.226371765136719, "rewards/rejected": -9.254401206970215, "step": 2618 }, { "epoch": 0.55, "learning_rate": 9.07983193277311e-06, "logits/chosen": -2.2607040405273438, "logits/rejected": -2.214386463165283, "logps/chosen": -234.00039672851562, "logps/rejected": -282.59423828125, "loss": 0.1547, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2320210933685303, "rewards/margins": 4.761127948760986, "rewards/rejected": -7.993149280548096, "step": 2619 }, { "epoch": 0.55, "learning_rate": 9.075630252100841e-06, "logits/chosen": -2.273449420928955, "logits/rejected": -2.0686428546905518, "logps/chosen": -211.99765014648438, "logps/rejected": -293.23638916015625, "loss": 0.49, "rewards/accuracies": 0.75, "rewards/chosen": -3.5950934886932373, "rewards/margins": 4.220656871795654, "rewards/rejected": -7.8157501220703125, "step": 2620 }, { "epoch": 0.55, "learning_rate": 9.071428571428573e-06, "logits/chosen": -2.3413877487182617, "logits/rejected": -1.8345166444778442, "logps/chosen": -382.54913330078125, "logps/rejected": -316.766845703125, "loss": 0.2132, "rewards/accuracies": 0.875, "rewards/chosen": -2.405459403991699, "rewards/margins": 4.8582048416137695, "rewards/rejected": -7.263664245605469, "step": 2621 }, { "epoch": 0.55, "learning_rate": 9.067226890756303e-06, "logits/chosen": -2.36049747467041, "logits/rejected": -1.904428482055664, "logps/chosen": -378.9596862792969, "logps/rejected": -385.3060302734375, "loss": 0.2854, "rewards/accuracies": 0.8125, "rewards/chosen": -3.327695846557617, "rewards/margins": 3.909191608428955, "rewards/rejected": -7.2368879318237305, "step": 2622 }, { "epoch": 0.55, "learning_rate": 9.063025210084035e-06, "logits/chosen": -2.44170880317688, "logits/rejected": -1.9767940044403076, "logps/chosen": -362.39764404296875, "logps/rejected": -421.70013427734375, "loss": 0.2259, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2055587768554688, "rewards/margins": 5.011026859283447, "rewards/rejected": -8.216585159301758, "step": 2623 }, { "epoch": 0.55, "learning_rate": 9.058823529411765e-06, "logits/chosen": -2.313199281692505, "logits/rejected": -1.9724633693695068, "logps/chosen": -334.0782470703125, "logps/rejected": -316.8968200683594, "loss": 0.1953, "rewards/accuracies": 0.9375, "rewards/chosen": -2.973860502243042, "rewards/margins": 4.250874042510986, "rewards/rejected": -7.224734306335449, "step": 2624 }, { "epoch": 0.55, "learning_rate": 9.054621848739497e-06, "logits/chosen": -2.252244472503662, "logits/rejected": -2.0120794773101807, "logps/chosen": -352.8179016113281, "logps/rejected": -302.3409118652344, "loss": 0.2013, "rewards/accuracies": 0.9375, "rewards/chosen": -2.972867250442505, "rewards/margins": 4.9938530921936035, "rewards/rejected": -7.966720104217529, "step": 2625 }, { "epoch": 0.55, "learning_rate": 9.050420168067227e-06, "logits/chosen": -1.92987060546875, "logits/rejected": -2.245865821838379, "logps/chosen": -223.7242889404297, "logps/rejected": -353.7231140136719, "loss": 0.236, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3670947551727295, "rewards/margins": 5.663341999053955, "rewards/rejected": -9.030437469482422, "step": 2626 }, { "epoch": 0.55, "learning_rate": 9.04621848739496e-06, "logits/chosen": -2.368917942047119, "logits/rejected": -2.198566436767578, "logps/chosen": -390.9451904296875, "logps/rejected": -369.2320556640625, "loss": 0.3857, "rewards/accuracies": 0.875, "rewards/chosen": -3.053164482116699, "rewards/margins": 4.887670516967773, "rewards/rejected": -7.940835475921631, "step": 2627 }, { "epoch": 0.55, "learning_rate": 9.04201680672269e-06, "logits/chosen": -2.2183849811553955, "logits/rejected": -1.978151559829712, "logps/chosen": -481.5416259765625, "logps/rejected": -407.5372009277344, "loss": 0.3501, "rewards/accuracies": 0.875, "rewards/chosen": -2.5582516193389893, "rewards/margins": 5.30425500869751, "rewards/rejected": -7.86250638961792, "step": 2628 }, { "epoch": 0.55, "learning_rate": 9.037815126050421e-06, "logits/chosen": -2.2114574909210205, "logits/rejected": -2.5505189895629883, "logps/chosen": -360.9266357421875, "logps/rejected": -461.0148010253906, "loss": 0.3772, "rewards/accuracies": 0.875, "rewards/chosen": -3.879077911376953, "rewards/margins": 5.504572868347168, "rewards/rejected": -9.383650779724121, "step": 2629 }, { "epoch": 0.55, "learning_rate": 9.033613445378152e-06, "logits/chosen": -2.4712681770324707, "logits/rejected": -2.01299786567688, "logps/chosen": -306.111083984375, "logps/rejected": -305.47943115234375, "loss": 0.1083, "rewards/accuracies": 0.9375, "rewards/chosen": -4.175967216491699, "rewards/margins": 4.66514253616333, "rewards/rejected": -8.841110229492188, "step": 2630 }, { "epoch": 0.55, "learning_rate": 9.029411764705884e-06, "logits/chosen": -2.3037240505218506, "logits/rejected": -2.084071636199951, "logps/chosen": -417.254638671875, "logps/rejected": -482.8475341796875, "loss": 0.2566, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2271175384521484, "rewards/margins": 5.514753818511963, "rewards/rejected": -8.741870880126953, "step": 2631 }, { "epoch": 0.55, "learning_rate": 9.025210084033614e-06, "logits/chosen": -2.3310635089874268, "logits/rejected": -2.063809394836426, "logps/chosen": -335.72723388671875, "logps/rejected": -323.3340148925781, "loss": 0.1859, "rewards/accuracies": 0.875, "rewards/chosen": -3.1652774810791016, "rewards/margins": 4.732568264007568, "rewards/rejected": -7.897845268249512, "step": 2632 }, { "epoch": 0.55, "learning_rate": 9.021008403361346e-06, "logits/chosen": -1.6444947719573975, "logits/rejected": -1.939252495765686, "logps/chosen": -251.89300537109375, "logps/rejected": -350.66424560546875, "loss": 0.333, "rewards/accuracies": 0.75, "rewards/chosen": -2.7179863452911377, "rewards/margins": 5.00375509262085, "rewards/rejected": -7.721741676330566, "step": 2633 }, { "epoch": 0.55, "learning_rate": 9.016806722689076e-06, "logits/chosen": -2.365762710571289, "logits/rejected": -1.971664547920227, "logps/chosen": -399.0832824707031, "logps/rejected": -360.23370361328125, "loss": 0.2766, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8267745971679688, "rewards/margins": 3.66393780708313, "rewards/rejected": -6.4907121658325195, "step": 2634 }, { "epoch": 0.55, "learning_rate": 9.012605042016808e-06, "logits/chosen": -2.3139126300811768, "logits/rejected": -1.9102966785430908, "logps/chosen": -369.58184814453125, "logps/rejected": -499.3059387207031, "loss": 0.2065, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1525888442993164, "rewards/margins": 4.079642295837402, "rewards/rejected": -7.232231140136719, "step": 2635 }, { "epoch": 0.55, "learning_rate": 9.008403361344538e-06, "logits/chosen": -2.1470606327056885, "logits/rejected": -2.103541374206543, "logps/chosen": -521.7222900390625, "logps/rejected": -414.69244384765625, "loss": 0.0871, "rewards/accuracies": 0.9375, "rewards/chosen": -3.5568864345550537, "rewards/margins": 5.589039325714111, "rewards/rejected": -9.145925521850586, "step": 2636 }, { "epoch": 0.55, "learning_rate": 9.00420168067227e-06, "logits/chosen": -2.1644845008850098, "logits/rejected": -1.948533535003662, "logps/chosen": -335.3471984863281, "logps/rejected": -331.1089172363281, "loss": 0.1564, "rewards/accuracies": 1.0, "rewards/chosen": -2.3042542934417725, "rewards/margins": 3.4772984981536865, "rewards/rejected": -5.781552314758301, "step": 2637 }, { "epoch": 0.55, "learning_rate": 9e-06, "logits/chosen": -2.318042278289795, "logits/rejected": -1.7043120861053467, "logps/chosen": -385.737060546875, "logps/rejected": -284.3018798828125, "loss": 0.6149, "rewards/accuracies": 0.8125, "rewards/chosen": -2.235351324081421, "rewards/margins": 4.077353477478027, "rewards/rejected": -6.312704563140869, "step": 2638 }, { "epoch": 0.55, "learning_rate": 8.995798319327732e-06, "logits/chosen": -2.337407350540161, "logits/rejected": -1.94884192943573, "logps/chosen": -359.7461853027344, "logps/rejected": -378.401611328125, "loss": 0.5126, "rewards/accuracies": 0.8125, "rewards/chosen": -3.537168502807617, "rewards/margins": 4.906581878662109, "rewards/rejected": -8.443750381469727, "step": 2639 }, { "epoch": 0.55, "learning_rate": 8.991596638655462e-06, "logits/chosen": -2.1896655559539795, "logits/rejected": -2.149688720703125, "logps/chosen": -335.66717529296875, "logps/rejected": -364.55059814453125, "loss": 0.1801, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3991432189941406, "rewards/margins": 5.550799369812012, "rewards/rejected": -7.949942111968994, "step": 2640 }, { "epoch": 0.55, "learning_rate": 8.987394957983194e-06, "logits/chosen": -1.7884109020233154, "logits/rejected": -1.832045316696167, "logps/chosen": -300.0168151855469, "logps/rejected": -399.049560546875, "loss": 0.0973, "rewards/accuracies": 1.0, "rewards/chosen": -2.3079686164855957, "rewards/margins": 5.28182315826416, "rewards/rejected": -7.589791297912598, "step": 2641 }, { "epoch": 0.55, "learning_rate": 8.983193277310926e-06, "logits/chosen": -2.1261706352233887, "logits/rejected": -1.8228116035461426, "logps/chosen": -335.3315124511719, "logps/rejected": -375.4422912597656, "loss": 0.2771, "rewards/accuracies": 0.875, "rewards/chosen": -3.366669178009033, "rewards/margins": 4.015559673309326, "rewards/rejected": -7.382228851318359, "step": 2642 }, { "epoch": 0.55, "learning_rate": 8.978991596638656e-06, "logits/chosen": -2.3579349517822266, "logits/rejected": -2.2383649349212646, "logps/chosen": -353.3088684082031, "logps/rejected": -275.9657897949219, "loss": 0.6033, "rewards/accuracies": 0.75, "rewards/chosen": -4.352352142333984, "rewards/margins": 3.948674440383911, "rewards/rejected": -8.301026344299316, "step": 2643 }, { "epoch": 0.55, "learning_rate": 8.974789915966388e-06, "logits/chosen": -1.7488627433776855, "logits/rejected": -2.033600091934204, "logps/chosen": -304.53564453125, "logps/rejected": -471.5335998535156, "loss": 0.1309, "rewards/accuracies": 1.0, "rewards/chosen": -3.4654555320739746, "rewards/margins": 5.2220869064331055, "rewards/rejected": -8.687541961669922, "step": 2644 }, { "epoch": 0.55, "learning_rate": 8.970588235294119e-06, "logits/chosen": -1.986555576324463, "logits/rejected": -1.9408445358276367, "logps/chosen": -231.56939697265625, "logps/rejected": -304.6173095703125, "loss": 0.2685, "rewards/accuracies": 0.8125, "rewards/chosen": -3.281092643737793, "rewards/margins": 4.993471622467041, "rewards/rejected": -8.274564743041992, "step": 2645 }, { "epoch": 0.55, "learning_rate": 8.96638655462185e-06, "logits/chosen": -2.3097894191741943, "logits/rejected": -1.9431726932525635, "logps/chosen": -408.22967529296875, "logps/rejected": -348.1778564453125, "loss": 0.2839, "rewards/accuracies": 0.875, "rewards/chosen": -2.806692600250244, "rewards/margins": 4.749147891998291, "rewards/rejected": -7.555840015411377, "step": 2646 }, { "epoch": 0.55, "learning_rate": 8.96218487394958e-06, "logits/chosen": -2.1173343658447266, "logits/rejected": -2.224689483642578, "logps/chosen": -310.6734619140625, "logps/rejected": -407.75958251953125, "loss": 0.0355, "rewards/accuracies": 1.0, "rewards/chosen": -3.568974494934082, "rewards/margins": 7.258604049682617, "rewards/rejected": -10.827579498291016, "step": 2647 }, { "epoch": 0.55, "learning_rate": 8.957983193277313e-06, "logits/chosen": -2.2200005054473877, "logits/rejected": -2.1109769344329834, "logps/chosen": -321.57135009765625, "logps/rejected": -402.0304260253906, "loss": 0.0458, "rewards/accuracies": 1.0, "rewards/chosen": -3.22861385345459, "rewards/margins": 6.363674640655518, "rewards/rejected": -9.592288970947266, "step": 2648 }, { "epoch": 0.55, "learning_rate": 8.953781512605043e-06, "logits/chosen": -2.362168788909912, "logits/rejected": -1.8759689331054688, "logps/chosen": -409.53448486328125, "logps/rejected": -445.63525390625, "loss": 0.2921, "rewards/accuracies": 0.875, "rewards/chosen": -3.8342442512512207, "rewards/margins": 4.363339424133301, "rewards/rejected": -8.197583198547363, "step": 2649 }, { "epoch": 0.55, "learning_rate": 8.949579831932775e-06, "logits/chosen": -1.9577956199645996, "logits/rejected": -2.1056935787200928, "logps/chosen": -158.4167938232422, "logps/rejected": -298.6907958984375, "loss": 0.29, "rewards/accuracies": 0.8125, "rewards/chosen": -3.2304322719573975, "rewards/margins": 5.726587295532227, "rewards/rejected": -8.957019805908203, "step": 2650 }, { "epoch": 0.55, "learning_rate": 8.945378151260505e-06, "logits/chosen": -2.257197141647339, "logits/rejected": -1.9540551900863647, "logps/chosen": -348.2672424316406, "logps/rejected": -284.5853271484375, "loss": 0.5147, "rewards/accuracies": 0.75, "rewards/chosen": -4.159351825714111, "rewards/margins": 2.5745716094970703, "rewards/rejected": -6.733923435211182, "step": 2651 }, { "epoch": 0.55, "learning_rate": 8.941176470588237e-06, "logits/chosen": -2.260843276977539, "logits/rejected": -1.7365663051605225, "logps/chosen": -298.7296447753906, "logps/rejected": -313.97467041015625, "loss": 0.1762, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4675726890563965, "rewards/margins": 6.697518825531006, "rewards/rejected": -10.165090560913086, "step": 2652 }, { "epoch": 0.56, "learning_rate": 8.936974789915967e-06, "logits/chosen": -2.260566473007202, "logits/rejected": -1.9898874759674072, "logps/chosen": -311.6046142578125, "logps/rejected": -348.43505859375, "loss": 0.3002, "rewards/accuracies": 0.875, "rewards/chosen": -3.771434783935547, "rewards/margins": 5.387360572814941, "rewards/rejected": -9.158795356750488, "step": 2653 }, { "epoch": 0.56, "learning_rate": 8.932773109243699e-06, "logits/chosen": -2.0622031688690186, "logits/rejected": -2.251230239868164, "logps/chosen": -517.6419677734375, "logps/rejected": -503.8721923828125, "loss": 0.2465, "rewards/accuracies": 0.8125, "rewards/chosen": -2.4424264430999756, "rewards/margins": 4.305473804473877, "rewards/rejected": -6.747900485992432, "step": 2654 }, { "epoch": 0.56, "learning_rate": 8.92857142857143e-06, "logits/chosen": -2.1060009002685547, "logits/rejected": -1.7831798791885376, "logps/chosen": -273.451904296875, "logps/rejected": -281.1597900390625, "loss": 0.1156, "rewards/accuracies": 1.0, "rewards/chosen": -2.651923894882202, "rewards/margins": 3.244041919708252, "rewards/rejected": -5.895966053009033, "step": 2655 }, { "epoch": 0.56, "learning_rate": 8.924369747899161e-06, "logits/chosen": -1.8955503702163696, "logits/rejected": -1.9887290000915527, "logps/chosen": -321.331298828125, "logps/rejected": -392.4033508300781, "loss": 0.1404, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3340537548065186, "rewards/margins": 3.8122332096099854, "rewards/rejected": -6.146286964416504, "step": 2656 }, { "epoch": 0.56, "learning_rate": 8.920168067226891e-06, "logits/chosen": -2.2199277877807617, "logits/rejected": -2.080019474029541, "logps/chosen": -297.56146240234375, "logps/rejected": -305.3303527832031, "loss": 0.5039, "rewards/accuracies": 0.8125, "rewards/chosen": -3.6488399505615234, "rewards/margins": 4.500877380371094, "rewards/rejected": -8.149718284606934, "step": 2657 }, { "epoch": 0.56, "learning_rate": 8.915966386554623e-06, "logits/chosen": -2.250577211380005, "logits/rejected": -1.9426429271697998, "logps/chosen": -370.91778564453125, "logps/rejected": -377.5274658203125, "loss": 0.1163, "rewards/accuracies": 0.875, "rewards/chosen": -3.5085253715515137, "rewards/margins": 5.800785064697266, "rewards/rejected": -9.309310913085938, "step": 2658 }, { "epoch": 0.56, "learning_rate": 8.911764705882354e-06, "logits/chosen": -2.1390366554260254, "logits/rejected": -2.1604878902435303, "logps/chosen": -326.6769714355469, "logps/rejected": -413.05755615234375, "loss": 0.3347, "rewards/accuracies": 0.875, "rewards/chosen": -3.2052788734436035, "rewards/margins": 4.893599510192871, "rewards/rejected": -8.098877906799316, "step": 2659 }, { "epoch": 0.56, "learning_rate": 8.907563025210085e-06, "logits/chosen": -2.420534610748291, "logits/rejected": -2.1544504165649414, "logps/chosen": -322.2715759277344, "logps/rejected": -289.072021484375, "loss": 0.4826, "rewards/accuracies": 0.8125, "rewards/chosen": -2.048967123031616, "rewards/margins": 4.112607002258301, "rewards/rejected": -6.161574363708496, "step": 2660 }, { "epoch": 0.56, "learning_rate": 8.903361344537816e-06, "logits/chosen": -1.980233907699585, "logits/rejected": -2.0479214191436768, "logps/chosen": -385.18798828125, "logps/rejected": -383.0858154296875, "loss": 0.6965, "rewards/accuracies": 0.6875, "rewards/chosen": -3.68750262260437, "rewards/margins": 1.5842576026916504, "rewards/rejected": -5.271759986877441, "step": 2661 }, { "epoch": 0.56, "learning_rate": 8.899159663865546e-06, "logits/chosen": -2.0453672409057617, "logits/rejected": -1.9170441627502441, "logps/chosen": -273.72271728515625, "logps/rejected": -364.24658203125, "loss": 0.1084, "rewards/accuracies": 0.9375, "rewards/chosen": -3.7582130432128906, "rewards/margins": 5.073298454284668, "rewards/rejected": -8.831510543823242, "step": 2662 }, { "epoch": 0.56, "learning_rate": 8.894957983193278e-06, "logits/chosen": -2.3412487506866455, "logits/rejected": -1.7636182308197021, "logps/chosen": -358.69549560546875, "logps/rejected": -302.4886169433594, "loss": 0.6744, "rewards/accuracies": 0.8125, "rewards/chosen": -3.442138195037842, "rewards/margins": 2.9562692642211914, "rewards/rejected": -6.398407936096191, "step": 2663 }, { "epoch": 0.56, "learning_rate": 8.890756302521008e-06, "logits/chosen": -1.9801467657089233, "logits/rejected": -2.060098171234131, "logps/chosen": -329.5774230957031, "logps/rejected": -370.871826171875, "loss": 0.6701, "rewards/accuracies": 0.75, "rewards/chosen": -3.5938949584960938, "rewards/margins": 3.338050365447998, "rewards/rejected": -6.93194580078125, "step": 2664 }, { "epoch": 0.56, "learning_rate": 8.88655462184874e-06, "logits/chosen": -2.0661187171936035, "logits/rejected": -2.1971991062164307, "logps/chosen": -285.5542297363281, "logps/rejected": -372.9203186035156, "loss": 0.2297, "rewards/accuracies": 0.875, "rewards/chosen": -3.010709762573242, "rewards/margins": 4.790306568145752, "rewards/rejected": -7.801015853881836, "step": 2665 }, { "epoch": 0.56, "learning_rate": 8.88235294117647e-06, "logits/chosen": -2.4135351181030273, "logits/rejected": -2.27453351020813, "logps/chosen": -408.83770751953125, "logps/rejected": -433.5548095703125, "loss": 0.1567, "rewards/accuracies": 1.0, "rewards/chosen": -2.2829365730285645, "rewards/margins": 4.203428268432617, "rewards/rejected": -6.486364841461182, "step": 2666 }, { "epoch": 0.56, "learning_rate": 8.878151260504202e-06, "logits/chosen": -2.2961039543151855, "logits/rejected": -2.1111371517181396, "logps/chosen": -308.9424133300781, "logps/rejected": -275.480712890625, "loss": 0.0982, "rewards/accuracies": 0.9375, "rewards/chosen": -2.5258851051330566, "rewards/margins": 4.584489822387695, "rewards/rejected": -7.11037540435791, "step": 2667 }, { "epoch": 0.56, "learning_rate": 8.873949579831932e-06, "logits/chosen": -2.15207576751709, "logits/rejected": -2.0082287788391113, "logps/chosen": -256.6492004394531, "logps/rejected": -236.60647583007812, "loss": 0.2286, "rewards/accuracies": 0.875, "rewards/chosen": -2.5860178470611572, "rewards/margins": 4.407907009124756, "rewards/rejected": -6.993925094604492, "step": 2668 }, { "epoch": 0.56, "learning_rate": 8.869747899159664e-06, "logits/chosen": -1.8155827522277832, "logits/rejected": -2.276298999786377, "logps/chosen": -274.17694091796875, "logps/rejected": -461.59674072265625, "loss": 0.3218, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1462178230285645, "rewards/margins": 6.446110248565674, "rewards/rejected": -9.592327117919922, "step": 2669 }, { "epoch": 0.56, "learning_rate": 8.865546218487396e-06, "logits/chosen": -2.102386474609375, "logits/rejected": -1.9784517288208008, "logps/chosen": -319.59912109375, "logps/rejected": -332.0497741699219, "loss": 0.2989, "rewards/accuracies": 0.875, "rewards/chosen": -3.7539267539978027, "rewards/margins": 4.721604347229004, "rewards/rejected": -8.475530624389648, "step": 2670 }, { "epoch": 0.56, "learning_rate": 8.861344537815126e-06, "logits/chosen": -2.0956966876983643, "logits/rejected": -2.079134464263916, "logps/chosen": -254.76153564453125, "logps/rejected": -328.09149169921875, "loss": 0.179, "rewards/accuracies": 0.9375, "rewards/chosen": -2.662783622741699, "rewards/margins": 3.861410617828369, "rewards/rejected": -6.524194240570068, "step": 2671 }, { "epoch": 0.56, "learning_rate": 8.857142857142858e-06, "logits/chosen": -2.310729503631592, "logits/rejected": -1.8544069528579712, "logps/chosen": -323.09881591796875, "logps/rejected": -307.9286804199219, "loss": 0.1296, "rewards/accuracies": 1.0, "rewards/chosen": -2.8603925704956055, "rewards/margins": 5.2406206130981445, "rewards/rejected": -8.10101318359375, "step": 2672 }, { "epoch": 0.56, "learning_rate": 8.852941176470588e-06, "logits/chosen": -2.1763594150543213, "logits/rejected": -1.8199400901794434, "logps/chosen": -289.87786865234375, "logps/rejected": -282.49969482421875, "loss": 0.344, "rewards/accuracies": 0.75, "rewards/chosen": -3.309572219848633, "rewards/margins": 2.929065465927124, "rewards/rejected": -6.238637447357178, "step": 2673 }, { "epoch": 0.56, "learning_rate": 8.84873949579832e-06, "logits/chosen": -1.9148685932159424, "logits/rejected": -1.9912877082824707, "logps/chosen": -233.59605407714844, "logps/rejected": -317.5933532714844, "loss": 0.1539, "rewards/accuracies": 0.9375, "rewards/chosen": -3.150411605834961, "rewards/margins": 4.769400596618652, "rewards/rejected": -7.919811725616455, "step": 2674 }, { "epoch": 0.56, "learning_rate": 8.84453781512605e-06, "logits/chosen": -1.9021685123443604, "logits/rejected": -1.7856111526489258, "logps/chosen": -354.42327880859375, "logps/rejected": -425.9520263671875, "loss": 0.3194, "rewards/accuracies": 0.8125, "rewards/chosen": -3.467655658721924, "rewards/margins": 5.39851188659668, "rewards/rejected": -8.866167068481445, "step": 2675 }, { "epoch": 0.56, "learning_rate": 8.840336134453783e-06, "logits/chosen": -2.054248809814453, "logits/rejected": -2.304570436477661, "logps/chosen": -395.2113342285156, "logps/rejected": -464.2568054199219, "loss": 0.3341, "rewards/accuracies": 0.9375, "rewards/chosen": -3.316387176513672, "rewards/margins": 3.7610421180725098, "rewards/rejected": -7.07742977142334, "step": 2676 }, { "epoch": 0.56, "learning_rate": 8.836134453781513e-06, "logits/chosen": -2.1528868675231934, "logits/rejected": -1.893542766571045, "logps/chosen": -310.95538330078125, "logps/rejected": -369.63934326171875, "loss": 0.4258, "rewards/accuracies": 0.8125, "rewards/chosen": -3.431771755218506, "rewards/margins": 5.848884582519531, "rewards/rejected": -9.280654907226562, "step": 2677 }, { "epoch": 0.56, "learning_rate": 8.831932773109245e-06, "logits/chosen": -2.4424333572387695, "logits/rejected": -2.3958985805511475, "logps/chosen": -207.1778564453125, "logps/rejected": -278.8995361328125, "loss": 0.147, "rewards/accuracies": 0.9375, "rewards/chosen": -3.0163660049438477, "rewards/margins": 6.170021057128906, "rewards/rejected": -9.18638801574707, "step": 2678 }, { "epoch": 0.56, "learning_rate": 8.827731092436975e-06, "logits/chosen": -2.1252641677856445, "logits/rejected": -2.199077844619751, "logps/chosen": -450.2701721191406, "logps/rejected": -519.8621826171875, "loss": 0.3131, "rewards/accuracies": 0.8125, "rewards/chosen": -2.8092446327209473, "rewards/margins": 5.226586818695068, "rewards/rejected": -8.035831451416016, "step": 2679 }, { "epoch": 0.56, "learning_rate": 8.823529411764707e-06, "logits/chosen": -2.3932228088378906, "logits/rejected": -2.2165069580078125, "logps/chosen": -275.21795654296875, "logps/rejected": -358.21356201171875, "loss": 0.1564, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8486104011535645, "rewards/margins": 6.026585578918457, "rewards/rejected": -8.87519645690918, "step": 2680 }, { "epoch": 0.56, "learning_rate": 8.819327731092437e-06, "logits/chosen": -2.3068652153015137, "logits/rejected": -1.6249210834503174, "logps/chosen": -505.64593505859375, "logps/rejected": -427.8704833984375, "loss": 0.2642, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2640533447265625, "rewards/margins": 6.481842994689941, "rewards/rejected": -9.745896339416504, "step": 2681 }, { "epoch": 0.56, "learning_rate": 8.815126050420169e-06, "logits/chosen": -2.0739762783050537, "logits/rejected": -1.9446161985397339, "logps/chosen": -355.6173095703125, "logps/rejected": -353.77044677734375, "loss": 0.5856, "rewards/accuracies": 0.6875, "rewards/chosen": -5.1183013916015625, "rewards/margins": 2.5441982746124268, "rewards/rejected": -7.662499904632568, "step": 2682 }, { "epoch": 0.56, "learning_rate": 8.8109243697479e-06, "logits/chosen": -2.202012300491333, "logits/rejected": -2.1079869270324707, "logps/chosen": -231.66783142089844, "logps/rejected": -316.7498779296875, "loss": 0.1875, "rewards/accuracies": 0.9375, "rewards/chosen": -3.097196340560913, "rewards/margins": 4.253522872924805, "rewards/rejected": -7.350719451904297, "step": 2683 }, { "epoch": 0.56, "learning_rate": 8.806722689075631e-06, "logits/chosen": -1.9491394758224487, "logits/rejected": -1.900090217590332, "logps/chosen": -284.14215087890625, "logps/rejected": -302.74249267578125, "loss": 0.2771, "rewards/accuracies": 0.8125, "rewards/chosen": -4.058411121368408, "rewards/margins": 3.4801127910614014, "rewards/rejected": -7.5385236740112305, "step": 2684 }, { "epoch": 0.56, "learning_rate": 8.802521008403361e-06, "logits/chosen": -2.123595952987671, "logits/rejected": -1.8167352676391602, "logps/chosen": -213.9258575439453, "logps/rejected": -290.43975830078125, "loss": 0.1643, "rewards/accuracies": 0.9375, "rewards/chosen": -3.259025812149048, "rewards/margins": 5.2606940269470215, "rewards/rejected": -8.519720077514648, "step": 2685 }, { "epoch": 0.56, "learning_rate": 8.798319327731093e-06, "logits/chosen": -2.1901299953460693, "logits/rejected": -2.0241165161132812, "logps/chosen": -453.7032775878906, "logps/rejected": -394.6129150390625, "loss": 0.2249, "rewards/accuracies": 0.8125, "rewards/chosen": -3.29465389251709, "rewards/margins": 4.586889266967773, "rewards/rejected": -7.8815436363220215, "step": 2686 }, { "epoch": 0.56, "learning_rate": 8.794117647058823e-06, "logits/chosen": -2.125352144241333, "logits/rejected": -1.8213437795639038, "logps/chosen": -277.3586730957031, "logps/rejected": -249.25454711914062, "loss": 0.4034, "rewards/accuracies": 0.8125, "rewards/chosen": -4.321885585784912, "rewards/margins": 2.524087429046631, "rewards/rejected": -6.845973014831543, "step": 2687 }, { "epoch": 0.56, "learning_rate": 8.789915966386555e-06, "logits/chosen": -2.2461905479431152, "logits/rejected": -1.791140079498291, "logps/chosen": -343.4769287109375, "logps/rejected": -279.8666076660156, "loss": 0.1656, "rewards/accuracies": 0.9375, "rewards/chosen": -2.513667583465576, "rewards/margins": 4.670999050140381, "rewards/rejected": -7.184666633605957, "step": 2688 }, { "epoch": 0.56, "learning_rate": 8.785714285714286e-06, "logits/chosen": -2.114018440246582, "logits/rejected": -2.2642788887023926, "logps/chosen": -352.65643310546875, "logps/rejected": -360.87921142578125, "loss": 0.2898, "rewards/accuracies": 0.8125, "rewards/chosen": -3.881795883178711, "rewards/margins": 4.008090019226074, "rewards/rejected": -7.889885902404785, "step": 2689 }, { "epoch": 0.56, "learning_rate": 8.781512605042017e-06, "logits/chosen": -2.1060616970062256, "logits/rejected": -2.007534980773926, "logps/chosen": -327.9744567871094, "logps/rejected": -353.4940185546875, "loss": 0.5127, "rewards/accuracies": 0.875, "rewards/chosen": -4.328692436218262, "rewards/margins": 4.129589557647705, "rewards/rejected": -8.458281517028809, "step": 2690 }, { "epoch": 0.56, "learning_rate": 8.777310924369748e-06, "logits/chosen": -2.3290610313415527, "logits/rejected": -1.9435094594955444, "logps/chosen": -471.1000671386719, "logps/rejected": -440.1175231933594, "loss": 0.5653, "rewards/accuracies": 0.75, "rewards/chosen": -3.4207262992858887, "rewards/margins": 3.469817638397217, "rewards/rejected": -6.8905439376831055, "step": 2691 }, { "epoch": 0.56, "learning_rate": 8.77310924369748e-06, "logits/chosen": -2.036985397338867, "logits/rejected": -1.8986417055130005, "logps/chosen": -339.95147705078125, "logps/rejected": -376.7081604003906, "loss": 0.4592, "rewards/accuracies": 0.875, "rewards/chosen": -2.4301857948303223, "rewards/margins": 3.176327705383301, "rewards/rejected": -5.606513500213623, "step": 2692 }, { "epoch": 0.56, "learning_rate": 8.768907563025212e-06, "logits/chosen": -2.476844072341919, "logits/rejected": -2.2569897174835205, "logps/chosen": -510.7056884765625, "logps/rejected": -439.92913818359375, "loss": 0.3004, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7965469360351562, "rewards/margins": 3.9088680744171143, "rewards/rejected": -6.705414772033691, "step": 2693 }, { "epoch": 0.56, "learning_rate": 8.764705882352942e-06, "logits/chosen": -2.5093770027160645, "logits/rejected": -2.2896218299865723, "logps/chosen": -411.2294616699219, "logps/rejected": -480.22589111328125, "loss": 0.131, "rewards/accuracies": 0.9375, "rewards/chosen": -1.8385963439941406, "rewards/margins": 5.305680751800537, "rewards/rejected": -7.144277572631836, "step": 2694 }, { "epoch": 0.56, "learning_rate": 8.760504201680674e-06, "logits/chosen": -2.0847718715667725, "logits/rejected": -2.381751298904419, "logps/chosen": -343.35723876953125, "logps/rejected": -401.82916259765625, "loss": 0.339, "rewards/accuracies": 0.8125, "rewards/chosen": -3.9296274185180664, "rewards/margins": 3.793790340423584, "rewards/rejected": -7.723417282104492, "step": 2695 }, { "epoch": 0.56, "learning_rate": 8.756302521008404e-06, "logits/chosen": -2.2546093463897705, "logits/rejected": -2.187814474105835, "logps/chosen": -249.77774047851562, "logps/rejected": -447.87176513671875, "loss": 0.1137, "rewards/accuracies": 1.0, "rewards/chosen": -3.3721938133239746, "rewards/margins": 6.034940719604492, "rewards/rejected": -9.407135009765625, "step": 2696 }, { "epoch": 0.56, "learning_rate": 8.752100840336136e-06, "logits/chosen": -2.1390864849090576, "logits/rejected": -1.9791996479034424, "logps/chosen": -319.9798278808594, "logps/rejected": -274.62310791015625, "loss": 0.1254, "rewards/accuracies": 0.9375, "rewards/chosen": -3.61348032951355, "rewards/margins": 4.879994869232178, "rewards/rejected": -8.493474960327148, "step": 2697 }, { "epoch": 0.56, "learning_rate": 8.747899159663866e-06, "logits/chosen": -2.5001142024993896, "logits/rejected": -2.1573007106781006, "logps/chosen": -332.5780029296875, "logps/rejected": -344.8141784667969, "loss": 0.3649, "rewards/accuracies": 0.8125, "rewards/chosen": -1.9403786659240723, "rewards/margins": 3.254493236541748, "rewards/rejected": -5.19487190246582, "step": 2698 }, { "epoch": 0.56, "learning_rate": 8.743697478991598e-06, "logits/chosen": -2.1604132652282715, "logits/rejected": -2.089139461517334, "logps/chosen": -366.3182067871094, "logps/rejected": -417.32781982421875, "loss": 0.1699, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9634768962860107, "rewards/margins": 4.155878067016602, "rewards/rejected": -7.119354724884033, "step": 2699 }, { "epoch": 0.56, "learning_rate": 8.739495798319328e-06, "logits/chosen": -2.213563919067383, "logits/rejected": -1.9549405574798584, "logps/chosen": -258.0982666015625, "logps/rejected": -328.23876953125, "loss": 0.2053, "rewards/accuracies": 0.8125, "rewards/chosen": -2.3350718021392822, "rewards/margins": 4.298139572143555, "rewards/rejected": -6.633211135864258, "step": 2700 }, { "epoch": 0.57, "learning_rate": 8.73529411764706e-06, "logits/chosen": -2.1656322479248047, "logits/rejected": -1.6679805517196655, "logps/chosen": -399.82763671875, "logps/rejected": -326.840576171875, "loss": 0.1248, "rewards/accuracies": 0.9375, "rewards/chosen": -4.259933948516846, "rewards/margins": 4.265561580657959, "rewards/rejected": -8.525495529174805, "step": 2701 }, { "epoch": 0.57, "learning_rate": 8.73109243697479e-06, "logits/chosen": -2.6672751903533936, "logits/rejected": -2.022127866744995, "logps/chosen": -510.03265380859375, "logps/rejected": -349.44293212890625, "loss": 0.3671, "rewards/accuracies": 0.875, "rewards/chosen": -2.8153295516967773, "rewards/margins": 5.115483283996582, "rewards/rejected": -7.930813312530518, "step": 2702 }, { "epoch": 0.57, "learning_rate": 8.726890756302522e-06, "logits/chosen": -1.8718903064727783, "logits/rejected": -2.108630657196045, "logps/chosen": -231.6609344482422, "logps/rejected": -288.3909606933594, "loss": 0.1025, "rewards/accuracies": 1.0, "rewards/chosen": -3.45017409324646, "rewards/margins": 5.107176780700684, "rewards/rejected": -8.557351112365723, "step": 2703 }, { "epoch": 0.57, "learning_rate": 8.722689075630252e-06, "logits/chosen": -2.074998617172241, "logits/rejected": -1.9197508096694946, "logps/chosen": -310.8554992675781, "logps/rejected": -363.63629150390625, "loss": 0.7156, "rewards/accuracies": 0.6875, "rewards/chosen": -4.458477973937988, "rewards/margins": 3.201475143432617, "rewards/rejected": -7.6599531173706055, "step": 2704 }, { "epoch": 0.57, "learning_rate": 8.718487394957984e-06, "logits/chosen": -2.3683180809020996, "logits/rejected": -1.646662712097168, "logps/chosen": -263.2167053222656, "logps/rejected": -266.9175720214844, "loss": 0.2083, "rewards/accuracies": 0.875, "rewards/chosen": -3.8014109134674072, "rewards/margins": 3.9933462142944336, "rewards/rejected": -7.794756889343262, "step": 2705 }, { "epoch": 0.57, "learning_rate": 8.714285714285715e-06, "logits/chosen": -2.214357852935791, "logits/rejected": -2.115215539932251, "logps/chosen": -327.6528015136719, "logps/rejected": -311.1213684082031, "loss": 0.2508, "rewards/accuracies": 0.875, "rewards/chosen": -2.3208508491516113, "rewards/margins": 4.3119659423828125, "rewards/rejected": -6.632816314697266, "step": 2706 }, { "epoch": 0.57, "learning_rate": 8.710084033613447e-06, "logits/chosen": -2.1791768074035645, "logits/rejected": -2.12751841545105, "logps/chosen": -301.55316162109375, "logps/rejected": -236.4023895263672, "loss": 0.3097, "rewards/accuracies": 0.875, "rewards/chosen": -2.9142112731933594, "rewards/margins": 4.173439979553223, "rewards/rejected": -7.08765172958374, "step": 2707 }, { "epoch": 0.57, "learning_rate": 8.705882352941177e-06, "logits/chosen": -2.231198310852051, "logits/rejected": -1.9991087913513184, "logps/chosen": -508.19085693359375, "logps/rejected": -461.7760009765625, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/chosen": -2.078423500061035, "rewards/margins": 6.187633991241455, "rewards/rejected": -8.266057014465332, "step": 2708 }, { "epoch": 0.57, "learning_rate": 8.701680672268909e-06, "logits/chosen": -2.195098400115967, "logits/rejected": -2.091689348220825, "logps/chosen": -366.6156921386719, "logps/rejected": -364.11907958984375, "loss": 0.2665, "rewards/accuracies": 0.875, "rewards/chosen": -3.5850656032562256, "rewards/margins": 3.8540866374969482, "rewards/rejected": -7.439152717590332, "step": 2709 }, { "epoch": 0.57, "learning_rate": 8.697478991596639e-06, "logits/chosen": -2.0250988006591797, "logits/rejected": -1.936099648475647, "logps/chosen": -395.562744140625, "logps/rejected": -501.33209228515625, "loss": 0.1058, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6543171405792236, "rewards/margins": 6.141094207763672, "rewards/rejected": -8.795411109924316, "step": 2710 }, { "epoch": 0.57, "learning_rate": 8.69327731092437e-06, "logits/chosen": -2.071742534637451, "logits/rejected": -1.9866383075714111, "logps/chosen": -284.0357360839844, "logps/rejected": -364.4813232421875, "loss": 0.1637, "rewards/accuracies": 0.9375, "rewards/chosen": -4.164823532104492, "rewards/margins": 5.1248273849487305, "rewards/rejected": -9.289650917053223, "step": 2711 }, { "epoch": 0.57, "learning_rate": 8.689075630252101e-06, "logits/chosen": -2.241704225540161, "logits/rejected": -1.9620360136032104, "logps/chosen": -336.57037353515625, "logps/rejected": -279.80908203125, "loss": 0.2048, "rewards/accuracies": 0.9375, "rewards/chosen": -2.747136116027832, "rewards/margins": 4.1562395095825195, "rewards/rejected": -6.903375625610352, "step": 2712 }, { "epoch": 0.57, "learning_rate": 8.684873949579833e-06, "logits/chosen": -2.1555747985839844, "logits/rejected": -1.872838020324707, "logps/chosen": -371.10052490234375, "logps/rejected": -383.5567932128906, "loss": 0.2636, "rewards/accuracies": 0.875, "rewards/chosen": -2.825657367706299, "rewards/margins": 4.172976493835449, "rewards/rejected": -6.998633861541748, "step": 2713 }, { "epoch": 0.57, "learning_rate": 8.680672268907563e-06, "logits/chosen": -2.2234668731689453, "logits/rejected": -2.1341452598571777, "logps/chosen": -491.4859619140625, "logps/rejected": -395.1780700683594, "loss": 0.2124, "rewards/accuracies": 0.875, "rewards/chosen": -2.460757255554199, "rewards/margins": 5.390682697296143, "rewards/rejected": -7.851439952850342, "step": 2714 }, { "epoch": 0.57, "learning_rate": 8.676470588235295e-06, "logits/chosen": -1.9256107807159424, "logits/rejected": -2.047905683517456, "logps/chosen": -236.48114013671875, "logps/rejected": -267.57220458984375, "loss": 0.3138, "rewards/accuracies": 0.75, "rewards/chosen": -3.3201799392700195, "rewards/margins": 3.5454423427581787, "rewards/rejected": -6.865622520446777, "step": 2715 }, { "epoch": 0.57, "learning_rate": 8.672268907563027e-06, "logits/chosen": -2.3635599613189697, "logits/rejected": -2.1418421268463135, "logps/chosen": -384.3231201171875, "logps/rejected": -424.45611572265625, "loss": 0.339, "rewards/accuracies": 0.75, "rewards/chosen": -3.120706081390381, "rewards/margins": 4.210129261016846, "rewards/rejected": -7.330835342407227, "step": 2716 }, { "epoch": 0.57, "learning_rate": 8.668067226890757e-06, "logits/chosen": -2.3286256790161133, "logits/rejected": -2.213660478591919, "logps/chosen": -289.77691650390625, "logps/rejected": -288.75628662109375, "loss": 0.4781, "rewards/accuracies": 0.75, "rewards/chosen": -4.372096061706543, "rewards/margins": 2.8783774375915527, "rewards/rejected": -7.250473499298096, "step": 2717 }, { "epoch": 0.57, "learning_rate": 8.663865546218489e-06, "logits/chosen": -2.212244749069214, "logits/rejected": -2.0899930000305176, "logps/chosen": -295.803955078125, "logps/rejected": -289.9457702636719, "loss": 0.7864, "rewards/accuracies": 0.8125, "rewards/chosen": -3.9074177742004395, "rewards/margins": 2.805502414703369, "rewards/rejected": -6.712920665740967, "step": 2718 }, { "epoch": 0.57, "learning_rate": 8.65966386554622e-06, "logits/chosen": -1.9873700141906738, "logits/rejected": -1.6415438652038574, "logps/chosen": -233.9061737060547, "logps/rejected": -268.8019714355469, "loss": 0.2341, "rewards/accuracies": 0.875, "rewards/chosen": -3.150973081588745, "rewards/margins": 4.107327461242676, "rewards/rejected": -7.25830078125, "step": 2719 }, { "epoch": 0.57, "learning_rate": 8.655462184873951e-06, "logits/chosen": -1.7120524644851685, "logits/rejected": -2.188736915588379, "logps/chosen": -310.07208251953125, "logps/rejected": -449.23699951171875, "loss": 0.9419, "rewards/accuracies": 0.6875, "rewards/chosen": -3.6013145446777344, "rewards/margins": 5.047693252563477, "rewards/rejected": -8.649007797241211, "step": 2720 }, { "epoch": 0.57, "learning_rate": 8.651260504201681e-06, "logits/chosen": -2.1267619132995605, "logits/rejected": -1.9256224632263184, "logps/chosen": -321.3891296386719, "logps/rejected": -300.41473388671875, "loss": 0.2765, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4399349689483643, "rewards/margins": 4.2923173904418945, "rewards/rejected": -7.73225212097168, "step": 2721 }, { "epoch": 0.57, "learning_rate": 8.647058823529413e-06, "logits/chosen": -2.4357833862304688, "logits/rejected": -2.2987172603607178, "logps/chosen": -339.218017578125, "logps/rejected": -404.8612060546875, "loss": 0.1113, "rewards/accuracies": 0.9375, "rewards/chosen": -3.725738048553467, "rewards/margins": 7.057034015655518, "rewards/rejected": -10.7827730178833, "step": 2722 }, { "epoch": 0.57, "learning_rate": 8.642857142857144e-06, "logits/chosen": -1.9419734477996826, "logits/rejected": -1.9946268796920776, "logps/chosen": -342.5497741699219, "logps/rejected": -334.2364501953125, "loss": 0.339, "rewards/accuracies": 0.8125, "rewards/chosen": -4.621908187866211, "rewards/margins": 3.022494316101074, "rewards/rejected": -7.644402503967285, "step": 2723 }, { "epoch": 0.57, "learning_rate": 8.638655462184876e-06, "logits/chosen": -2.0321571826934814, "logits/rejected": -1.9260424375534058, "logps/chosen": -256.74462890625, "logps/rejected": -317.1990051269531, "loss": 0.4921, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3484292030334473, "rewards/margins": 4.982367515563965, "rewards/rejected": -8.33079719543457, "step": 2724 }, { "epoch": 0.57, "learning_rate": 8.634453781512606e-06, "logits/chosen": -2.0247995853424072, "logits/rejected": -2.14101505279541, "logps/chosen": -271.5347900390625, "logps/rejected": -359.88275146484375, "loss": 0.4362, "rewards/accuracies": 0.8125, "rewards/chosen": -4.442434787750244, "rewards/margins": 3.3044795989990234, "rewards/rejected": -7.746914386749268, "step": 2725 }, { "epoch": 0.57, "learning_rate": 8.630252100840338e-06, "logits/chosen": -1.8769214153289795, "logits/rejected": -1.9412651062011719, "logps/chosen": -312.9871520996094, "logps/rejected": -355.88446044921875, "loss": 0.7236, "rewards/accuracies": 0.6875, "rewards/chosen": -3.403740644454956, "rewards/margins": 3.570530891418457, "rewards/rejected": -6.974271774291992, "step": 2726 }, { "epoch": 0.57, "learning_rate": 8.626050420168068e-06, "logits/chosen": -2.022946357727051, "logits/rejected": -1.8456951379776, "logps/chosen": -437.06903076171875, "logps/rejected": -440.7738037109375, "loss": 0.5504, "rewards/accuracies": 0.6875, "rewards/chosen": -3.5313549041748047, "rewards/margins": 3.5492324829101562, "rewards/rejected": -7.080587863922119, "step": 2727 }, { "epoch": 0.57, "learning_rate": 8.6218487394958e-06, "logits/chosen": -2.2058606147766113, "logits/rejected": -1.8517531156539917, "logps/chosen": -394.8498840332031, "logps/rejected": -420.2686767578125, "loss": 0.5733, "rewards/accuracies": 0.75, "rewards/chosen": -3.546185255050659, "rewards/margins": 4.552314758300781, "rewards/rejected": -8.09850025177002, "step": 2728 }, { "epoch": 0.57, "learning_rate": 8.61764705882353e-06, "logits/chosen": -2.1698315143585205, "logits/rejected": -2.3035449981689453, "logps/chosen": -243.48663330078125, "logps/rejected": -301.6219482421875, "loss": 0.1097, "rewards/accuracies": 0.9375, "rewards/chosen": -4.19541597366333, "rewards/margins": 4.900232315063477, "rewards/rejected": -9.095647811889648, "step": 2729 }, { "epoch": 0.57, "learning_rate": 8.613445378151262e-06, "logits/chosen": -2.1246962547302246, "logits/rejected": -1.770324468612671, "logps/chosen": -389.90789794921875, "logps/rejected": -335.2018127441406, "loss": 0.3047, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2084178924560547, "rewards/margins": 4.77372932434082, "rewards/rejected": -7.982147216796875, "step": 2730 }, { "epoch": 0.57, "learning_rate": 8.609243697478992e-06, "logits/chosen": -2.10770845413208, "logits/rejected": -2.1845364570617676, "logps/chosen": -279.52001953125, "logps/rejected": -329.3961486816406, "loss": 0.375, "rewards/accuracies": 0.875, "rewards/chosen": -3.1655659675598145, "rewards/margins": 2.0775585174560547, "rewards/rejected": -5.243124008178711, "step": 2731 }, { "epoch": 0.57, "learning_rate": 8.605042016806724e-06, "logits/chosen": -2.3080530166625977, "logits/rejected": -1.9859060049057007, "logps/chosen": -358.9238586425781, "logps/rejected": -353.3697204589844, "loss": 0.4408, "rewards/accuracies": 0.9375, "rewards/chosen": -3.16860294342041, "rewards/margins": 5.312753677368164, "rewards/rejected": -8.481356620788574, "step": 2732 }, { "epoch": 0.57, "learning_rate": 8.600840336134454e-06, "logits/chosen": -2.1886751651763916, "logits/rejected": -2.3670477867126465, "logps/chosen": -291.016357421875, "logps/rejected": -432.73663330078125, "loss": 0.2036, "rewards/accuracies": 0.875, "rewards/chosen": -3.796029567718506, "rewards/margins": 4.382058620452881, "rewards/rejected": -8.178088188171387, "step": 2733 }, { "epoch": 0.57, "learning_rate": 8.596638655462186e-06, "logits/chosen": -1.9103906154632568, "logits/rejected": -2.021730661392212, "logps/chosen": -295.30938720703125, "logps/rejected": -338.095703125, "loss": 0.4215, "rewards/accuracies": 0.875, "rewards/chosen": -3.5600361824035645, "rewards/margins": 3.3560800552368164, "rewards/rejected": -6.916116237640381, "step": 2734 }, { "epoch": 0.57, "learning_rate": 8.592436974789916e-06, "logits/chosen": -1.9595222473144531, "logits/rejected": -1.8773701190948486, "logps/chosen": -354.2728271484375, "logps/rejected": -421.49627685546875, "loss": 0.9587, "rewards/accuracies": 0.8125, "rewards/chosen": -3.4703714847564697, "rewards/margins": 3.893065929412842, "rewards/rejected": -7.363436698913574, "step": 2735 }, { "epoch": 0.57, "learning_rate": 8.588235294117647e-06, "logits/chosen": -1.9225609302520752, "logits/rejected": -1.8159414529800415, "logps/chosen": -255.03848266601562, "logps/rejected": -283.30792236328125, "loss": 0.3654, "rewards/accuracies": 0.75, "rewards/chosen": -3.1242165565490723, "rewards/margins": 3.513643264770508, "rewards/rejected": -6.63785982131958, "step": 2736 }, { "epoch": 0.57, "learning_rate": 8.584033613445379e-06, "logits/chosen": -2.1396279335021973, "logits/rejected": -1.8366109132766724, "logps/chosen": -353.5950622558594, "logps/rejected": -317.05364990234375, "loss": 0.3519, "rewards/accuracies": 0.8125, "rewards/chosen": -2.370506763458252, "rewards/margins": 2.6922452449798584, "rewards/rejected": -5.062751770019531, "step": 2737 }, { "epoch": 0.57, "learning_rate": 8.579831932773109e-06, "logits/chosen": -2.2322375774383545, "logits/rejected": -2.0567448139190674, "logps/chosen": -382.1094970703125, "logps/rejected": -453.89105224609375, "loss": 0.2687, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6861536502838135, "rewards/margins": 4.810391902923584, "rewards/rejected": -7.496545314788818, "step": 2738 }, { "epoch": 0.57, "learning_rate": 8.57563025210084e-06, "logits/chosen": -1.9626046419143677, "logits/rejected": -1.8405790328979492, "logps/chosen": -257.1316833496094, "logps/rejected": -334.7016296386719, "loss": 0.2199, "rewards/accuracies": 0.875, "rewards/chosen": -3.153595447540283, "rewards/margins": 5.155060768127441, "rewards/rejected": -8.308655738830566, "step": 2739 }, { "epoch": 0.57, "learning_rate": 8.571428571428571e-06, "logits/chosen": -2.47385835647583, "logits/rejected": -2.13322377204895, "logps/chosen": -443.5631103515625, "logps/rejected": -359.7532653808594, "loss": 0.1944, "rewards/accuracies": 0.9375, "rewards/chosen": -2.4087367057800293, "rewards/margins": 4.697868347167969, "rewards/rejected": -7.106605529785156, "step": 2740 }, { "epoch": 0.57, "learning_rate": 8.567226890756303e-06, "logits/chosen": -1.9346528053283691, "logits/rejected": -1.9264346361160278, "logps/chosen": -241.3891143798828, "logps/rejected": -295.87591552734375, "loss": 0.2242, "rewards/accuracies": 0.875, "rewards/chosen": -2.3510055541992188, "rewards/margins": 4.411670684814453, "rewards/rejected": -6.762675762176514, "step": 2741 }, { "epoch": 0.57, "learning_rate": 8.563025210084033e-06, "logits/chosen": -2.4686498641967773, "logits/rejected": -1.513934850692749, "logps/chosen": -389.2949523925781, "logps/rejected": -313.2209777832031, "loss": 0.1133, "rewards/accuracies": 0.9375, "rewards/chosen": -2.3460185527801514, "rewards/margins": 6.069815635681152, "rewards/rejected": -8.415834426879883, "step": 2742 }, { "epoch": 0.57, "learning_rate": 8.558823529411765e-06, "logits/chosen": -1.976921558380127, "logits/rejected": -2.0476112365722656, "logps/chosen": -295.4115295410156, "logps/rejected": -344.71636962890625, "loss": 0.1671, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9183807373046875, "rewards/margins": 4.755885601043701, "rewards/rejected": -7.6742658615112305, "step": 2743 }, { "epoch": 0.57, "learning_rate": 8.554621848739497e-06, "logits/chosen": -2.357006072998047, "logits/rejected": -2.1295089721679688, "logps/chosen": -336.525634765625, "logps/rejected": -323.97760009765625, "loss": 0.2236, "rewards/accuracies": 0.9375, "rewards/chosen": -3.4714977741241455, "rewards/margins": 2.4537720680236816, "rewards/rejected": -5.925270080566406, "step": 2744 }, { "epoch": 0.57, "learning_rate": 8.550420168067227e-06, "logits/chosen": -2.479200839996338, "logits/rejected": -2.314918279647827, "logps/chosen": -357.25653076171875, "logps/rejected": -326.67742919921875, "loss": 0.2253, "rewards/accuracies": 0.8125, "rewards/chosen": -2.705740451812744, "rewards/margins": 3.5112805366516113, "rewards/rejected": -6.2170209884643555, "step": 2745 }, { "epoch": 0.57, "learning_rate": 8.546218487394959e-06, "logits/chosen": -2.3939208984375, "logits/rejected": -1.787131905555725, "logps/chosen": -472.98162841796875, "logps/rejected": -378.8699645996094, "loss": 0.0715, "rewards/accuracies": 1.0, "rewards/chosen": -3.249552011489868, "rewards/margins": 4.788482666015625, "rewards/rejected": -8.038034439086914, "step": 2746 }, { "epoch": 0.57, "learning_rate": 8.54201680672269e-06, "logits/chosen": -2.2116236686706543, "logits/rejected": -1.9275844097137451, "logps/chosen": -394.430908203125, "logps/rejected": -310.01678466796875, "loss": 0.3442, "rewards/accuracies": 0.9375, "rewards/chosen": -3.295100212097168, "rewards/margins": 4.601902008056641, "rewards/rejected": -7.897002696990967, "step": 2747 }, { "epoch": 0.57, "learning_rate": 8.537815126050421e-06, "logits/chosen": -2.2263567447662354, "logits/rejected": -1.7479074001312256, "logps/chosen": -272.418701171875, "logps/rejected": -248.99655151367188, "loss": 0.2152, "rewards/accuracies": 0.875, "rewards/chosen": -3.277068614959717, "rewards/margins": 3.5347352027893066, "rewards/rejected": -6.811803817749023, "step": 2748 }, { "epoch": 0.58, "learning_rate": 8.533613445378151e-06, "logits/chosen": -2.09126615524292, "logits/rejected": -1.7565053701400757, "logps/chosen": -376.72015380859375, "logps/rejected": -318.0309143066406, "loss": 0.6299, "rewards/accuracies": 0.8125, "rewards/chosen": -4.174788475036621, "rewards/margins": 3.727644920349121, "rewards/rejected": -7.9024338722229, "step": 2749 }, { "epoch": 0.58, "learning_rate": 8.529411764705883e-06, "logits/chosen": -1.9707107543945312, "logits/rejected": -2.1534948348999023, "logps/chosen": -389.84368896484375, "logps/rejected": -344.63653564453125, "loss": 0.249, "rewards/accuracies": 0.75, "rewards/chosen": -2.834862470626831, "rewards/margins": 3.604830741882324, "rewards/rejected": -6.439692974090576, "step": 2750 }, { "epoch": 0.58, "learning_rate": 8.525210084033614e-06, "logits/chosen": -1.9963254928588867, "logits/rejected": -1.783385992050171, "logps/chosen": -272.95709228515625, "logps/rejected": -267.8773193359375, "loss": 0.2217, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2382700443267822, "rewards/margins": 3.3290963172912598, "rewards/rejected": -6.567366600036621, "step": 2751 }, { "epoch": 0.58, "learning_rate": 8.521008403361345e-06, "logits/chosen": -1.9712328910827637, "logits/rejected": -2.0587267875671387, "logps/chosen": -331.9048767089844, "logps/rejected": -351.9537658691406, "loss": 0.481, "rewards/accuracies": 0.6875, "rewards/chosen": -4.469090938568115, "rewards/margins": 3.4372684955596924, "rewards/rejected": -7.9063591957092285, "step": 2752 }, { "epoch": 0.58, "learning_rate": 8.516806722689076e-06, "logits/chosen": -1.8107143640518188, "logits/rejected": -2.1633617877960205, "logps/chosen": -339.5399475097656, "logps/rejected": -420.3126220703125, "loss": 0.3497, "rewards/accuracies": 0.9375, "rewards/chosen": -2.945976734161377, "rewards/margins": 3.6860883235931396, "rewards/rejected": -6.6320648193359375, "step": 2753 }, { "epoch": 0.58, "learning_rate": 8.512605042016808e-06, "logits/chosen": -2.3672072887420654, "logits/rejected": -2.229079246520996, "logps/chosen": -367.57208251953125, "logps/rejected": -360.6598815917969, "loss": 0.4667, "rewards/accuracies": 0.8125, "rewards/chosen": -3.000546932220459, "rewards/margins": 4.037282943725586, "rewards/rejected": -7.037830352783203, "step": 2754 }, { "epoch": 0.58, "learning_rate": 8.508403361344538e-06, "logits/chosen": -2.0224218368530273, "logits/rejected": -2.164910316467285, "logps/chosen": -363.3284606933594, "logps/rejected": -406.784912109375, "loss": 0.4544, "rewards/accuracies": 0.75, "rewards/chosen": -4.017745018005371, "rewards/margins": 3.6307172775268555, "rewards/rejected": -7.648462295532227, "step": 2755 }, { "epoch": 0.58, "learning_rate": 8.50420168067227e-06, "logits/chosen": -2.3570475578308105, "logits/rejected": -2.0096800327301025, "logps/chosen": -294.29901123046875, "logps/rejected": -287.4659118652344, "loss": 0.2954, "rewards/accuracies": 0.875, "rewards/chosen": -3.8642473220825195, "rewards/margins": 4.2596588134765625, "rewards/rejected": -8.123906135559082, "step": 2756 }, { "epoch": 0.58, "learning_rate": 8.5e-06, "logits/chosen": -2.246244430541992, "logits/rejected": -1.7577433586120605, "logps/chosen": -354.1215515136719, "logps/rejected": -286.30718994140625, "loss": 0.4151, "rewards/accuracies": 0.8125, "rewards/chosen": -4.069730281829834, "rewards/margins": 2.696025848388672, "rewards/rejected": -6.765755653381348, "step": 2757 }, { "epoch": 0.58, "learning_rate": 8.495798319327732e-06, "logits/chosen": -2.0918283462524414, "logits/rejected": -1.9773999452590942, "logps/chosen": -325.2257080078125, "logps/rejected": -681.0278930664062, "loss": 0.1745, "rewards/accuracies": 0.875, "rewards/chosen": -2.468827486038208, "rewards/margins": 5.041810035705566, "rewards/rejected": -7.510637283325195, "step": 2758 }, { "epoch": 0.58, "learning_rate": 8.491596638655462e-06, "logits/chosen": -2.1199092864990234, "logits/rejected": -2.0753183364868164, "logps/chosen": -317.63995361328125, "logps/rejected": -368.48016357421875, "loss": 0.7816, "rewards/accuracies": 0.75, "rewards/chosen": -3.605165719985962, "rewards/margins": 3.129185438156128, "rewards/rejected": -6.73435115814209, "step": 2759 }, { "epoch": 0.58, "learning_rate": 8.487394957983194e-06, "logits/chosen": -2.4660263061523438, "logits/rejected": -2.0746824741363525, "logps/chosen": -500.5414123535156, "logps/rejected": -409.3662109375, "loss": 0.3254, "rewards/accuracies": 0.8125, "rewards/chosen": -3.141390323638916, "rewards/margins": 3.743461847305298, "rewards/rejected": -6.884852409362793, "step": 2760 }, { "epoch": 0.58, "learning_rate": 8.483193277310924e-06, "logits/chosen": -2.048506736755371, "logits/rejected": -2.1086485385894775, "logps/chosen": -244.1903076171875, "logps/rejected": -255.09521484375, "loss": 0.2566, "rewards/accuracies": 0.8125, "rewards/chosen": -2.9720146656036377, "rewards/margins": 3.8307254314422607, "rewards/rejected": -6.80273962020874, "step": 2761 }, { "epoch": 0.58, "learning_rate": 8.478991596638656e-06, "logits/chosen": -1.9258618354797363, "logits/rejected": -1.8111095428466797, "logps/chosen": -289.5670166015625, "logps/rejected": -267.3599853515625, "loss": 0.2838, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1344733238220215, "rewards/margins": 3.522833824157715, "rewards/rejected": -6.6573076248168945, "step": 2762 }, { "epoch": 0.58, "learning_rate": 8.474789915966386e-06, "logits/chosen": -1.9609168767929077, "logits/rejected": -1.9910008907318115, "logps/chosen": -412.28765869140625, "logps/rejected": -428.9263610839844, "loss": 0.1521, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8766653537750244, "rewards/margins": 5.185330390930176, "rewards/rejected": -8.061995506286621, "step": 2763 }, { "epoch": 0.58, "learning_rate": 8.470588235294118e-06, "logits/chosen": -2.2419185638427734, "logits/rejected": -2.4218924045562744, "logps/chosen": -278.5075988769531, "logps/rejected": -370.38385009765625, "loss": 0.4257, "rewards/accuracies": 0.75, "rewards/chosen": -3.6476404666900635, "rewards/margins": 3.3917269706726074, "rewards/rejected": -7.03936767578125, "step": 2764 }, { "epoch": 0.58, "learning_rate": 8.46638655462185e-06, "logits/chosen": -2.202385425567627, "logits/rejected": -1.91288161277771, "logps/chosen": -350.34429931640625, "logps/rejected": -325.995849609375, "loss": 0.2618, "rewards/accuracies": 0.875, "rewards/chosen": -2.996974468231201, "rewards/margins": 3.909031867980957, "rewards/rejected": -6.906006336212158, "step": 2765 }, { "epoch": 0.58, "learning_rate": 8.46218487394958e-06, "logits/chosen": -1.9805129766464233, "logits/rejected": -1.8966350555419922, "logps/chosen": -328.09906005859375, "logps/rejected": -473.0970153808594, "loss": 0.52, "rewards/accuracies": 0.75, "rewards/chosen": -3.1002235412597656, "rewards/margins": 2.876615524291992, "rewards/rejected": -5.976839065551758, "step": 2766 }, { "epoch": 0.58, "learning_rate": 8.457983193277312e-06, "logits/chosen": -2.1060500144958496, "logits/rejected": -1.880358338356018, "logps/chosen": -238.4542236328125, "logps/rejected": -262.7034912109375, "loss": 0.3534, "rewards/accuracies": 0.875, "rewards/chosen": -3.021040439605713, "rewards/margins": 2.067019462585449, "rewards/rejected": -5.088059902191162, "step": 2767 }, { "epoch": 0.58, "learning_rate": 8.453781512605043e-06, "logits/chosen": -2.1656126976013184, "logits/rejected": -1.620773434638977, "logps/chosen": -450.619873046875, "logps/rejected": -300.18896484375, "loss": 0.1291, "rewards/accuracies": 0.9375, "rewards/chosen": -2.9355082511901855, "rewards/margins": 4.130982875823975, "rewards/rejected": -7.06649112701416, "step": 2768 }, { "epoch": 0.58, "learning_rate": 8.449579831932774e-06, "logits/chosen": -1.9953278303146362, "logits/rejected": -1.7384717464447021, "logps/chosen": -340.85662841796875, "logps/rejected": -329.06097412109375, "loss": 0.3799, "rewards/accuracies": 0.75, "rewards/chosen": -2.7691805362701416, "rewards/margins": 3.792186737060547, "rewards/rejected": -6.561367034912109, "step": 2769 }, { "epoch": 0.58, "learning_rate": 8.445378151260505e-06, "logits/chosen": -1.913532018661499, "logits/rejected": -2.2707715034484863, "logps/chosen": -455.59881591796875, "logps/rejected": -611.9102783203125, "loss": 0.6188, "rewards/accuracies": 0.625, "rewards/chosen": -4.793298721313477, "rewards/margins": 3.1364054679870605, "rewards/rejected": -7.929703712463379, "step": 2770 }, { "epoch": 0.58, "learning_rate": 8.441176470588237e-06, "logits/chosen": -2.1847782135009766, "logits/rejected": -2.245988130569458, "logps/chosen": -286.3011779785156, "logps/rejected": -327.3034362792969, "loss": 0.2174, "rewards/accuracies": 0.9375, "rewards/chosen": -3.532351493835449, "rewards/margins": 4.090540885925293, "rewards/rejected": -7.622891902923584, "step": 2771 }, { "epoch": 0.58, "learning_rate": 8.436974789915967e-06, "logits/chosen": -2.3693184852600098, "logits/rejected": -1.832909345626831, "logps/chosen": -307.59466552734375, "logps/rejected": -270.8913269042969, "loss": 0.307, "rewards/accuracies": 0.875, "rewards/chosen": -3.4480719566345215, "rewards/margins": 2.7676219940185547, "rewards/rejected": -6.215693473815918, "step": 2772 }, { "epoch": 0.58, "learning_rate": 8.432773109243699e-06, "logits/chosen": -2.3170132637023926, "logits/rejected": -2.1645610332489014, "logps/chosen": -313.81915283203125, "logps/rejected": -330.9603271484375, "loss": 0.173, "rewards/accuracies": 0.875, "rewards/chosen": -3.2745020389556885, "rewards/margins": 4.4955902099609375, "rewards/rejected": -7.770092010498047, "step": 2773 }, { "epoch": 0.58, "learning_rate": 8.428571428571429e-06, "logits/chosen": -1.7949676513671875, "logits/rejected": -1.7654428482055664, "logps/chosen": -313.2247314453125, "logps/rejected": -284.2032165527344, "loss": 0.2479, "rewards/accuracies": 0.875, "rewards/chosen": -3.2744646072387695, "rewards/margins": 3.678178310394287, "rewards/rejected": -6.952643394470215, "step": 2774 }, { "epoch": 0.58, "learning_rate": 8.424369747899161e-06, "logits/chosen": -2.18754243850708, "logits/rejected": -2.008413791656494, "logps/chosen": -362.43621826171875, "logps/rejected": -312.5030517578125, "loss": 0.486, "rewards/accuracies": 0.75, "rewards/chosen": -3.667646646499634, "rewards/margins": 2.5805516242980957, "rewards/rejected": -6.248198509216309, "step": 2775 }, { "epoch": 0.58, "learning_rate": 8.420168067226891e-06, "logits/chosen": -1.9346718788146973, "logits/rejected": -1.8294777870178223, "logps/chosen": -355.0703125, "logps/rejected": -350.0106506347656, "loss": 0.4653, "rewards/accuracies": 0.75, "rewards/chosen": -4.0981059074401855, "rewards/margins": 2.373842239379883, "rewards/rejected": -6.471948146820068, "step": 2776 }, { "epoch": 0.58, "learning_rate": 8.415966386554623e-06, "logits/chosen": -2.3585987091064453, "logits/rejected": -1.9688363075256348, "logps/chosen": -418.8023681640625, "logps/rejected": -376.44287109375, "loss": 0.6314, "rewards/accuracies": 0.875, "rewards/chosen": -2.7327287197113037, "rewards/margins": 4.166337966918945, "rewards/rejected": -6.899066925048828, "step": 2777 }, { "epoch": 0.58, "learning_rate": 8.411764705882353e-06, "logits/chosen": -2.085252523422241, "logits/rejected": -1.9837133884429932, "logps/chosen": -307.84088134765625, "logps/rejected": -310.72540283203125, "loss": 0.0782, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6739141941070557, "rewards/margins": 4.90252161026001, "rewards/rejected": -7.5764360427856445, "step": 2778 }, { "epoch": 0.58, "learning_rate": 8.407563025210085e-06, "logits/chosen": -2.0152535438537598, "logits/rejected": -2.121246576309204, "logps/chosen": -305.7744445800781, "logps/rejected": -425.6885986328125, "loss": 0.1232, "rewards/accuracies": 1.0, "rewards/chosen": -2.6054189205169678, "rewards/margins": 4.896279811859131, "rewards/rejected": -7.501698970794678, "step": 2779 }, { "epoch": 0.58, "learning_rate": 8.403361344537815e-06, "logits/chosen": -2.1084814071655273, "logits/rejected": -1.8833231925964355, "logps/chosen": -373.2188720703125, "logps/rejected": -368.16717529296875, "loss": 0.4532, "rewards/accuracies": 0.8125, "rewards/chosen": -4.181241989135742, "rewards/margins": 3.458427906036377, "rewards/rejected": -7.639669895172119, "step": 2780 }, { "epoch": 0.58, "learning_rate": 8.399159663865547e-06, "logits/chosen": -2.082890272140503, "logits/rejected": -1.941098690032959, "logps/chosen": -294.1342468261719, "logps/rejected": -416.9605407714844, "loss": 0.2295, "rewards/accuracies": 0.875, "rewards/chosen": -3.9211888313293457, "rewards/margins": 3.5427868366241455, "rewards/rejected": -7.46397590637207, "step": 2781 }, { "epoch": 0.58, "learning_rate": 8.394957983193277e-06, "logits/chosen": -2.0943055152893066, "logits/rejected": -1.982130527496338, "logps/chosen": -296.4791564941406, "logps/rejected": -322.4605712890625, "loss": 0.5699, "rewards/accuracies": 0.75, "rewards/chosen": -3.8016984462738037, "rewards/margins": 2.4455103874206543, "rewards/rejected": -6.247209072113037, "step": 2782 }, { "epoch": 0.58, "learning_rate": 8.39075630252101e-06, "logits/chosen": -2.210841417312622, "logits/rejected": -1.247216820716858, "logps/chosen": -362.2924499511719, "logps/rejected": -358.727783203125, "loss": 0.1903, "rewards/accuracies": 0.875, "rewards/chosen": -3.836737632751465, "rewards/margins": 5.2190260887146, "rewards/rejected": -9.055764198303223, "step": 2783 }, { "epoch": 0.58, "learning_rate": 8.38655462184874e-06, "logits/chosen": -2.2934329509735107, "logits/rejected": -1.888594150543213, "logps/chosen": -351.5435791015625, "logps/rejected": -293.0517883300781, "loss": 0.621, "rewards/accuracies": 0.625, "rewards/chosen": -3.551410436630249, "rewards/margins": 2.662245273590088, "rewards/rejected": -6.213655471801758, "step": 2784 }, { "epoch": 0.58, "learning_rate": 8.382352941176472e-06, "logits/chosen": -2.1756348609924316, "logits/rejected": -2.043821334838867, "logps/chosen": -360.9361572265625, "logps/rejected": -348.6968688964844, "loss": 0.5248, "rewards/accuracies": 0.75, "rewards/chosen": -3.6509859561920166, "rewards/margins": 2.452268123626709, "rewards/rejected": -6.103254318237305, "step": 2785 }, { "epoch": 0.58, "learning_rate": 8.378151260504202e-06, "logits/chosen": -1.7733654975891113, "logits/rejected": -2.1611368656158447, "logps/chosen": -322.57891845703125, "logps/rejected": -406.50506591796875, "loss": 0.3779, "rewards/accuracies": 0.875, "rewards/chosen": -3.5862035751342773, "rewards/margins": 3.7182374000549316, "rewards/rejected": -7.304441452026367, "step": 2786 }, { "epoch": 0.58, "learning_rate": 8.373949579831934e-06, "logits/chosen": -1.7838860750198364, "logits/rejected": -1.7842795848846436, "logps/chosen": -322.0689697265625, "logps/rejected": -374.1538391113281, "loss": 0.1184, "rewards/accuracies": 0.9375, "rewards/chosen": -2.7560086250305176, "rewards/margins": 4.20697021484375, "rewards/rejected": -6.962978839874268, "step": 2787 }, { "epoch": 0.58, "learning_rate": 8.369747899159666e-06, "logits/chosen": -1.9061880111694336, "logits/rejected": -1.9838675260543823, "logps/chosen": -279.1058349609375, "logps/rejected": -332.3639221191406, "loss": 0.4248, "rewards/accuracies": 0.8125, "rewards/chosen": -3.7480697631835938, "rewards/margins": 2.9088857173919678, "rewards/rejected": -6.656955242156982, "step": 2788 }, { "epoch": 0.58, "learning_rate": 8.365546218487396e-06, "logits/chosen": -1.9424360990524292, "logits/rejected": -2.121950626373291, "logps/chosen": -441.90869140625, "logps/rejected": -400.1189270019531, "loss": 0.1411, "rewards/accuracies": 0.9375, "rewards/chosen": -2.360245704650879, "rewards/margins": 4.127459526062012, "rewards/rejected": -6.487705707550049, "step": 2789 }, { "epoch": 0.58, "learning_rate": 8.361344537815128e-06, "logits/chosen": -2.2728476524353027, "logits/rejected": -1.8819782733917236, "logps/chosen": -302.7781677246094, "logps/rejected": -253.2996063232422, "loss": 0.146, "rewards/accuracies": 1.0, "rewards/chosen": -2.595346689224243, "rewards/margins": 4.623626232147217, "rewards/rejected": -7.218973159790039, "step": 2790 }, { "epoch": 0.58, "learning_rate": 8.357142857142858e-06, "logits/chosen": -2.2668657302856445, "logits/rejected": -1.9130792617797852, "logps/chosen": -461.9660949707031, "logps/rejected": -356.5831604003906, "loss": 0.1988, "rewards/accuracies": 0.9375, "rewards/chosen": -2.409268617630005, "rewards/margins": 3.6587791442871094, "rewards/rejected": -6.068047523498535, "step": 2791 }, { "epoch": 0.58, "learning_rate": 8.35294117647059e-06, "logits/chosen": -2.04266619682312, "logits/rejected": -1.8873331546783447, "logps/chosen": -328.31317138671875, "logps/rejected": -318.4830017089844, "loss": 0.6483, "rewards/accuracies": 0.9375, "rewards/chosen": -3.6163482666015625, "rewards/margins": 3.330782890319824, "rewards/rejected": -6.94713020324707, "step": 2792 }, { "epoch": 0.58, "learning_rate": 8.34873949579832e-06, "logits/chosen": -2.2646713256835938, "logits/rejected": -1.931402564048767, "logps/chosen": -283.7231140136719, "logps/rejected": -335.3396911621094, "loss": 0.3156, "rewards/accuracies": 0.8125, "rewards/chosen": -3.831061601638794, "rewards/margins": 3.654247283935547, "rewards/rejected": -7.485308647155762, "step": 2793 }, { "epoch": 0.58, "learning_rate": 8.344537815126052e-06, "logits/chosen": -2.2812507152557373, "logits/rejected": -2.142279863357544, "logps/chosen": -301.21826171875, "logps/rejected": -331.44244384765625, "loss": 0.4129, "rewards/accuracies": 0.875, "rewards/chosen": -3.496366024017334, "rewards/margins": 3.1071934700012207, "rewards/rejected": -6.603559494018555, "step": 2794 }, { "epoch": 0.58, "learning_rate": 8.340336134453782e-06, "logits/chosen": -2.236924648284912, "logits/rejected": -1.9672925472259521, "logps/chosen": -393.2303466796875, "logps/rejected": -373.8767395019531, "loss": 0.3419, "rewards/accuracies": 0.875, "rewards/chosen": -2.8130574226379395, "rewards/margins": 4.237860679626465, "rewards/rejected": -7.0509185791015625, "step": 2795 }, { "epoch": 0.58, "learning_rate": 8.336134453781514e-06, "logits/chosen": -2.3719825744628906, "logits/rejected": -2.116333246231079, "logps/chosen": -364.54010009765625, "logps/rejected": -399.49456787109375, "loss": 0.6433, "rewards/accuracies": 0.8125, "rewards/chosen": -4.476327896118164, "rewards/margins": 2.731562852859497, "rewards/rejected": -7.207890510559082, "step": 2796 }, { "epoch": 0.59, "learning_rate": 8.331932773109244e-06, "logits/chosen": -2.025804042816162, "logits/rejected": -2.1762540340423584, "logps/chosen": -201.6788330078125, "logps/rejected": -288.7137451171875, "loss": 0.2266, "rewards/accuracies": 0.9375, "rewards/chosen": -3.5049009323120117, "rewards/margins": 3.50118088722229, "rewards/rejected": -7.0060811042785645, "step": 2797 }, { "epoch": 0.59, "learning_rate": 8.327731092436976e-06, "logits/chosen": -2.3107378482818604, "logits/rejected": -1.8063175678253174, "logps/chosen": -309.6197814941406, "logps/rejected": -310.44390869140625, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -2.897059917449951, "rewards/margins": 5.263006210327148, "rewards/rejected": -8.160066604614258, "step": 2798 }, { "epoch": 0.59, "learning_rate": 8.323529411764707e-06, "logits/chosen": -2.088073492050171, "logits/rejected": -1.984992265701294, "logps/chosen": -298.46868896484375, "logps/rejected": -299.87664794921875, "loss": 0.2191, "rewards/accuracies": 0.9375, "rewards/chosen": -3.6454598903656006, "rewards/margins": 3.8441081047058105, "rewards/rejected": -7.48956823348999, "step": 2799 }, { "epoch": 0.59, "learning_rate": 8.319327731092438e-06, "logits/chosen": -1.8999435901641846, "logits/rejected": -1.975258708000183, "logps/chosen": -233.81503295898438, "logps/rejected": -267.8039245605469, "loss": 0.423, "rewards/accuracies": 0.8125, "rewards/chosen": -3.6107382774353027, "rewards/margins": 2.935202121734619, "rewards/rejected": -6.545940399169922, "step": 2800 }, { "epoch": 0.59, "learning_rate": 8.315126050420169e-06, "logits/chosen": -2.0748801231384277, "logits/rejected": -1.9033212661743164, "logps/chosen": -376.46484375, "logps/rejected": -376.0827941894531, "loss": 0.0856, "rewards/accuracies": 0.9375, "rewards/chosen": -2.930233955383301, "rewards/margins": 5.441582202911377, "rewards/rejected": -8.37181568145752, "step": 2801 }, { "epoch": 0.59, "learning_rate": 8.3109243697479e-06, "logits/chosen": -2.065798759460449, "logits/rejected": -2.2086679935455322, "logps/chosen": -277.2217712402344, "logps/rejected": -395.1743469238281, "loss": 0.4226, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7898495197296143, "rewards/margins": 2.7232718467712402, "rewards/rejected": -5.513121128082275, "step": 2802 }, { "epoch": 0.59, "learning_rate": 8.30672268907563e-06, "logits/chosen": -2.1602094173431396, "logits/rejected": -1.749508023262024, "logps/chosen": -346.4497375488281, "logps/rejected": -289.76220703125, "loss": 0.2109, "rewards/accuracies": 0.8125, "rewards/chosen": -2.653831720352173, "rewards/margins": 4.648664474487305, "rewards/rejected": -7.302495956420898, "step": 2803 }, { "epoch": 0.59, "learning_rate": 8.302521008403363e-06, "logits/chosen": -2.107135534286499, "logits/rejected": -1.8716074228286743, "logps/chosen": -365.9205322265625, "logps/rejected": -354.135498046875, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -2.993846893310547, "rewards/margins": 4.114795207977295, "rewards/rejected": -7.108642578125, "step": 2804 }, { "epoch": 0.59, "learning_rate": 8.298319327731093e-06, "logits/chosen": -2.174389362335205, "logits/rejected": -1.9666194915771484, "logps/chosen": -326.303466796875, "logps/rejected": -291.7364807128906, "loss": 0.2614, "rewards/accuracies": 0.875, "rewards/chosen": -3.902897596359253, "rewards/margins": 4.314565658569336, "rewards/rejected": -8.217463493347168, "step": 2805 }, { "epoch": 0.59, "learning_rate": 8.294117647058825e-06, "logits/chosen": -2.284073829650879, "logits/rejected": -2.0911784172058105, "logps/chosen": -450.76458740234375, "logps/rejected": -396.35565185546875, "loss": 0.3316, "rewards/accuracies": 0.8125, "rewards/chosen": -3.68430495262146, "rewards/margins": 3.2674429416656494, "rewards/rejected": -6.951747894287109, "step": 2806 }, { "epoch": 0.59, "learning_rate": 8.289915966386555e-06, "logits/chosen": -2.363044500350952, "logits/rejected": -1.9413715600967407, "logps/chosen": -490.34033203125, "logps/rejected": -455.1481628417969, "loss": 0.1363, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6697349548339844, "rewards/margins": 5.12384557723999, "rewards/rejected": -7.793581008911133, "step": 2807 }, { "epoch": 0.59, "learning_rate": 8.285714285714287e-06, "logits/chosen": -2.07851505279541, "logits/rejected": -1.738268256187439, "logps/chosen": -317.51806640625, "logps/rejected": -324.86822509765625, "loss": 0.297, "rewards/accuracies": 0.8125, "rewards/chosen": -3.6573843955993652, "rewards/margins": 3.86836576461792, "rewards/rejected": -7.525750160217285, "step": 2808 }, { "epoch": 0.59, "learning_rate": 8.281512605042017e-06, "logits/chosen": -1.879288911819458, "logits/rejected": -1.9739577770233154, "logps/chosen": -393.1387023925781, "logps/rejected": -398.0206604003906, "loss": 0.209, "rewards/accuracies": 0.9375, "rewards/chosen": -3.2824854850769043, "rewards/margins": 2.966374397277832, "rewards/rejected": -6.248859405517578, "step": 2809 }, { "epoch": 0.59, "learning_rate": 8.277310924369747e-06, "logits/chosen": -1.7616772651672363, "logits/rejected": -2.0976154804229736, "logps/chosen": -215.555908203125, "logps/rejected": -275.7451477050781, "loss": 0.176, "rewards/accuracies": 0.9375, "rewards/chosen": -3.925398349761963, "rewards/margins": 5.10221529006958, "rewards/rejected": -9.027612686157227, "step": 2810 }, { "epoch": 0.59, "learning_rate": 8.27310924369748e-06, "logits/chosen": -2.186342716217041, "logits/rejected": -1.6078993082046509, "logps/chosen": -374.6521301269531, "logps/rejected": -321.64398193359375, "loss": 0.2071, "rewards/accuracies": 0.875, "rewards/chosen": -3.4683961868286133, "rewards/margins": 3.606710195541382, "rewards/rejected": -7.075106620788574, "step": 2811 }, { "epoch": 0.59, "learning_rate": 8.26890756302521e-06, "logits/chosen": -2.2170135974884033, "logits/rejected": -2.0392019748687744, "logps/chosen": -412.68218994140625, "logps/rejected": -439.02056884765625, "loss": 0.3074, "rewards/accuracies": 0.9375, "rewards/chosen": -2.8754336833953857, "rewards/margins": 4.200932025909424, "rewards/rejected": -7.0763654708862305, "step": 2812 }, { "epoch": 0.59, "learning_rate": 8.264705882352941e-06, "logits/chosen": -1.7978732585906982, "logits/rejected": -2.045762538909912, "logps/chosen": -255.79513549804688, "logps/rejected": -351.8050231933594, "loss": 0.4799, "rewards/accuracies": 0.8125, "rewards/chosen": -3.6061131954193115, "rewards/margins": 4.833450794219971, "rewards/rejected": -8.439563751220703, "step": 2813 }, { "epoch": 0.59, "learning_rate": 8.260504201680672e-06, "logits/chosen": -2.3886878490448, "logits/rejected": -1.910915732383728, "logps/chosen": -370.0023193359375, "logps/rejected": -311.5486145019531, "loss": 0.3669, "rewards/accuracies": 0.875, "rewards/chosen": -2.9370834827423096, "rewards/margins": 3.734679698944092, "rewards/rejected": -6.6717634201049805, "step": 2814 }, { "epoch": 0.59, "learning_rate": 8.256302521008404e-06, "logits/chosen": -2.293048143386841, "logits/rejected": -1.751259446144104, "logps/chosen": -380.3153076171875, "logps/rejected": -323.15106201171875, "loss": 0.2363, "rewards/accuracies": 0.875, "rewards/chosen": -2.8120436668395996, "rewards/margins": 3.2259268760681152, "rewards/rejected": -6.037970542907715, "step": 2815 }, { "epoch": 0.59, "learning_rate": 8.252100840336136e-06, "logits/chosen": -2.1402781009674072, "logits/rejected": -2.1231849193573, "logps/chosen": -219.87908935546875, "logps/rejected": -263.8876647949219, "loss": 0.2605, "rewards/accuracies": 0.875, "rewards/chosen": -3.0515546798706055, "rewards/margins": 3.040764093399048, "rewards/rejected": -6.092319011688232, "step": 2816 }, { "epoch": 0.59, "learning_rate": 8.247899159663866e-06, "logits/chosen": -2.347778797149658, "logits/rejected": -2.0777573585510254, "logps/chosen": -337.0054931640625, "logps/rejected": -352.59539794921875, "loss": 0.1095, "rewards/accuracies": 0.9375, "rewards/chosen": -3.0044875144958496, "rewards/margins": 5.388765335083008, "rewards/rejected": -8.393253326416016, "step": 2817 }, { "epoch": 0.59, "learning_rate": 8.243697478991598e-06, "logits/chosen": -2.123249053955078, "logits/rejected": -1.9542696475982666, "logps/chosen": -357.5397644042969, "logps/rejected": -544.1553344726562, "loss": 1.2184, "rewards/accuracies": 0.4375, "rewards/chosen": -3.882512092590332, "rewards/margins": 0.9422541856765747, "rewards/rejected": -4.824766635894775, "step": 2818 }, { "epoch": 0.59, "learning_rate": 8.239495798319328e-06, "logits/chosen": -1.9939534664154053, "logits/rejected": -2.119971752166748, "logps/chosen": -284.2933654785156, "logps/rejected": -383.06951904296875, "loss": 0.6059, "rewards/accuracies": 0.8125, "rewards/chosen": -3.894986152648926, "rewards/margins": 3.1070070266723633, "rewards/rejected": -7.001993179321289, "step": 2819 }, { "epoch": 0.59, "learning_rate": 8.23529411764706e-06, "logits/chosen": -2.3356690406799316, "logits/rejected": -2.06516695022583, "logps/chosen": -335.189697265625, "logps/rejected": -333.42047119140625, "loss": 0.2821, "rewards/accuracies": 0.9375, "rewards/chosen": -3.6549527645111084, "rewards/margins": 2.70896053314209, "rewards/rejected": -6.363913536071777, "step": 2820 }, { "epoch": 0.59, "learning_rate": 8.23109243697479e-06, "logits/chosen": -2.038480758666992, "logits/rejected": -1.8009753227233887, "logps/chosen": -331.6080322265625, "logps/rejected": -333.0943298339844, "loss": 0.0706, "rewards/accuracies": 1.0, "rewards/chosen": -2.7101237773895264, "rewards/margins": 5.231769561767578, "rewards/rejected": -7.941892623901367, "step": 2821 }, { "epoch": 0.59, "learning_rate": 8.226890756302522e-06, "logits/chosen": -2.0843725204467773, "logits/rejected": -1.7788591384887695, "logps/chosen": -337.2210693359375, "logps/rejected": -372.18585205078125, "loss": 0.337, "rewards/accuracies": 0.75, "rewards/chosen": -3.717348098754883, "rewards/margins": 3.6554651260375977, "rewards/rejected": -7.372813701629639, "step": 2822 }, { "epoch": 0.59, "learning_rate": 8.222689075630252e-06, "logits/chosen": -1.8485604524612427, "logits/rejected": -2.3213250637054443, "logps/chosen": -224.17706298828125, "logps/rejected": -452.4322204589844, "loss": 0.2053, "rewards/accuracies": 0.875, "rewards/chosen": -3.2848198413848877, "rewards/margins": 5.009344100952148, "rewards/rejected": -8.294164657592773, "step": 2823 }, { "epoch": 0.59, "learning_rate": 8.218487394957984e-06, "logits/chosen": -2.095917224884033, "logits/rejected": -1.7116236686706543, "logps/chosen": -355.4200744628906, "logps/rejected": -320.6032409667969, "loss": 0.2091, "rewards/accuracies": 0.875, "rewards/chosen": -2.125521421432495, "rewards/margins": 4.299933433532715, "rewards/rejected": -6.425454139709473, "step": 2824 }, { "epoch": 0.59, "learning_rate": 8.214285714285714e-06, "logits/chosen": -2.322499990463257, "logits/rejected": -2.0416998863220215, "logps/chosen": -421.985595703125, "logps/rejected": -385.819580078125, "loss": 0.2117, "rewards/accuracies": 0.8125, "rewards/chosen": -3.3511154651641846, "rewards/margins": 4.265036582946777, "rewards/rejected": -7.616151809692383, "step": 2825 }, { "epoch": 0.59, "learning_rate": 8.210084033613446e-06, "logits/chosen": -2.109950065612793, "logits/rejected": -1.7112679481506348, "logps/chosen": -342.0282287597656, "logps/rejected": -386.3279113769531, "loss": 0.5215, "rewards/accuracies": 0.6875, "rewards/chosen": -4.720414161682129, "rewards/margins": 2.8072731494903564, "rewards/rejected": -7.527687072753906, "step": 2826 }, { "epoch": 0.59, "learning_rate": 8.205882352941176e-06, "logits/chosen": -2.043193817138672, "logits/rejected": -1.8053600788116455, "logps/chosen": -340.2882995605469, "logps/rejected": -283.55450439453125, "loss": 0.4156, "rewards/accuracies": 0.875, "rewards/chosen": -4.3082804679870605, "rewards/margins": 4.165438175201416, "rewards/rejected": -8.473718643188477, "step": 2827 }, { "epoch": 0.59, "learning_rate": 8.201680672268908e-06, "logits/chosen": -2.2017405033111572, "logits/rejected": -1.9761722087860107, "logps/chosen": -320.93365478515625, "logps/rejected": -292.62908935546875, "loss": 0.3305, "rewards/accuracies": 0.875, "rewards/chosen": -3.529244899749756, "rewards/margins": 4.6572675704956055, "rewards/rejected": -8.18651294708252, "step": 2828 }, { "epoch": 0.59, "learning_rate": 8.197478991596639e-06, "logits/chosen": -1.9891941547393799, "logits/rejected": -2.301286220550537, "logps/chosen": -287.46368408203125, "logps/rejected": -434.3602294921875, "loss": 0.2346, "rewards/accuracies": 0.875, "rewards/chosen": -3.409396171569824, "rewards/margins": 6.277019500732422, "rewards/rejected": -9.686415672302246, "step": 2829 }, { "epoch": 0.59, "learning_rate": 8.19327731092437e-06, "logits/chosen": -1.7858808040618896, "logits/rejected": -1.7814605236053467, "logps/chosen": -296.35528564453125, "logps/rejected": -334.67218017578125, "loss": 0.2394, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1191277503967285, "rewards/margins": 4.2347588539123535, "rewards/rejected": -7.353886604309082, "step": 2830 }, { "epoch": 0.59, "learning_rate": 8.1890756302521e-06, "logits/chosen": -2.403583526611328, "logits/rejected": -2.051482915878296, "logps/chosen": -344.85809326171875, "logps/rejected": -307.8115234375, "loss": 0.2398, "rewards/accuracies": 0.875, "rewards/chosen": -3.3161840438842773, "rewards/margins": 3.2084336280822754, "rewards/rejected": -6.524618148803711, "step": 2831 }, { "epoch": 0.59, "learning_rate": 8.184873949579833e-06, "logits/chosen": -2.1043310165405273, "logits/rejected": -2.0739669799804688, "logps/chosen": -309.0305480957031, "logps/rejected": -405.19696044921875, "loss": 0.167, "rewards/accuracies": 0.9375, "rewards/chosen": -3.588003158569336, "rewards/margins": 4.840431213378906, "rewards/rejected": -8.428434371948242, "step": 2832 }, { "epoch": 0.59, "learning_rate": 8.180672268907563e-06, "logits/chosen": -2.2396187782287598, "logits/rejected": -2.1531238555908203, "logps/chosen": -277.57025146484375, "logps/rejected": -323.8175048828125, "loss": 0.2929, "rewards/accuracies": 0.8125, "rewards/chosen": -3.723935127258301, "rewards/margins": 2.4785430431365967, "rewards/rejected": -6.202478408813477, "step": 2833 }, { "epoch": 0.59, "learning_rate": 8.176470588235295e-06, "logits/chosen": -2.009207248687744, "logits/rejected": -2.1428966522216797, "logps/chosen": -272.13909912109375, "logps/rejected": -355.3739013671875, "loss": 0.1675, "rewards/accuracies": 0.9375, "rewards/chosen": -4.269925594329834, "rewards/margins": 4.545327186584473, "rewards/rejected": -8.815254211425781, "step": 2834 }, { "epoch": 0.59, "learning_rate": 8.172268907563025e-06, "logits/chosen": -2.255329132080078, "logits/rejected": -2.042604923248291, "logps/chosen": -457.68280029296875, "logps/rejected": -343.29901123046875, "loss": 0.3561, "rewards/accuracies": 0.875, "rewards/chosen": -3.5013070106506348, "rewards/margins": 3.7814860343933105, "rewards/rejected": -7.2827935218811035, "step": 2835 }, { "epoch": 0.59, "learning_rate": 8.168067226890757e-06, "logits/chosen": -1.8427557945251465, "logits/rejected": -2.123448610305786, "logps/chosen": -319.8478088378906, "logps/rejected": -425.02716064453125, "loss": 0.2068, "rewards/accuracies": 0.8125, "rewards/chosen": -2.94172739982605, "rewards/margins": 6.207667827606201, "rewards/rejected": -9.149395942687988, "step": 2836 }, { "epoch": 0.59, "learning_rate": 8.163865546218487e-06, "logits/chosen": -2.0624234676361084, "logits/rejected": -2.131868600845337, "logps/chosen": -328.3984069824219, "logps/rejected": -394.0900573730469, "loss": 0.504, "rewards/accuracies": 0.8125, "rewards/chosen": -3.672734498977661, "rewards/margins": 2.772357225418091, "rewards/rejected": -6.44509220123291, "step": 2837 }, { "epoch": 0.59, "learning_rate": 8.159663865546219e-06, "logits/chosen": -2.002519369125366, "logits/rejected": -2.3567581176757812, "logps/chosen": -263.5201416015625, "logps/rejected": -352.77752685546875, "loss": 0.5185, "rewards/accuracies": 0.8125, "rewards/chosen": -4.1634368896484375, "rewards/margins": 3.137320041656494, "rewards/rejected": -7.300756454467773, "step": 2838 }, { "epoch": 0.59, "learning_rate": 8.155462184873951e-06, "logits/chosen": -2.0792806148529053, "logits/rejected": -2.0239672660827637, "logps/chosen": -348.33917236328125, "logps/rejected": -391.73919677734375, "loss": 0.2867, "rewards/accuracies": 0.875, "rewards/chosen": -4.483395576477051, "rewards/margins": 4.1585822105407715, "rewards/rejected": -8.64197826385498, "step": 2839 }, { "epoch": 0.59, "learning_rate": 8.151260504201681e-06, "logits/chosen": -2.3560848236083984, "logits/rejected": -1.748203992843628, "logps/chosen": -428.44305419921875, "logps/rejected": -355.63946533203125, "loss": 0.8137, "rewards/accuracies": 0.75, "rewards/chosen": -3.6552109718322754, "rewards/margins": 3.1481997966766357, "rewards/rejected": -6.80341100692749, "step": 2840 }, { "epoch": 0.59, "learning_rate": 8.147058823529413e-06, "logits/chosen": -2.2991445064544678, "logits/rejected": -1.9024101495742798, "logps/chosen": -349.4647216796875, "logps/rejected": -412.1666564941406, "loss": 0.1101, "rewards/accuracies": 0.9375, "rewards/chosen": -2.6878700256347656, "rewards/margins": 6.302396774291992, "rewards/rejected": -8.990266799926758, "step": 2841 }, { "epoch": 0.59, "learning_rate": 8.142857142857143e-06, "logits/chosen": -2.1942005157470703, "logits/rejected": -1.7929238080978394, "logps/chosen": -416.61358642578125, "logps/rejected": -389.9256896972656, "loss": 0.2395, "rewards/accuracies": 0.9375, "rewards/chosen": -4.389235496520996, "rewards/margins": 5.791066646575928, "rewards/rejected": -10.180302619934082, "step": 2842 }, { "epoch": 0.59, "learning_rate": 8.138655462184875e-06, "logits/chosen": -2.36810302734375, "logits/rejected": -1.8364918231964111, "logps/chosen": -401.8931884765625, "logps/rejected": -354.80712890625, "loss": 0.3152, "rewards/accuracies": 0.875, "rewards/chosen": -4.2738542556762695, "rewards/margins": 3.8468637466430664, "rewards/rejected": -8.120718002319336, "step": 2843 }, { "epoch": 0.59, "learning_rate": 8.134453781512605e-06, "logits/chosen": -2.1607508659362793, "logits/rejected": -1.9271211624145508, "logps/chosen": -321.31622314453125, "logps/rejected": -308.65155029296875, "loss": 0.4544, "rewards/accuracies": 0.6875, "rewards/chosen": -3.333409309387207, "rewards/margins": 3.616272449493408, "rewards/rejected": -6.949681758880615, "step": 2844 }, { "epoch": 0.6, "learning_rate": 8.130252100840337e-06, "logits/chosen": -2.1910910606384277, "logits/rejected": -2.0567409992218018, "logps/chosen": -388.2835693359375, "logps/rejected": -397.63775634765625, "loss": 0.4485, "rewards/accuracies": 0.6875, "rewards/chosen": -3.4273123741149902, "rewards/margins": 4.08199405670166, "rewards/rejected": -7.50930643081665, "step": 2845 }, { "epoch": 0.6, "learning_rate": 8.126050420168068e-06, "logits/chosen": -1.9302513599395752, "logits/rejected": -2.0104446411132812, "logps/chosen": -408.349609375, "logps/rejected": -366.58447265625, "loss": 0.1361, "rewards/accuracies": 1.0, "rewards/chosen": -2.827929973602295, "rewards/margins": 4.033010482788086, "rewards/rejected": -6.860940933227539, "step": 2846 }, { "epoch": 0.6, "learning_rate": 8.1218487394958e-06, "logits/chosen": -2.2164080142974854, "logits/rejected": -1.3722795248031616, "logps/chosen": -313.64495849609375, "logps/rejected": -263.4909973144531, "loss": 0.4127, "rewards/accuracies": 0.8125, "rewards/chosen": -4.483514308929443, "rewards/margins": 4.103476524353027, "rewards/rejected": -8.586990356445312, "step": 2847 }, { "epoch": 0.6, "learning_rate": 8.11764705882353e-06, "logits/chosen": -2.17451548576355, "logits/rejected": -2.138374090194702, "logps/chosen": -292.60052490234375, "logps/rejected": -374.3209533691406, "loss": 0.21, "rewards/accuracies": 0.9375, "rewards/chosen": -3.0047714710235596, "rewards/margins": 5.142330169677734, "rewards/rejected": -8.147100448608398, "step": 2848 }, { "epoch": 0.6, "learning_rate": 8.113445378151262e-06, "logits/chosen": -2.0436458587646484, "logits/rejected": -1.9778953790664673, "logps/chosen": -301.82952880859375, "logps/rejected": -397.66802978515625, "loss": 0.0724, "rewards/accuracies": 0.9375, "rewards/chosen": -3.147308349609375, "rewards/margins": 5.640937328338623, "rewards/rejected": -8.78824520111084, "step": 2849 }, { "epoch": 0.6, "learning_rate": 8.109243697478992e-06, "logits/chosen": -2.169551134109497, "logits/rejected": -1.8833082914352417, "logps/chosen": -252.54771423339844, "logps/rejected": -292.64251708984375, "loss": 0.5495, "rewards/accuracies": 0.6875, "rewards/chosen": -3.90415620803833, "rewards/margins": 2.9267563819885254, "rewards/rejected": -6.8309125900268555, "step": 2850 }, { "epoch": 0.6, "learning_rate": 8.105042016806724e-06, "logits/chosen": -2.1114001274108887, "logits/rejected": -1.8983662128448486, "logps/chosen": -345.64544677734375, "logps/rejected": -396.6995544433594, "loss": 0.2116, "rewards/accuracies": 0.9375, "rewards/chosen": -3.0202040672302246, "rewards/margins": 4.6315717697143555, "rewards/rejected": -7.651776313781738, "step": 2851 }, { "epoch": 0.6, "learning_rate": 8.100840336134454e-06, "logits/chosen": -2.4691951274871826, "logits/rejected": -2.1260340213775635, "logps/chosen": -461.36627197265625, "logps/rejected": -373.4756164550781, "loss": 0.512, "rewards/accuracies": 0.8125, "rewards/chosen": -2.6879405975341797, "rewards/margins": 3.469712972640991, "rewards/rejected": -6.157653331756592, "step": 2852 }, { "epoch": 0.6, "learning_rate": 8.096638655462186e-06, "logits/chosen": -2.1128549575805664, "logits/rejected": -1.9955192804336548, "logps/chosen": -289.69439697265625, "logps/rejected": -373.2474670410156, "loss": 0.6819, "rewards/accuracies": 0.75, "rewards/chosen": -3.7963194847106934, "rewards/margins": 3.129281997680664, "rewards/rejected": -6.925601482391357, "step": 2853 }, { "epoch": 0.6, "learning_rate": 8.092436974789916e-06, "logits/chosen": -1.9095611572265625, "logits/rejected": -1.647695779800415, "logps/chosen": -350.608642578125, "logps/rejected": -318.40875244140625, "loss": 0.1933, "rewards/accuracies": 1.0, "rewards/chosen": -2.8464856147766113, "rewards/margins": 3.5230777263641357, "rewards/rejected": -6.369563579559326, "step": 2854 }, { "epoch": 0.6, "learning_rate": 8.088235294117648e-06, "logits/chosen": -1.9426568746566772, "logits/rejected": -1.8198564052581787, "logps/chosen": -313.92413330078125, "logps/rejected": -332.2087097167969, "loss": 0.1479, "rewards/accuracies": 0.9375, "rewards/chosen": -4.27368688583374, "rewards/margins": 4.817465782165527, "rewards/rejected": -9.09115219116211, "step": 2855 }, { "epoch": 0.6, "learning_rate": 8.084033613445378e-06, "logits/chosen": -2.2103123664855957, "logits/rejected": -2.2698264122009277, "logps/chosen": -235.57872009277344, "logps/rejected": -256.6566162109375, "loss": 0.2125, "rewards/accuracies": 0.9375, "rewards/chosen": -2.961265802383423, "rewards/margins": 4.453272819519043, "rewards/rejected": -7.414538860321045, "step": 2856 }, { "epoch": 0.6, "learning_rate": 8.07983193277311e-06, "logits/chosen": -2.2029149532318115, "logits/rejected": -1.9300525188446045, "logps/chosen": -364.18182373046875, "logps/rejected": -378.244873046875, "loss": 0.3083, "rewards/accuracies": 0.8125, "rewards/chosen": -4.002892971038818, "rewards/margins": 4.774847507476807, "rewards/rejected": -8.777740478515625, "step": 2857 }, { "epoch": 0.6, "learning_rate": 8.07563025210084e-06, "logits/chosen": -2.1541645526885986, "logits/rejected": -2.269373893737793, "logps/chosen": -371.02301025390625, "logps/rejected": -420.470458984375, "loss": 0.4484, "rewards/accuracies": 0.75, "rewards/chosen": -3.891386032104492, "rewards/margins": 2.578678607940674, "rewards/rejected": -6.470065116882324, "step": 2858 }, { "epoch": 0.6, "learning_rate": 8.071428571428572e-06, "logits/chosen": -2.264519214630127, "logits/rejected": -2.0965452194213867, "logps/chosen": -304.95111083984375, "logps/rejected": -332.13262939453125, "loss": 0.1327, "rewards/accuracies": 0.875, "rewards/chosen": -2.532072067260742, "rewards/margins": 4.769775390625, "rewards/rejected": -7.301847457885742, "step": 2859 }, { "epoch": 0.6, "learning_rate": 8.067226890756303e-06, "logits/chosen": -2.408257484436035, "logits/rejected": -2.1373329162597656, "logps/chosen": -444.46441650390625, "logps/rejected": -405.84423828125, "loss": 0.2064, "rewards/accuracies": 0.9375, "rewards/chosen": -3.1656885147094727, "rewards/margins": 4.396812438964844, "rewards/rejected": -7.562500953674316, "step": 2860 }, { "epoch": 0.6, "learning_rate": 8.063025210084034e-06, "logits/chosen": -2.187272071838379, "logits/rejected": -2.0420517921447754, "logps/chosen": -386.6808166503906, "logps/rejected": -349.65887451171875, "loss": 0.2506, "rewards/accuracies": 0.8125, "rewards/chosen": -2.7949306964874268, "rewards/margins": 5.878454208374023, "rewards/rejected": -8.673385620117188, "step": 2861 }, { "epoch": 0.6, "learning_rate": 8.058823529411766e-06, "logits/chosen": -2.3455960750579834, "logits/rejected": -1.427271842956543, "logps/chosen": -345.93450927734375, "logps/rejected": -242.88400268554688, "loss": 0.4899, "rewards/accuracies": 0.8125, "rewards/chosen": -2.987334728240967, "rewards/margins": 3.650768280029297, "rewards/rejected": -6.6381025314331055, "step": 2862 }, { "epoch": 0.6, "learning_rate": 8.054621848739497e-06, "logits/chosen": -2.0288524627685547, "logits/rejected": -1.914783000946045, "logps/chosen": -314.9799499511719, "logps/rejected": -385.12982177734375, "loss": 0.2354, "rewards/accuracies": 0.875, "rewards/chosen": -3.895512342453003, "rewards/margins": 4.287503719329834, "rewards/rejected": -8.183015823364258, "step": 2863 }, { "epoch": 0.6, "learning_rate": 8.050420168067229e-06, "logits/chosen": -2.2847816944122314, "logits/rejected": -2.0739388465881348, "logps/chosen": -270.5831298828125, "logps/rejected": -328.23907470703125, "loss": 0.2799, "rewards/accuracies": 0.9375, "rewards/chosen": -3.534893751144409, "rewards/margins": 5.640796661376953, "rewards/rejected": -9.175689697265625, "step": 2864 }, { "epoch": 0.6, "learning_rate": 8.046218487394959e-06, "logits/chosen": -1.9350030422210693, "logits/rejected": -1.6919705867767334, "logps/chosen": -382.2035827636719, "logps/rejected": -390.9158935546875, "loss": 0.3389, "rewards/accuracies": 0.8125, "rewards/chosen": -4.023379802703857, "rewards/margins": 4.018260478973389, "rewards/rejected": -8.041640281677246, "step": 2865 }, { "epoch": 0.6, "learning_rate": 8.04201680672269e-06, "logits/chosen": -2.129018783569336, "logits/rejected": -2.176754951477051, "logps/chosen": -396.83563232421875, "logps/rejected": -400.2115478515625, "loss": 0.2302, "rewards/accuracies": 0.875, "rewards/chosen": -2.7818636894226074, "rewards/margins": 3.6553544998168945, "rewards/rejected": -6.437218189239502, "step": 2866 }, { "epoch": 0.6, "learning_rate": 8.037815126050421e-06, "logits/chosen": -2.068474769592285, "logits/rejected": -1.9121875762939453, "logps/chosen": -424.01397705078125, "logps/rejected": -347.99053955078125, "loss": 0.4682, "rewards/accuracies": 0.75, "rewards/chosen": -3.936293840408325, "rewards/margins": 2.923370122909546, "rewards/rejected": -6.859663963317871, "step": 2867 }, { "epoch": 0.6, "learning_rate": 8.033613445378153e-06, "logits/chosen": -2.3719639778137207, "logits/rejected": -1.8226454257965088, "logps/chosen": -439.0179138183594, "logps/rejected": -372.7168273925781, "loss": 0.3606, "rewards/accuracies": 0.8125, "rewards/chosen": -3.1236109733581543, "rewards/margins": 4.458876132965088, "rewards/rejected": -7.582486629486084, "step": 2868 } ], "logging_steps": 1, "max_steps": 4780, "num_train_epochs": 1, "save_steps": 239, "total_flos": 0.0, "trial_name": null, "trial_params": null }