{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3750, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.3333333333333334e-08, "logits/chosen": -1.5177760124206543, "logits/rejected": -1.1611042022705078, "logps/chosen": -309.02911376953125, "logps/rejected": -848.8409423828125, "loss": 0.2593, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.3333333333333336e-07, "logits/chosen": -1.5949721336364746, "logits/rejected": -1.165027379989624, "logps/chosen": -451.9952697753906, "logps/rejected": -786.9351806640625, "loss": 0.2269, "rewards/accuracies": 0.2777777910232544, "rewards/chosen": -0.0005774286109954119, "rewards/margins": -0.000440336880274117, "rewards/rejected": -0.00013709173072129488, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.666666666666667e-07, "logits/chosen": -1.557755470275879, "logits/rejected": -1.188490629196167, "logps/chosen": -457.94342041015625, "logps/rejected": -653.2659912109375, "loss": 0.2143, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.0007440468179993331, "rewards/margins": 0.000692694156896323, "rewards/rejected": 5.135267929290421e-05, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.0000000000000003e-07, "logits/chosen": -1.8446645736694336, "logits/rejected": -1.1848350763320923, "logps/chosen": -542.8465576171875, "logps/rejected": -840.0565185546875, "loss": 0.1931, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.001833869144320488, "rewards/margins": 0.002176427748054266, "rewards/rejected": -0.000342558283591643, "step": 30 }, { "epoch": 0.01, "learning_rate": 5.333333333333335e-07, "logits/chosen": -1.3947551250457764, "logits/rejected": -1.0338518619537354, "logps/chosen": -513.7941284179688, "logps/rejected": -851.7515869140625, "loss": 0.1978, "rewards/accuracies": 0.75, "rewards/chosen": 0.0025823800824582577, "rewards/margins": 0.007740010507404804, "rewards/rejected": -0.005157629959285259, "step": 40 }, { "epoch": 0.01, "learning_rate": 6.666666666666667e-07, "logits/chosen": -1.544039249420166, "logits/rejected": -1.1640592813491821, "logps/chosen": -444.1375427246094, "logps/rejected": -751.6806640625, "loss": 0.1943, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.006201503798365593, "rewards/margins": 0.012399530969560146, "rewards/rejected": -0.006198027171194553, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.000000000000001e-07, "logits/chosen": -1.7549737691879272, "logits/rejected": -1.134901523590088, "logps/chosen": -524.1077880859375, "logps/rejected": -897.7693481445312, "loss": 0.1816, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.010963965207338333, "rewards/margins": 0.028727427124977112, "rewards/rejected": -0.01776346191763878, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.333333333333334e-07, "logits/chosen": -1.707073450088501, "logits/rejected": -0.9422414898872375, "logps/chosen": -418.4791564941406, "logps/rejected": -887.1856689453125, "loss": 0.166, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.011874416843056679, "rewards/margins": 0.05441901832818985, "rewards/rejected": -0.042544592171907425, "step": 70 }, { "epoch": 0.02, "learning_rate": 1.066666666666667e-06, "logits/chosen": -1.7464405298233032, "logits/rejected": -1.3240439891815186, "logps/chosen": -393.67059326171875, "logps/rejected": -797.724609375, "loss": 0.1765, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.014196820557117462, "rewards/margins": 0.05819591134786606, "rewards/rejected": -0.043999094516038895, "step": 80 }, { "epoch": 0.02, "learning_rate": 1.2000000000000002e-06, "logits/chosen": -1.47357976436615, "logits/rejected": -0.9985636472702026, "logps/chosen": -445.72979736328125, "logps/rejected": -712.3211059570312, "loss": 0.1603, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.003458250779658556, "rewards/margins": 0.06728541851043701, "rewards/rejected": -0.06382717192173004, "step": 90 }, { "epoch": 0.03, "learning_rate": 1.3333333333333334e-06, "logits/chosen": -1.5050207376480103, "logits/rejected": -1.0137097835540771, "logps/chosen": -409.77252197265625, "logps/rejected": -882.01025390625, "loss": 0.1755, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.011769925244152546, "rewards/margins": 0.09516488015651703, "rewards/rejected": -0.1069348007440567, "step": 100 }, { "epoch": 0.03, "learning_rate": 1.4666666666666669e-06, "logits/chosen": -1.508277177810669, "logits/rejected": -0.9909588098526001, "logps/chosen": -446.08514404296875, "logps/rejected": -970.3818359375, "loss": 0.1234, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.039314642548561096, "rewards/margins": 0.12522205710411072, "rewards/rejected": -0.16453669965267181, "step": 110 }, { "epoch": 0.03, "learning_rate": 1.6000000000000001e-06, "logits/chosen": -1.5001076459884644, "logits/rejected": -1.1417639255523682, "logps/chosen": -595.5568237304688, "logps/rejected": -1075.8310546875, "loss": 0.1178, "rewards/accuracies": 0.75, "rewards/chosen": -0.11702040582895279, "rewards/margins": 0.1300191581249237, "rewards/rejected": -0.2470395863056183, "step": 120 }, { "epoch": 0.03, "learning_rate": 1.7333333333333336e-06, "logits/chosen": -1.6354900598526, "logits/rejected": -1.1173183917999268, "logps/chosen": -596.342041015625, "logps/rejected": -1051.9910888671875, "loss": 0.122, "rewards/accuracies": 0.75, "rewards/chosen": -0.1303187757730484, "rewards/margins": 0.14471343159675598, "rewards/rejected": -0.2750321924686432, "step": 130 }, { "epoch": 0.04, "learning_rate": 1.8666666666666669e-06, "logits/chosen": -1.857448935508728, "logits/rejected": -1.1995853185653687, "logps/chosen": -488.5594177246094, "logps/rejected": -1067.145263671875, "loss": 0.1382, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11596964299678802, "rewards/margins": 0.1565937101840973, "rewards/rejected": -0.2725633978843689, "step": 140 }, { "epoch": 0.04, "learning_rate": 2.0000000000000003e-06, "logits/chosen": -1.6261777877807617, "logits/rejected": -1.1077944040298462, "logps/chosen": -564.3380737304688, "logps/rejected": -1317.8497314453125, "loss": 0.0805, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14445793628692627, "rewards/margins": 0.2764219641685486, "rewards/rejected": -0.42087993025779724, "step": 150 }, { "epoch": 0.04, "learning_rate": 2.133333333333334e-06, "logits/chosen": -1.4915621280670166, "logits/rejected": -1.1237232685089111, "logps/chosen": -487.80487060546875, "logps/rejected": -1048.151611328125, "loss": 0.0999, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.14958631992340088, "rewards/margins": 0.19907937943935394, "rewards/rejected": -0.3486657440662384, "step": 160 }, { "epoch": 0.05, "learning_rate": 2.266666666666667e-06, "logits/chosen": -1.462720274925232, "logits/rejected": -1.1023520231246948, "logps/chosen": -552.3968505859375, "logps/rejected": -1198.3912353515625, "loss": 0.0924, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1677219718694687, "rewards/margins": 0.21208930015563965, "rewards/rejected": -0.37981128692626953, "step": 170 }, { "epoch": 0.05, "learning_rate": 2.4000000000000003e-06, "logits/chosen": -1.6897376775741577, "logits/rejected": -0.9965440630912781, "logps/chosen": -626.109619140625, "logps/rejected": -1155.2620849609375, "loss": 0.0705, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15819820761680603, "rewards/margins": 0.23869426548480988, "rewards/rejected": -0.3968924880027771, "step": 180 }, { "epoch": 0.05, "learning_rate": 2.5333333333333338e-06, "logits/chosen": -1.8232837915420532, "logits/rejected": -0.823337197303772, "logps/chosen": -675.5474853515625, "logps/rejected": -1195.9923095703125, "loss": 0.1007, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12152433395385742, "rewards/margins": 0.2081596404314041, "rewards/rejected": -0.32968395948410034, "step": 190 }, { "epoch": 0.05, "learning_rate": 2.666666666666667e-06, "logits/chosen": -1.9476888179779053, "logits/rejected": -1.0242823362350464, "logps/chosen": -624.99951171875, "logps/rejected": -1197.134521484375, "loss": 0.0905, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12548482418060303, "rewards/margins": 0.23341400921344757, "rewards/rejected": -0.3588988482952118, "step": 200 }, { "epoch": 0.06, "learning_rate": 2.8000000000000003e-06, "logits/chosen": -1.518812894821167, "logits/rejected": -1.046945571899414, "logps/chosen": -764.122314453125, "logps/rejected": -1280.217041015625, "loss": 0.1437, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23460201919078827, "rewards/margins": 0.19938690960407257, "rewards/rejected": -0.4339889585971832, "step": 210 }, { "epoch": 0.06, "learning_rate": 2.9333333333333338e-06, "logits/chosen": -1.6284335851669312, "logits/rejected": -1.1508177518844604, "logps/chosen": -523.3719482421875, "logps/rejected": -1222.5562744140625, "loss": 0.0616, "rewards/accuracies": 0.875, "rewards/chosen": -0.14848622679710388, "rewards/margins": 0.24298810958862305, "rewards/rejected": -0.39147430658340454, "step": 220 }, { "epoch": 0.06, "learning_rate": 3.066666666666667e-06, "logits/chosen": -1.6719776391983032, "logits/rejected": -0.9572904706001282, "logps/chosen": -747.065185546875, "logps/rejected": -1382.8427734375, "loss": 0.0855, "rewards/accuracies": 0.875, "rewards/chosen": -0.23779627680778503, "rewards/margins": 0.2256799042224884, "rewards/rejected": -0.4634762406349182, "step": 230 }, { "epoch": 0.06, "learning_rate": 3.2000000000000003e-06, "logits/chosen": -1.840018630027771, "logits/rejected": -1.1092523336410522, "logps/chosen": -612.7644653320312, "logps/rejected": -1217.589111328125, "loss": 0.0733, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16975006461143494, "rewards/margins": 0.2756669223308563, "rewards/rejected": -0.44541701674461365, "step": 240 }, { "epoch": 0.07, "learning_rate": 3.3333333333333333e-06, "logits/chosen": -1.574760913848877, "logits/rejected": -1.1205968856811523, "logps/chosen": -629.0726318359375, "logps/rejected": -1156.187255859375, "loss": 0.0998, "rewards/accuracies": 0.875, "rewards/chosen": -0.13886727392673492, "rewards/margins": 0.202668234705925, "rewards/rejected": -0.3415355086326599, "step": 250 }, { "epoch": 0.07, "learning_rate": 3.4666666666666672e-06, "logits/chosen": -1.7936065196990967, "logits/rejected": -0.9479697942733765, "logps/chosen": -538.048583984375, "logps/rejected": -1271.905029296875, "loss": 0.063, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10844652354717255, "rewards/margins": 0.2883257269859314, "rewards/rejected": -0.39677220582962036, "step": 260 }, { "epoch": 0.07, "learning_rate": 3.6000000000000003e-06, "logits/chosen": -1.7110416889190674, "logits/rejected": -1.0976063013076782, "logps/chosen": -601.929443359375, "logps/rejected": -1087.264404296875, "loss": 0.074, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13733436167240143, "rewards/margins": 0.2208215445280075, "rewards/rejected": -0.35815590620040894, "step": 270 }, { "epoch": 0.07, "learning_rate": 3.7333333333333337e-06, "logits/chosen": -1.684920310974121, "logits/rejected": -1.0355182886123657, "logps/chosen": -502.43438720703125, "logps/rejected": -1141.5850830078125, "loss": 0.0975, "rewards/accuracies": 0.75, "rewards/chosen": -0.10886510461568832, "rewards/margins": 0.2565527558326721, "rewards/rejected": -0.3654178977012634, "step": 280 }, { "epoch": 0.08, "learning_rate": 3.866666666666667e-06, "logits/chosen": -1.7269914150238037, "logits/rejected": -0.9788557887077332, "logps/chosen": -717.2500610351562, "logps/rejected": -1336.142333984375, "loss": 0.0838, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.23452234268188477, "rewards/margins": 0.29266995191574097, "rewards/rejected": -0.527192234992981, "step": 290 }, { "epoch": 0.08, "learning_rate": 4.000000000000001e-06, "logits/chosen": -1.7060441970825195, "logits/rejected": -1.1475598812103271, "logps/chosen": -730.9443969726562, "logps/rejected": -1244.9676513671875, "loss": 0.1341, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.255375474691391, "rewards/margins": 0.19869598746299744, "rewards/rejected": -0.4540714621543884, "step": 300 }, { "epoch": 0.08, "learning_rate": 4.133333333333333e-06, "logits/chosen": -1.7061704397201538, "logits/rejected": -1.1434520483016968, "logps/chosen": -705.079833984375, "logps/rejected": -1187.599365234375, "loss": 0.1019, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13966450095176697, "rewards/margins": 0.22368240356445312, "rewards/rejected": -0.3633468747138977, "step": 310 }, { "epoch": 0.09, "learning_rate": 4.266666666666668e-06, "logits/chosen": -2.0864200592041016, "logits/rejected": -1.0598593950271606, "logps/chosen": -621.1461181640625, "logps/rejected": -1218.3201904296875, "loss": 0.076, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12446784973144531, "rewards/margins": 0.285147488117218, "rewards/rejected": -0.40961527824401855, "step": 320 }, { "epoch": 0.09, "learning_rate": 4.4e-06, "logits/chosen": -1.633683204650879, "logits/rejected": -1.259135127067566, "logps/chosen": -594.0384521484375, "logps/rejected": -1040.244873046875, "loss": 0.1407, "rewards/accuracies": 0.75, "rewards/chosen": -0.13469278812408447, "rewards/margins": 0.18073201179504395, "rewards/rejected": -0.3154247999191284, "step": 330 }, { "epoch": 0.09, "learning_rate": 4.533333333333334e-06, "logits/chosen": -1.5713467597961426, "logits/rejected": -1.1714986562728882, "logps/chosen": -552.28515625, "logps/rejected": -954.9786376953125, "loss": 0.102, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16208061575889587, "rewards/margins": 0.17426642775535583, "rewards/rejected": -0.3363470435142517, "step": 340 }, { "epoch": 0.09, "learning_rate": 4.666666666666667e-06, "logits/chosen": -1.383143424987793, "logits/rejected": -1.1191000938415527, "logps/chosen": -495.08245849609375, "logps/rejected": -1157.8853759765625, "loss": 0.0813, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1548539251089096, "rewards/margins": 0.23339009284973145, "rewards/rejected": -0.38824400305747986, "step": 350 }, { "epoch": 0.1, "learning_rate": 4.800000000000001e-06, "logits/chosen": -1.7334524393081665, "logits/rejected": -1.072506070137024, "logps/chosen": -742.59814453125, "logps/rejected": -1228.1810302734375, "loss": 0.128, "rewards/accuracies": 0.75, "rewards/chosen": -0.15262000262737274, "rewards/margins": 0.23166091740131378, "rewards/rejected": -0.3842809200286865, "step": 360 }, { "epoch": 0.1, "learning_rate": 4.933333333333334e-06, "logits/chosen": -1.8477518558502197, "logits/rejected": -1.3124161958694458, "logps/chosen": -677.6192626953125, "logps/rejected": -1154.7989501953125, "loss": 0.1155, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12368879467248917, "rewards/margins": 0.20320534706115723, "rewards/rejected": -0.32689422369003296, "step": 370 }, { "epoch": 0.1, "learning_rate": 4.999972922944898e-06, "logits/chosen": -1.8957774639129639, "logits/rejected": -1.3899834156036377, "logps/chosen": -547.0092163085938, "logps/rejected": -1174.024658203125, "loss": 0.0877, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.06287173926830292, "rewards/margins": 0.2412436455488205, "rewards/rejected": -0.304115355014801, "step": 380 }, { "epoch": 0.1, "learning_rate": 4.999756310023261e-06, "logits/chosen": -1.791933298110962, "logits/rejected": -1.1157985925674438, "logps/chosen": -516.9517211914062, "logps/rejected": -1007.5950317382812, "loss": 0.0993, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.06450649350881577, "rewards/margins": 0.22859111428260803, "rewards/rejected": -0.293097585439682, "step": 390 }, { "epoch": 0.11, "learning_rate": 4.999323102948655e-06, "logits/chosen": -1.6634111404418945, "logits/rejected": -0.9339207410812378, "logps/chosen": -541.6902465820312, "logps/rejected": -1222.6890869140625, "loss": 0.0786, "rewards/accuracies": 0.875, "rewards/chosen": -0.11366814374923706, "rewards/margins": 0.3344195783138275, "rewards/rejected": -0.44808775186538696, "step": 400 }, { "epoch": 0.11, "learning_rate": 4.998673339256785e-06, "logits/chosen": -1.7227275371551514, "logits/rejected": -1.4248206615447998, "logps/chosen": -612.4100341796875, "logps/rejected": -1344.530517578125, "loss": 0.0903, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16698965430259705, "rewards/margins": 0.24890287220478058, "rewards/rejected": -0.41589251160621643, "step": 410 }, { "epoch": 0.11, "learning_rate": 4.997807075247147e-06, "logits/chosen": -1.7536413669586182, "logits/rejected": -1.3619048595428467, "logps/chosen": -464.01678466796875, "logps/rejected": -1163.01953125, "loss": 0.0692, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.021548744291067123, "rewards/margins": 0.24685168266296387, "rewards/rejected": -0.2684004306793213, "step": 420 }, { "epoch": 0.11, "learning_rate": 4.996724385978142e-06, "logits/chosen": -2.028895139694214, "logits/rejected": -1.2840532064437866, "logps/chosen": -478.65533447265625, "logps/rejected": -1112.1922607421875, "loss": 0.0923, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.00986658688634634, "rewards/margins": 0.23762169480323792, "rewards/rejected": -0.24748826026916504, "step": 430 }, { "epoch": 0.12, "learning_rate": 4.995425365260585e-06, "logits/chosen": -1.9320755004882812, "logits/rejected": -1.3463362455368042, "logps/chosen": -468.8946838378906, "logps/rejected": -1079.24951171875, "loss": 0.0844, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.05752667784690857, "rewards/margins": 0.25381767749786377, "rewards/rejected": -0.31134432554244995, "step": 440 }, { "epoch": 0.12, "learning_rate": 4.993910125649561e-06, "logits/chosen": -1.8947498798370361, "logits/rejected": -1.4494032859802246, "logps/chosen": -612.285400390625, "logps/rejected": -1175.610595703125, "loss": 0.0961, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.20282471179962158, "rewards/margins": 0.24255582690238953, "rewards/rejected": -0.4453805387020111, "step": 450 }, { "epoch": 0.12, "learning_rate": 4.992178798434684e-06, "logits/chosen": -1.8909610509872437, "logits/rejected": -1.2547855377197266, "logps/chosen": -703.7716064453125, "logps/rejected": -1249.4072265625, "loss": 0.0749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18841485679149628, "rewards/margins": 0.24599456787109375, "rewards/rejected": -0.4344094693660736, "step": 460 }, { "epoch": 0.13, "learning_rate": 4.990231533628719e-06, "logits/chosen": -1.9798517227172852, "logits/rejected": -1.4687498807907104, "logps/chosen": -451.41192626953125, "logps/rejected": -1110.53466796875, "loss": 0.0835, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.05847315117716789, "rewards/margins": 0.2581016421318054, "rewards/rejected": -0.3165748119354248, "step": 470 }, { "epoch": 0.13, "learning_rate": 4.988068499954578e-06, "logits/chosen": -1.9682140350341797, "logits/rejected": -1.161273717880249, "logps/chosen": -534.8321533203125, "logps/rejected": -1069.1895751953125, "loss": 0.0861, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.08595879375934601, "rewards/margins": 0.2487233430147171, "rewards/rejected": -0.3346821367740631, "step": 480 }, { "epoch": 0.13, "learning_rate": 4.985689884830711e-06, "logits/chosen": -1.800855040550232, "logits/rejected": -1.0105341672897339, "logps/chosen": -645.858154296875, "logps/rejected": -1335.704833984375, "loss": 0.06, "rewards/accuracies": 0.875, "rewards/chosen": -0.17263874411582947, "rewards/margins": 0.28426748514175415, "rewards/rejected": -0.4569062292575836, "step": 490 }, { "epoch": 0.13, "learning_rate": 4.983095894354858e-06, "logits/chosen": -1.845642328262329, "logits/rejected": -1.124245285987854, "logps/chosen": -684.5682373046875, "logps/rejected": -1378.302490234375, "loss": 0.0689, "rewards/accuracies": 0.875, "rewards/chosen": -0.19359903037548065, "rewards/margins": 0.3165056109428406, "rewards/rejected": -0.5101046562194824, "step": 500 }, { "epoch": 0.14, "learning_rate": 4.980286753286196e-06, "logits/chosen": -1.7543941736221313, "logits/rejected": -1.139478087425232, "logps/chosen": -591.703857421875, "logps/rejected": -1196.9752197265625, "loss": 0.0752, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17276380956172943, "rewards/margins": 0.274630606174469, "rewards/rejected": -0.44739437103271484, "step": 510 }, { "epoch": 0.14, "learning_rate": 4.97726270502586e-06, "logits/chosen": -1.7981364727020264, "logits/rejected": -1.2439225912094116, "logps/chosen": -622.7135620117188, "logps/rejected": -1289.652587890625, "loss": 0.0642, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16895990073680878, "rewards/margins": 0.27986788749694824, "rewards/rejected": -0.4488278329372406, "step": 520 }, { "epoch": 0.14, "learning_rate": 4.974024011595864e-06, "logits/chosen": -1.7756052017211914, "logits/rejected": -1.273600697517395, "logps/chosen": -779.8721923828125, "logps/rejected": -1319.716064453125, "loss": 0.0889, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19681531190872192, "rewards/margins": 0.24762988090515137, "rewards/rejected": -0.4444451928138733, "step": 530 }, { "epoch": 0.14, "learning_rate": 4.970570953616383e-06, "logits/chosen": -1.7580140829086304, "logits/rejected": -1.2909038066864014, "logps/chosen": -611.9743041992188, "logps/rejected": -1292.6343994140625, "loss": 0.0655, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14458617568016052, "rewards/margins": 0.3040325939655304, "rewards/rejected": -0.44861873984336853, "step": 540 }, { "epoch": 0.15, "learning_rate": 4.966903830281449e-06, "logits/chosen": -2.1057114601135254, "logits/rejected": -1.096806287765503, "logps/chosen": -525.5113525390625, "logps/rejected": -1102.981689453125, "loss": 0.0727, "rewards/accuracies": 0.875, "rewards/chosen": -0.09377844631671906, "rewards/margins": 0.2827639877796173, "rewards/rejected": -0.3765423893928528, "step": 550 }, { "epoch": 0.15, "learning_rate": 4.9630229593330226e-06, "logits/chosen": -1.8463417291641235, "logits/rejected": -1.2035672664642334, "logps/chosen": -658.9556884765625, "logps/rejected": -1261.522705078125, "loss": 0.0586, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1989162266254425, "rewards/margins": 0.25805556774139404, "rewards/rejected": -0.45697179436683655, "step": 560 }, { "epoch": 0.15, "learning_rate": 4.958928677033465e-06, "logits/chosen": -1.9065221548080444, "logits/rejected": -1.3213990926742554, "logps/chosen": -802.2817993164062, "logps/rejected": -1280.823486328125, "loss": 0.1215, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.2689030170440674, "rewards/margins": 0.23530542850494385, "rewards/rejected": -0.5042084455490112, "step": 570 }, { "epoch": 0.15, "learning_rate": 4.954621338136399e-06, "logits/chosen": -1.796940803527832, "logits/rejected": -1.1448333263397217, "logps/chosen": -800.59619140625, "logps/rejected": -1366.584716796875, "loss": 0.0668, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.21505789458751678, "rewards/margins": 0.30636119842529297, "rewards/rejected": -0.5214190483093262, "step": 580 }, { "epoch": 0.16, "learning_rate": 4.95010131585597e-06, "logits/chosen": -1.6272817850112915, "logits/rejected": -0.8004865646362305, "logps/chosen": -684.5340576171875, "logps/rejected": -1158.3621826171875, "loss": 0.1052, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14372439682483673, "rewards/margins": 0.24541731178760529, "rewards/rejected": -0.389141708612442, "step": 590 }, { "epoch": 0.16, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -1.791548490524292, "logits/rejected": -1.2827670574188232, "logps/chosen": -447.36956787109375, "logps/rejected": -1077.491943359375, "loss": 0.0668, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08956549316644669, "rewards/margins": 0.26021477580070496, "rewards/rejected": -0.34978026151657104, "step": 600 }, { "epoch": 0.16, "learning_rate": 4.940424806108619e-06, "logits/chosen": -1.989205002784729, "logits/rejected": -1.3596338033676147, "logps/chosen": -683.2862548828125, "logps/rejected": -1159.7490234375, "loss": 0.1118, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.177694171667099, "rewards/margins": 0.19292449951171875, "rewards/rejected": -0.37061864137649536, "step": 610 }, { "epoch": 0.17, "learning_rate": 4.935269157073597e-06, "logits/chosen": -1.8252586126327515, "logits/rejected": -1.4199360609054565, "logps/chosen": -499.2095642089844, "logps/rejected": -1207.395263671875, "loss": 0.0594, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09857722371816635, "rewards/margins": 0.3041486144065857, "rewards/rejected": -0.40272584557533264, "step": 620 }, { "epoch": 0.17, "learning_rate": 4.9299025014463665e-06, "logits/chosen": -1.646740198135376, "logits/rejected": -0.9128702878952026, "logps/chosen": -546.8827514648438, "logps/rejected": -1260.8980712890625, "loss": 0.0639, "rewards/accuracies": 0.875, "rewards/chosen": -0.10494896024465561, "rewards/margins": 0.3301486670970917, "rewards/rejected": -0.4350976347923279, "step": 630 }, { "epoch": 0.17, "learning_rate": 4.924325304226745e-06, "logits/chosen": -1.8604828119277954, "logits/rejected": -1.0761983394622803, "logps/chosen": -693.2246704101562, "logps/rejected": -1271.934326171875, "loss": 0.0518, "rewards/accuracies": 0.875, "rewards/chosen": -0.19305351376533508, "rewards/margins": 0.2916993200778961, "rewards/rejected": -0.4847528040409088, "step": 640 }, { "epoch": 0.17, "learning_rate": 4.91853804865716e-06, "logits/chosen": -1.38680100440979, "logits/rejected": -0.9548083543777466, "logps/chosen": -604.3922729492188, "logps/rejected": -1208.889404296875, "loss": 0.09, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18794074654579163, "rewards/margins": 0.24451354146003723, "rewards/rejected": -0.43245425820350647, "step": 650 }, { "epoch": 0.18, "learning_rate": 4.912541236180779e-06, "logits/chosen": -1.663114309310913, "logits/rejected": -1.3176881074905396, "logps/chosen": -644.1502685546875, "logps/rejected": -1255.1209716796875, "loss": 0.107, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18079988658428192, "rewards/margins": 0.23088447749614716, "rewards/rejected": -0.4116843640804291, "step": 660 }, { "epoch": 0.18, "learning_rate": 4.9063353863980565e-06, "logits/chosen": -1.651368498802185, "logits/rejected": -1.0333479642868042, "logps/chosen": -622.9571533203125, "logps/rejected": -1196.442138671875, "loss": 0.1069, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13679654896259308, "rewards/margins": 0.2643422484397888, "rewards/rejected": -0.4011387825012207, "step": 670 }, { "epoch": 0.18, "learning_rate": 4.899921037021719e-06, "logits/chosen": -1.8946233987808228, "logits/rejected": -1.4026496410369873, "logps/chosen": -608.56005859375, "logps/rejected": -1102.85107421875, "loss": 0.1145, "rewards/accuracies": 0.75, "rewards/chosen": -0.18631377816200256, "rewards/margins": 0.20509126782417297, "rewards/rejected": -0.39140504598617554, "step": 680 }, { "epoch": 0.18, "learning_rate": 4.893298743830168e-06, "logits/chosen": -1.664089560508728, "logits/rejected": -1.1638383865356445, "logps/chosen": -681.1607055664062, "logps/rejected": -1325.0721435546875, "loss": 0.0936, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.213485985994339, "rewards/margins": 0.2793218493461609, "rewards/rejected": -0.4928078055381775, "step": 690 }, { "epoch": 0.19, "learning_rate": 4.88646908061933e-06, "logits/chosen": -1.7145121097564697, "logits/rejected": -1.0264990329742432, "logps/chosen": -640.181396484375, "logps/rejected": -1224.787841796875, "loss": 0.0784, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20638792216777802, "rewards/margins": 0.2602555751800537, "rewards/rejected": -0.46664348244667053, "step": 700 }, { "epoch": 0.19, "learning_rate": 4.879432639152935e-06, "logits/chosen": -1.5630238056182861, "logits/rejected": -0.8563889265060425, "logps/chosen": -688.0330810546875, "logps/rejected": -1333.184326171875, "loss": 0.087, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20561043918132782, "rewards/margins": 0.285735547542572, "rewards/rejected": -0.4913460314273834, "step": 710 }, { "epoch": 0.19, "learning_rate": 4.8721900291112415e-06, "logits/chosen": -1.7675155401229858, "logits/rejected": -1.3769733905792236, "logps/chosen": -606.588134765625, "logps/rejected": -1178.3841552734375, "loss": 0.0998, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.10641028732061386, "rewards/margins": 0.26885437965393066, "rewards/rejected": -0.3752647042274475, "step": 720 }, { "epoch": 0.19, "learning_rate": 4.864741878038218e-06, "logits/chosen": -1.5133328437805176, "logits/rejected": -1.1089714765548706, "logps/chosen": -560.0144653320312, "logps/rejected": -1235.3402099609375, "loss": 0.085, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12044046074151993, "rewards/margins": 0.26784801483154297, "rewards/rejected": -0.3882884979248047, "step": 730 }, { "epoch": 0.2, "learning_rate": 4.857088831287158e-06, "logits/chosen": -1.8833305835723877, "logits/rejected": -1.1213592290878296, "logps/chosen": -614.7459716796875, "logps/rejected": -1231.1275634765625, "loss": 0.0612, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14084835350513458, "rewards/margins": 0.25714007019996643, "rewards/rejected": -0.3979884088039398, "step": 740 }, { "epoch": 0.2, "learning_rate": 4.849231551964771e-06, "logits/chosen": -1.6487013101577759, "logits/rejected": -1.0574976205825806, "logps/chosen": -638.1517333984375, "logps/rejected": -1221.0208740234375, "loss": 0.0773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15978939831256866, "rewards/margins": 0.2666565477848053, "rewards/rejected": -0.4264459013938904, "step": 750 }, { "epoch": 0.2, "learning_rate": 4.841170720873723e-06, "logits/chosen": -1.6006218194961548, "logits/rejected": -1.0765124559402466, "logps/chosen": -648.8009643554688, "logps/rejected": -1193.5694580078125, "loss": 0.0903, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.20380504429340363, "rewards/margins": 0.2502138018608093, "rewards/rejected": -0.45401889085769653, "step": 760 }, { "epoch": 0.21, "learning_rate": 4.832907036453647e-06, "logits/chosen": -1.6672321557998657, "logits/rejected": -1.2181921005249023, "logps/chosen": -757.1592407226562, "logps/rejected": -1409.3095703125, "loss": 0.0809, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.24005849659442902, "rewards/margins": 0.2847335636615753, "rewards/rejected": -0.5247920155525208, "step": 770 }, { "epoch": 0.21, "learning_rate": 4.824441214720629e-06, "logits/chosen": -1.3765209913253784, "logits/rejected": -1.0029934644699097, "logps/chosen": -498.0054626464844, "logps/rejected": -1238.66943359375, "loss": 0.0667, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15500691533088684, "rewards/margins": 0.3080350160598755, "rewards/rejected": -0.4630419611930847, "step": 780 }, { "epoch": 0.21, "learning_rate": 4.815773989205165e-06, "logits/chosen": -1.7891244888305664, "logits/rejected": -1.2491934299468994, "logps/chosen": -556.8863525390625, "logps/rejected": -1332.5364990234375, "loss": 0.0617, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13201333582401276, "rewards/margins": 0.3241254687309265, "rewards/rejected": -0.4561387896537781, "step": 790 }, { "epoch": 0.21, "learning_rate": 4.806906110888606e-06, "logits/chosen": -1.6729551553726196, "logits/rejected": -1.187744379043579, "logps/chosen": -529.7684936523438, "logps/rejected": -1193.1453857421875, "loss": 0.0791, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1251310557126999, "rewards/margins": 0.2839392423629761, "rewards/rejected": -0.4090702533721924, "step": 800 }, { "epoch": 0.22, "learning_rate": 4.7978383481380865e-06, "logits/chosen": -1.6085048913955688, "logits/rejected": -1.2299137115478516, "logps/chosen": -579.7472534179688, "logps/rejected": -1075.484130859375, "loss": 0.0994, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15328553318977356, "rewards/margins": 0.2056526243686676, "rewards/rejected": -0.3589381277561188, "step": 810 }, { "epoch": 0.22, "learning_rate": 4.788571486639948e-06, "logits/chosen": -1.4437693357467651, "logits/rejected": -0.9531752467155457, "logps/chosen": -721.705078125, "logps/rejected": -1422.717529296875, "loss": 0.0707, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19354596734046936, "rewards/margins": 0.2650890648365021, "rewards/rejected": -0.45863503217697144, "step": 820 }, { "epoch": 0.22, "learning_rate": 4.779106329331665e-06, "logits/chosen": -1.7850446701049805, "logits/rejected": -1.1975328922271729, "logps/chosen": -639.4754028320312, "logps/rejected": -1183.6280517578125, "loss": 0.1136, "rewards/accuracies": 0.75, "rewards/chosen": -0.18970054388046265, "rewards/margins": 0.22527900338172913, "rewards/rejected": -0.4149795472621918, "step": 830 }, { "epoch": 0.22, "learning_rate": 4.769443696332272e-06, "logits/chosen": -1.6451295614242554, "logits/rejected": -0.9056906700134277, "logps/chosen": -704.8148193359375, "logps/rejected": -1420.8524169921875, "loss": 0.0473, "rewards/accuracies": 1.0, "rewards/chosen": -0.17515410482883453, "rewards/margins": 0.3173523545265198, "rewards/rejected": -0.4925064444541931, "step": 840 }, { "epoch": 0.23, "learning_rate": 4.759584424871302e-06, "logits/chosen": -1.5979700088500977, "logits/rejected": -1.1091766357421875, "logps/chosen": -686.29052734375, "logps/rejected": -1212.19580078125, "loss": 0.1128, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.15797309577465057, "rewards/margins": 0.21523161232471466, "rewards/rejected": -0.3732047379016876, "step": 850 }, { "epoch": 0.23, "learning_rate": 4.749529369216246e-06, "logits/chosen": -1.7736440896987915, "logits/rejected": -1.172586441040039, "logps/chosen": -660.5985107421875, "logps/rejected": -1275.8603515625, "loss": 0.0675, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.12020470947027206, "rewards/margins": 0.2714688181877136, "rewards/rejected": -0.39167362451553345, "step": 860 }, { "epoch": 0.23, "learning_rate": 4.7392794005985324e-06, "logits/chosen": -1.8436565399169922, "logits/rejected": -1.348578929901123, "logps/chosen": -471.02703857421875, "logps/rejected": -1074.3665771484375, "loss": 0.0923, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.061982929706573486, "rewards/margins": 0.23231768608093262, "rewards/rejected": -0.2943006157875061, "step": 870 }, { "epoch": 0.23, "learning_rate": 4.7288354071380415e-06, "logits/chosen": -1.6346375942230225, "logits/rejected": -1.053733468055725, "logps/chosen": -518.6777954101562, "logps/rejected": -1128.2366943359375, "loss": 0.0928, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11276010423898697, "rewards/margins": 0.24085621535778046, "rewards/rejected": -0.35361629724502563, "step": 880 }, { "epoch": 0.24, "learning_rate": 4.7181982937661485e-06, "logits/chosen": -1.5796483755111694, "logits/rejected": -0.9548759460449219, "logps/chosen": -694.7703857421875, "logps/rejected": -1379.165771484375, "loss": 0.0694, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21314816176891327, "rewards/margins": 0.28819718956947327, "rewards/rejected": -0.5013453364372253, "step": 890 }, { "epoch": 0.24, "learning_rate": 4.707368982147318e-06, "logits/chosen": -1.6242942810058594, "logits/rejected": -1.100235104560852, "logps/chosen": -656.1912841796875, "logps/rejected": -1320.0205078125, "loss": 0.0818, "rewards/accuracies": 0.875, "rewards/chosen": -0.20513398945331573, "rewards/margins": 0.2904512286186218, "rewards/rejected": -0.49558526277542114, "step": 900 }, { "epoch": 0.24, "learning_rate": 4.696348410599244e-06, "logits/chosen": -1.6266180276870728, "logits/rejected": -1.112475872039795, "logps/chosen": -615.5531005859375, "logps/rejected": -1176.023681640625, "loss": 0.1074, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21413663029670715, "rewards/margins": 0.24315333366394043, "rewards/rejected": -0.45728999376296997, "step": 910 }, { "epoch": 0.25, "learning_rate": 4.685137534011549e-06, "logits/chosen": -1.8083276748657227, "logits/rejected": -0.9349889755249023, "logps/chosen": -659.9114990234375, "logps/rejected": -1210.7021484375, "loss": 0.0749, "rewards/accuracies": 0.875, "rewards/chosen": -0.1867416799068451, "rewards/margins": 0.27049291133880615, "rewards/rejected": -0.45723456144332886, "step": 920 }, { "epoch": 0.25, "learning_rate": 4.673737323763048e-06, "logits/chosen": -1.5211584568023682, "logits/rejected": -0.8713824152946472, "logps/chosen": -557.5753784179688, "logps/rejected": -1095.0653076171875, "loss": 0.1147, "rewards/accuracies": 0.75, "rewards/chosen": -0.1781322956085205, "rewards/margins": 0.23243267834186554, "rewards/rejected": -0.41056495904922485, "step": 930 }, { "epoch": 0.25, "learning_rate": 4.662148767637578e-06, "logits/chosen": -1.6907579898834229, "logits/rejected": -0.92974454164505, "logps/chosen": -672.3154907226562, "logps/rejected": -1263.9849853515625, "loss": 0.0513, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.14491698145866394, "rewards/margins": 0.2988959848880768, "rewards/rejected": -0.44381293654441833, "step": 940 }, { "epoch": 0.25, "learning_rate": 4.650372869738415e-06, "logits/chosen": -1.713449239730835, "logits/rejected": -1.046890139579773, "logps/chosen": -656.3117065429688, "logps/rejected": -1259.781982421875, "loss": 0.0727, "rewards/accuracies": 0.875, "rewards/chosen": -0.1550595909357071, "rewards/margins": 0.27939510345458984, "rewards/rejected": -0.43445467948913574, "step": 950 }, { "epoch": 0.26, "learning_rate": 4.638410650401267e-06, "logits/chosen": -1.6114223003387451, "logits/rejected": -0.8841923475265503, "logps/chosen": -619.8568725585938, "logps/rejected": -1200.001953125, "loss": 0.092, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.18295657634735107, "rewards/margins": 0.27934443950653076, "rewards/rejected": -0.46230101585388184, "step": 960 }, { "epoch": 0.26, "learning_rate": 4.626263146105875e-06, "logits/chosen": -1.8829456567764282, "logits/rejected": -1.1874229907989502, "logps/chosen": -612.0319213867188, "logps/rejected": -1310.8118896484375, "loss": 0.0626, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15665313601493835, "rewards/margins": 0.32128262519836426, "rewards/rejected": -0.4779357314109802, "step": 970 }, { "epoch": 0.26, "learning_rate": 4.613931409386196e-06, "logits/chosen": -1.7148971557617188, "logits/rejected": -1.1706236600875854, "logps/chosen": -651.0546875, "logps/rejected": -1184.420654296875, "loss": 0.0975, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15911589562892914, "rewards/margins": 0.26245635747909546, "rewards/rejected": -0.4215722680091858, "step": 980 }, { "epoch": 0.26, "learning_rate": 4.601416508739211e-06, "logits/chosen": -1.7349941730499268, "logits/rejected": -1.0320522785186768, "logps/chosen": -641.8563842773438, "logps/rejected": -1191.799072265625, "loss": 0.0864, "rewards/accuracies": 0.875, "rewards/chosen": -0.12740136682987213, "rewards/margins": 0.27434709668159485, "rewards/rejected": -0.4017484784126282, "step": 990 }, { "epoch": 0.27, "learning_rate": 4.588719528532342e-06, "logits/chosen": -1.7186208963394165, "logits/rejected": -1.139203429222107, "logps/chosen": -608.3776245117188, "logps/rejected": -1194.766845703125, "loss": 0.0803, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12015128135681152, "rewards/margins": 0.28770390152931213, "rewards/rejected": -0.4078551232814789, "step": 1000 }, { "epoch": 0.27, "learning_rate": 4.575841568909494e-06, "logits/chosen": -1.9281619787216187, "logits/rejected": -1.021177887916565, "logps/chosen": -606.2479248046875, "logps/rejected": -1277.761474609375, "loss": 0.0629, "rewards/accuracies": 0.875, "rewards/chosen": -0.12076227366924286, "rewards/margins": 0.31744498014450073, "rewards/rejected": -0.4382072389125824, "step": 1010 }, { "epoch": 0.27, "learning_rate": 4.562783745695738e-06, "logits/chosen": -1.772539734840393, "logits/rejected": -1.1384470462799072, "logps/chosen": -584.2857666015625, "logps/rejected": -1120.206787109375, "loss": 0.0924, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10691050440073013, "rewards/margins": 0.25287091732025146, "rewards/rejected": -0.359781414270401, "step": 1020 }, { "epoch": 0.27, "learning_rate": 4.549547190300622e-06, "logits/chosen": -1.6101019382476807, "logits/rejected": -0.995293915271759, "logps/chosen": -663.1490478515625, "logps/rejected": -1269.482666015625, "loss": 0.0903, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19160196185112, "rewards/margins": 0.2655636966228485, "rewards/rejected": -0.4571656584739685, "step": 1030 }, { "epoch": 0.28, "learning_rate": 4.536133049620143e-06, "logits/chosen": -1.6123756170272827, "logits/rejected": -0.8887365460395813, "logps/chosen": -645.6787719726562, "logps/rejected": -1266.5006103515625, "loss": 0.0808, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13368944823741913, "rewards/margins": 0.28590840101242065, "rewards/rejected": -0.4195978045463562, "step": 1040 }, { "epoch": 0.28, "learning_rate": 4.522542485937369e-06, "logits/chosen": -1.7597821950912476, "logits/rejected": -1.0739284753799438, "logps/chosen": -670.746337890625, "logps/rejected": -1303.915771484375, "loss": 0.0671, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15525345504283905, "rewards/margins": 0.28055456280708313, "rewards/rejected": -0.435808002948761, "step": 1050 }, { "epoch": 0.28, "learning_rate": 4.508776676821739e-06, "logits/chosen": -1.6723902225494385, "logits/rejected": -1.1365679502487183, "logps/chosen": -659.1893310546875, "logps/rejected": -1257.1162109375, "loss": 0.0961, "rewards/accuracies": 0.875, "rewards/chosen": -0.18045826256275177, "rewards/margins": 0.28194642066955566, "rewards/rejected": -0.46240463852882385, "step": 1060 }, { "epoch": 0.29, "learning_rate": 4.494836815027022e-06, "logits/chosen": -1.674168348312378, "logits/rejected": -0.8119763135910034, "logps/chosen": -675.2777099609375, "logps/rejected": -1269.997802734375, "loss": 0.1022, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2055848389863968, "rewards/margins": 0.2812694013118744, "rewards/rejected": -0.48685422539711, "step": 1070 }, { "epoch": 0.29, "learning_rate": 4.4807241083879774e-06, "logits/chosen": -1.5376824140548706, "logits/rejected": -0.9127294421195984, "logps/chosen": -633.2178344726562, "logps/rejected": -1178.2454833984375, "loss": 0.0838, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.18428584933280945, "rewards/margins": 0.24417026340961456, "rewards/rejected": -0.4284561276435852, "step": 1080 }, { "epoch": 0.29, "learning_rate": 4.466439779715696e-06, "logits/chosen": -1.4871512651443481, "logits/rejected": -0.8935056924819946, "logps/chosen": -606.7117309570312, "logps/rejected": -1183.592041015625, "loss": 0.0976, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1557384431362152, "rewards/margins": 0.2602311670780182, "rewards/rejected": -0.4159695506095886, "step": 1090 }, { "epoch": 0.29, "learning_rate": 4.451985066691649e-06, "logits/chosen": -1.5563310384750366, "logits/rejected": -1.0307669639587402, "logps/chosen": -558.4649047851562, "logps/rejected": -1085.895751953125, "loss": 0.0918, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1063220277428627, "rewards/margins": 0.26596084237098694, "rewards/rejected": -0.37228289246559143, "step": 1100 }, { "epoch": 0.3, "learning_rate": 4.437361221760449e-06, "logits/chosen": -1.692957878112793, "logits/rejected": -1.1781022548675537, "logps/chosen": -514.2180786132812, "logps/rejected": -1231.850341796875, "loss": 0.0484, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.07757623493671417, "rewards/margins": 0.30450260639190674, "rewards/rejected": -0.3820788860321045, "step": 1110 }, { "epoch": 0.3, "learning_rate": 4.422569512021332e-06, "logits/chosen": -1.6798826456069946, "logits/rejected": -1.1389153003692627, "logps/chosen": -585.5572509765625, "logps/rejected": -1191.422607421875, "loss": 0.083, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06024783104658127, "rewards/margins": 0.2825511693954468, "rewards/rejected": -0.34279894828796387, "step": 1120 }, { "epoch": 0.3, "learning_rate": 4.407611219118363e-06, "logits/chosen": -1.574668526649475, "logits/rejected": -1.1745796203613281, "logps/chosen": -573.3482666015625, "logps/rejected": -1237.1324462890625, "loss": 0.0851, "rewards/accuracies": 0.875, "rewards/chosen": -0.14945417642593384, "rewards/margins": 0.26941633224487305, "rewards/rejected": -0.4188705384731293, "step": 1130 }, { "epoch": 0.3, "learning_rate": 4.3924876391293915e-06, "logits/chosen": -1.5106923580169678, "logits/rejected": -1.0384470224380493, "logps/chosen": -653.4393920898438, "logps/rejected": -1249.9561767578125, "loss": 0.0827, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21140387654304504, "rewards/margins": 0.26517191529273987, "rewards/rejected": -0.4765757620334625, "step": 1140 }, { "epoch": 0.31, "learning_rate": 4.377200082453748e-06, "logits/chosen": -1.553951621055603, "logits/rejected": -1.1727259159088135, "logps/chosen": -584.9124145507812, "logps/rejected": -1274.189453125, "loss": 0.0708, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17560866475105286, "rewards/margins": 0.3020893931388855, "rewards/rejected": -0.47769802808761597, "step": 1150 }, { "epoch": 0.31, "learning_rate": 4.361749873698707e-06, "logits/chosen": -1.6106698513031006, "logits/rejected": -0.9710136651992798, "logps/chosen": -683.4093017578125, "logps/rejected": -1351.5721435546875, "loss": 0.0537, "rewards/accuracies": 0.875, "rewards/chosen": -0.1945325881242752, "rewards/margins": 0.3134706914424896, "rewards/rejected": -0.5080032348632812, "step": 1160 }, { "epoch": 0.31, "learning_rate": 4.346138351564711e-06, "logits/chosen": -1.4298365116119385, "logits/rejected": -0.9186006784439087, "logps/chosen": -629.0538940429688, "logps/rejected": -1190.7537841796875, "loss": 0.1065, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1838223785161972, "rewards/margins": 0.25950607657432556, "rewards/rejected": -0.44332846999168396, "step": 1170 }, { "epoch": 0.31, "learning_rate": 4.330366868729376e-06, "logits/chosen": -1.5331902503967285, "logits/rejected": -0.8202164769172668, "logps/chosen": -719.1092529296875, "logps/rejected": -1225.5712890625, "loss": 0.1239, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.23043549060821533, "rewards/margins": 0.24460101127624512, "rewards/rejected": -0.47503647208213806, "step": 1180 }, { "epoch": 0.32, "learning_rate": 4.3144367917302964e-06, "logits/chosen": -1.5329627990722656, "logits/rejected": -1.0226593017578125, "logps/chosen": -558.7479248046875, "logps/rejected": -1248.3746337890625, "loss": 0.0732, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17867226898670197, "rewards/margins": 0.27305805683135986, "rewards/rejected": -0.451730340719223, "step": 1190 }, { "epoch": 0.32, "learning_rate": 4.2983495008466285e-06, "logits/chosen": -1.4446375370025635, "logits/rejected": -1.2019789218902588, "logps/chosen": -629.9727172851562, "logps/rejected": -1310.1802978515625, "loss": 0.0701, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.19203418493270874, "rewards/margins": 0.26955386996269226, "rewards/rejected": -0.461588054895401, "step": 1200 }, { "epoch": 0.32, "learning_rate": 4.2821063899795015e-06, "logits/chosen": -1.5882418155670166, "logits/rejected": -0.9619570970535278, "logps/chosen": -633.544921875, "logps/rejected": -1342.188720703125, "loss": 0.0702, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15442287921905518, "rewards/margins": 0.3408200740814209, "rewards/rejected": -0.49524298310279846, "step": 1210 }, { "epoch": 0.33, "learning_rate": 4.265708866531238e-06, "logits/chosen": -1.6361687183380127, "logits/rejected": -1.160875916481018, "logps/chosen": -604.972412109375, "logps/rejected": -1180.0421142578125, "loss": 0.0912, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15669772028923035, "rewards/margins": 0.2758365571498871, "rewards/rejected": -0.43253427743911743, "step": 1220 }, { "epoch": 0.33, "learning_rate": 4.249158351283414e-06, "logits/chosen": -1.4963274002075195, "logits/rejected": -1.0508579015731812, "logps/chosen": -670.7274169921875, "logps/rejected": -1422.38037109375, "loss": 0.0547, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.18853971362113953, "rewards/margins": 0.3481212556362152, "rewards/rejected": -0.5366610288619995, "step": 1230 }, { "epoch": 0.33, "learning_rate": 4.232456278273743e-06, "logits/chosen": -1.5074360370635986, "logits/rejected": -1.1942028999328613, "logps/chosen": -621.916015625, "logps/rejected": -1097.22705078125, "loss": 0.1119, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.20438826084136963, "rewards/margins": 0.19910171627998352, "rewards/rejected": -0.40348997712135315, "step": 1240 }, { "epoch": 0.33, "learning_rate": 4.215604094671835e-06, "logits/chosen": -1.7932571172714233, "logits/rejected": -1.156964659690857, "logps/chosen": -605.7149047851562, "logps/rejected": -1170.5318603515625, "loss": 0.0823, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13441742956638336, "rewards/margins": 0.26707369089126587, "rewards/rejected": -0.4014911651611328, "step": 1250 }, { "epoch": 0.34, "learning_rate": 4.198603260653792e-06, "logits/chosen": -1.5639320611953735, "logits/rejected": -1.011678695678711, "logps/chosen": -570.6861572265625, "logps/rejected": -1016.75390625, "loss": 0.1338, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.09839320182800293, "rewards/margins": 0.16072291135787964, "rewards/rejected": -0.25911611318588257, "step": 1260 }, { "epoch": 0.34, "learning_rate": 4.181455249275701e-06, "logits/chosen": -1.3490540981292725, "logits/rejected": -0.8460888862609863, "logps/chosen": -604.7997436523438, "logps/rejected": -1211.717529296875, "loss": 0.0948, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16084125638008118, "rewards/margins": 0.24433521926403046, "rewards/rejected": -0.40517646074295044, "step": 1270 }, { "epoch": 0.34, "learning_rate": 4.1641615463459926e-06, "logits/chosen": -1.4548208713531494, "logits/rejected": -0.9700274467468262, "logps/chosen": -662.6041259765625, "logps/rejected": -1517.4892578125, "loss": 0.043, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16144345700740814, "rewards/margins": 0.34403008222579956, "rewards/rejected": -0.5054734945297241, "step": 1280 }, { "epoch": 0.34, "learning_rate": 4.146723650296701e-06, "logits/chosen": -1.5642893314361572, "logits/rejected": -1.012499213218689, "logps/chosen": -522.3748168945312, "logps/rejected": -1114.373779296875, "loss": 0.0893, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11404868215322495, "rewards/margins": 0.27995288372039795, "rewards/rejected": -0.3940015733242035, "step": 1290 }, { "epoch": 0.35, "learning_rate": 4.129143072053639e-06, "logits/chosen": -1.8208191394805908, "logits/rejected": -1.0344860553741455, "logps/chosen": -717.9315185546875, "logps/rejected": -1288.3831787109375, "loss": 0.0814, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1353410929441452, "rewards/margins": 0.2894688546657562, "rewards/rejected": -0.42480993270874023, "step": 1300 }, { "epoch": 0.35, "learning_rate": 4.111421334905468e-06, "logits/chosen": -1.67376708984375, "logits/rejected": -0.7968643307685852, "logps/chosen": -653.73193359375, "logps/rejected": -1237.788330078125, "loss": 0.0799, "rewards/accuracies": 0.875, "rewards/chosen": -0.10021106898784637, "rewards/margins": 0.2965288758277893, "rewards/rejected": -0.3967399299144745, "step": 1310 }, { "epoch": 0.35, "learning_rate": 4.093559974371725e-06, "logits/chosen": -1.5515210628509521, "logits/rejected": -1.0218132734298706, "logps/chosen": -771.3948974609375, "logps/rejected": -1335.147705078125, "loss": 0.0912, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16722288727760315, "rewards/margins": 0.2642812132835388, "rewards/rejected": -0.4315040707588196, "step": 1320 }, { "epoch": 0.35, "learning_rate": 4.075560538069767e-06, "logits/chosen": -1.6814053058624268, "logits/rejected": -1.175160527229309, "logps/chosen": -561.907470703125, "logps/rejected": -1060.85498046875, "loss": 0.1004, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.06503216922283173, "rewards/margins": 0.25370270013809204, "rewards/rejected": -0.3187348544597626, "step": 1330 }, { "epoch": 0.36, "learning_rate": 4.05742458558068e-06, "logits/chosen": -1.8086637258529663, "logits/rejected": -1.3191715478897095, "logps/chosen": -529.0259399414062, "logps/rejected": -1079.9012451171875, "loss": 0.0936, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0950629860162735, "rewards/margins": 0.24811288714408875, "rewards/rejected": -0.34317582845687866, "step": 1340 }, { "epoch": 0.36, "learning_rate": 4.039153688314146e-06, "logits/chosen": -1.6986970901489258, "logits/rejected": -1.07126784324646, "logps/chosen": -537.1072387695312, "logps/rejected": -1259.658935546875, "loss": 0.0665, "rewards/accuracies": 0.875, "rewards/chosen": -0.07069256901741028, "rewards/margins": 0.353458434343338, "rewards/rejected": -0.4241510331630707, "step": 1350 }, { "epoch": 0.36, "learning_rate": 4.020749429372286e-06, "logits/chosen": -1.6649013757705688, "logits/rejected": -1.1139470338821411, "logps/chosen": -582.5765991210938, "logps/rejected": -1291.3966064453125, "loss": 0.0791, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08450852334499359, "rewards/margins": 0.30345186591148376, "rewards/rejected": -0.38796037435531616, "step": 1360 }, { "epoch": 0.37, "learning_rate": 4.002213403412492e-06, "logits/chosen": -1.6457083225250244, "logits/rejected": -1.1551355123519897, "logps/chosen": -443.95770263671875, "logps/rejected": -1125.097412109375, "loss": 0.0942, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.04620728641748428, "rewards/margins": 0.29895836114883423, "rewards/rejected": -0.3451656699180603, "step": 1370 }, { "epoch": 0.37, "learning_rate": 3.983547216509254e-06, "logits/chosen": -1.8475234508514404, "logits/rejected": -0.9538863897323608, "logps/chosen": -532.26171875, "logps/rejected": -1173.6441650390625, "loss": 0.0721, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07093639671802521, "rewards/margins": 0.30083587765693665, "rewards/rejected": -0.37177228927612305, "step": 1380 }, { "epoch": 0.37, "learning_rate": 3.964752486015001e-06, "logits/chosen": -1.702235460281372, "logits/rejected": -1.0087345838546753, "logps/chosen": -570.5701904296875, "logps/rejected": -1237.5167236328125, "loss": 0.0554, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10062988102436066, "rewards/margins": 0.29185742139816284, "rewards/rejected": -0.3924873471260071, "step": 1390 }, { "epoch": 0.37, "learning_rate": 3.945830840419966e-06, "logits/chosen": -1.8062200546264648, "logits/rejected": -1.2370173931121826, "logps/chosen": -563.0610961914062, "logps/rejected": -1202.4017333984375, "loss": 0.1014, "rewards/accuracies": 0.875, "rewards/chosen": -0.08197510987520218, "rewards/margins": 0.284410685300827, "rewards/rejected": -0.3663857579231262, "step": 1400 }, { "epoch": 0.38, "learning_rate": 3.92678391921108e-06, "logits/chosen": -1.685450792312622, "logits/rejected": -1.268028974533081, "logps/chosen": -474.2938537597656, "logps/rejected": -1090.5147705078125, "loss": 0.0983, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0882890596985817, "rewards/margins": 0.26188915967941284, "rewards/rejected": -0.35017821192741394, "step": 1410 }, { "epoch": 0.38, "learning_rate": 3.907613372729916e-06, "logits/chosen": -1.5756769180297852, "logits/rejected": -1.1073580980300903, "logps/chosen": -508.64654541015625, "logps/rejected": -1192.768798828125, "loss": 0.0905, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11147113889455795, "rewards/margins": 0.2684895396232605, "rewards/rejected": -0.37996068596839905, "step": 1420 }, { "epoch": 0.38, "learning_rate": 3.888320862029699e-06, "logits/chosen": -1.970336675643921, "logits/rejected": -1.1683611869812012, "logps/chosen": -608.2377319335938, "logps/rejected": -1272.260498046875, "loss": 0.091, "rewards/accuracies": 0.875, "rewards/chosen": -0.08651524782180786, "rewards/margins": 0.3126547038555145, "rewards/rejected": -0.399169921875, "step": 1430 }, { "epoch": 0.38, "learning_rate": 3.868908058731376e-06, "logits/chosen": -1.7017894983291626, "logits/rejected": -0.9617505073547363, "logps/chosen": -684.0576782226562, "logps/rejected": -1245.509765625, "loss": 0.0996, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09601768106222153, "rewards/margins": 0.29448553919792175, "rewards/rejected": -0.3905032277107239, "step": 1440 }, { "epoch": 0.39, "learning_rate": 3.849376644878783e-06, "logits/chosen": -1.536409616470337, "logits/rejected": -1.0880365371704102, "logps/chosen": -533.0728149414062, "logps/rejected": -1243.9488525390625, "loss": 0.0928, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.07541215419769287, "rewards/margins": 0.28547203540802, "rewards/rejected": -0.3608841896057129, "step": 1450 }, { "epoch": 0.39, "learning_rate": 3.829728312792895e-06, "logits/chosen": -1.8492475748062134, "logits/rejected": -1.256415605545044, "logps/chosen": -434.9085388183594, "logps/rejected": -1020.18798828125, "loss": 0.0619, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04833076149225235, "rewards/margins": 0.2557259202003479, "rewards/rejected": -0.30405664443969727, "step": 1460 }, { "epoch": 0.39, "learning_rate": 3.8099647649251984e-06, "logits/chosen": -1.5395265817642212, "logits/rejected": -1.087846040725708, "logps/chosen": -591.6812744140625, "logps/rejected": -1221.6868896484375, "loss": 0.0932, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10671044886112213, "rewards/margins": 0.2588750422000885, "rewards/rejected": -0.36558544635772705, "step": 1470 }, { "epoch": 0.39, "learning_rate": 3.790087713710179e-06, "logits/chosen": -1.5241081714630127, "logits/rejected": -0.8990601301193237, "logps/chosen": -703.971923828125, "logps/rejected": -1318.2982177734375, "loss": 0.0708, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16658341884613037, "rewards/margins": 0.270012766122818, "rewards/rejected": -0.436596155166626, "step": 1480 }, { "epoch": 0.4, "learning_rate": 3.770098881416945e-06, "logits/chosen": -1.489585041999817, "logits/rejected": -1.3004378080368042, "logps/chosen": -654.8056030273438, "logps/rejected": -1286.8641357421875, "loss": 0.0956, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.15169434249401093, "rewards/margins": 0.25045520067214966, "rewards/rejected": -0.40214958786964417, "step": 1490 }, { "epoch": 0.4, "learning_rate": 3.7500000000000005e-06, "logits/chosen": -1.6937404870986938, "logits/rejected": -1.0904152393341064, "logps/chosen": -521.1519775390625, "logps/rejected": -1150.9736328125, "loss": 0.0922, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09178796410560608, "rewards/margins": 0.2572135031223297, "rewards/rejected": -0.3490014672279358, "step": 1500 }, { "epoch": 0.4, "learning_rate": 3.7297928109491765e-06, "logits/chosen": -1.5384725332260132, "logits/rejected": -1.1151206493377686, "logps/chosen": -500.30340576171875, "logps/rejected": -1171.871337890625, "loss": 0.0648, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09594957530498505, "rewards/margins": 0.31415387988090515, "rewards/rejected": -0.4101034700870514, "step": 1510 }, { "epoch": 0.41, "learning_rate": 3.7094790651387414e-06, "logits/chosen": -1.750771164894104, "logits/rejected": -1.0950592756271362, "logps/chosen": -557.678466796875, "logps/rejected": -1188.5867919921875, "loss": 0.0778, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14062079787254333, "rewards/margins": 0.282297819852829, "rewards/rejected": -0.4229187071323395, "step": 1520 }, { "epoch": 0.41, "learning_rate": 3.689060522675689e-06, "logits/chosen": -1.5061167478561401, "logits/rejected": -1.0993683338165283, "logps/chosen": -719.0147705078125, "logps/rejected": -1315.07421875, "loss": 0.0885, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22822144627571106, "rewards/margins": 0.24384041130542755, "rewards/rejected": -0.47206181287765503, "step": 1530 }, { "epoch": 0.41, "learning_rate": 3.668538952747236e-06, "logits/chosen": -1.7315492630004883, "logits/rejected": -1.098783254623413, "logps/chosen": -702.6656494140625, "logps/rejected": -1301.657958984375, "loss": 0.0741, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.16757100820541382, "rewards/margins": 0.2810649871826172, "rewards/rejected": -0.4486359655857086, "step": 1540 }, { "epoch": 0.41, "learning_rate": 3.6479161334675294e-06, "logits/chosen": -1.6738449335098267, "logits/rejected": -1.0924396514892578, "logps/chosen": -584.9110107421875, "logps/rejected": -1123.946533203125, "loss": 0.0867, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.09195319563150406, "rewards/margins": 0.2546834349632263, "rewards/rejected": -0.3466365933418274, "step": 1550 }, { "epoch": 0.42, "learning_rate": 3.627193851723577e-06, "logits/chosen": -1.5191009044647217, "logits/rejected": -1.2671594619750977, "logps/chosen": -644.867431640625, "logps/rejected": -1194.0931396484375, "loss": 0.1019, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.133761465549469, "rewards/margins": 0.24754054844379425, "rewards/rejected": -0.38130199909210205, "step": 1560 }, { "epoch": 0.42, "learning_rate": 3.6063739030204226e-06, "logits/chosen": -1.7906509637832642, "logits/rejected": -1.3401691913604736, "logps/chosen": -475.7640075683594, "logps/rejected": -1105.63623046875, "loss": 0.0816, "rewards/accuracies": 0.75, "rewards/chosen": -0.06556513905525208, "rewards/margins": 0.2934816777706146, "rewards/rejected": -0.3590467870235443, "step": 1570 }, { "epoch": 0.42, "learning_rate": 3.5854580913255706e-06, "logits/chosen": -1.6345583200454712, "logits/rejected": -0.9686171412467957, "logps/chosen": -546.1632080078125, "logps/rejected": -1137.38232421875, "loss": 0.0774, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09245268255472183, "rewards/margins": 0.2836568355560303, "rewards/rejected": -0.3761095106601715, "step": 1580 }, { "epoch": 0.42, "learning_rate": 3.564448228912682e-06, "logits/chosen": -1.774083137512207, "logits/rejected": -1.0874212980270386, "logps/chosen": -627.0970458984375, "logps/rejected": -1292.3609619140625, "loss": 0.0548, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14228971302509308, "rewards/margins": 0.29837566614151, "rewards/rejected": -0.4406653940677643, "step": 1590 }, { "epoch": 0.43, "learning_rate": 3.543346136204545e-06, "logits/chosen": -1.492363691329956, "logits/rejected": -0.9753513336181641, "logps/chosen": -715.6846923828125, "logps/rejected": -1311.7313232421875, "loss": 0.1043, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19200663268566132, "rewards/margins": 0.2355610430240631, "rewards/rejected": -0.427567720413208, "step": 1600 }, { "epoch": 0.43, "learning_rate": 3.522153641615345e-06, "logits/chosen": -1.6796743869781494, "logits/rejected": -1.0669097900390625, "logps/chosen": -651.3643798828125, "logps/rejected": -1211.4693603515625, "loss": 0.0636, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12537550926208496, "rewards/margins": 0.29044246673583984, "rewards/rejected": -0.4158180356025696, "step": 1610 }, { "epoch": 0.43, "learning_rate": 3.5008725813922383e-06, "logits/chosen": -1.53739333152771, "logits/rejected": -1.009767770767212, "logps/chosen": -771.9571533203125, "logps/rejected": -1199.3785400390625, "loss": 0.0922, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17814821004867554, "rewards/margins": 0.24210956692695618, "rewards/rejected": -0.4202577471733093, "step": 1620 }, { "epoch": 0.43, "learning_rate": 3.4795047994562463e-06, "logits/chosen": -1.4524638652801514, "logits/rejected": -1.051928162574768, "logps/chosen": -690.6948852539062, "logps/rejected": -1226.161865234375, "loss": 0.0876, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18693287670612335, "rewards/margins": 0.25937145948410034, "rewards/rejected": -0.4463043212890625, "step": 1630 }, { "epoch": 0.44, "learning_rate": 3.458052147242494e-06, "logits/chosen": -1.8221120834350586, "logits/rejected": -1.1258487701416016, "logps/chosen": -630.0181274414062, "logps/rejected": -1044.5679931640625, "loss": 0.1046, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.09280923753976822, "rewards/margins": 0.23150058090686798, "rewards/rejected": -0.3243098556995392, "step": 1640 }, { "epoch": 0.44, "learning_rate": 3.436516483539781e-06, "logits/chosen": -1.8554388284683228, "logits/rejected": -1.3803541660308838, "logps/chosen": -633.9047241210938, "logps/rejected": -1229.6116943359375, "loss": 0.0778, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13664793968200684, "rewards/margins": 0.2661344110965729, "rewards/rejected": -0.4027823805809021, "step": 1650 }, { "epoch": 0.44, "learning_rate": 3.4148996743295305e-06, "logits/chosen": -1.9048486948013306, "logits/rejected": -1.058411955833435, "logps/chosen": -720.1047973632812, "logps/rejected": -1232.9039306640625, "loss": 0.0732, "rewards/accuracies": 0.875, "rewards/chosen": -0.12227736413478851, "rewards/margins": 0.2787812352180481, "rewards/rejected": -0.4010585844516754, "step": 1660 }, { "epoch": 0.45, "learning_rate": 3.3932035926241103e-06, "logits/chosen": -1.6380399465560913, "logits/rejected": -1.262209177017212, "logps/chosen": -482.85662841796875, "logps/rejected": -1295.7718505859375, "loss": 0.0462, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08021150529384613, "rewards/margins": 0.3560211658477783, "rewards/rejected": -0.43623265624046326, "step": 1670 }, { "epoch": 0.45, "learning_rate": 3.3714301183045382e-06, "logits/chosen": -1.595003366470337, "logits/rejected": -1.2377384901046753, "logps/chosen": -547.7657470703125, "logps/rejected": -1363.28125, "loss": 0.0617, "rewards/accuracies": 0.875, "rewards/chosen": -0.12761729955673218, "rewards/margins": 0.29965031147003174, "rewards/rejected": -0.4272676110267639, "step": 1680 }, { "epoch": 0.45, "learning_rate": 3.349581137957604e-06, "logits/chosen": -1.8540780544281006, "logits/rejected": -1.0956978797912598, "logps/chosen": -570.6678466796875, "logps/rejected": -1214.4659423828125, "loss": 0.0458, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.08135120570659637, "rewards/margins": 0.32362592220306396, "rewards/rejected": -0.40497714281082153, "step": 1690 }, { "epoch": 0.45, "learning_rate": 3.3276585447123957e-06, "logits/chosen": -1.4734654426574707, "logits/rejected": -1.1689575910568237, "logps/chosen": -471.4612731933594, "logps/rejected": -1102.623046875, "loss": 0.0775, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.06378956139087677, "rewards/margins": 0.2605707049369812, "rewards/rejected": -0.32436028122901917, "step": 1700 }, { "epoch": 0.46, "learning_rate": 3.3056642380762783e-06, "logits/chosen": -1.6254606246948242, "logits/rejected": -0.9264505505561829, "logps/chosen": -598.3116455078125, "logps/rejected": -1273.1226806640625, "loss": 0.0645, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.10875244438648224, "rewards/margins": 0.3382043242454529, "rewards/rejected": -0.4469567835330963, "step": 1710 }, { "epoch": 0.46, "learning_rate": 3.2836001237702993e-06, "logits/chosen": -1.7050098180770874, "logits/rejected": -1.1736676692962646, "logps/chosen": -605.455078125, "logps/rejected": -1361.8411865234375, "loss": 0.0707, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.09871726483106613, "rewards/margins": 0.318486750125885, "rewards/rejected": -0.41720399260520935, "step": 1720 }, { "epoch": 0.46, "learning_rate": 3.2614681135640696e-06, "logits/chosen": -1.666524887084961, "logits/rejected": -1.0706026554107666, "logps/chosen": -633.6300048828125, "logps/rejected": -1287.6302490234375, "loss": 0.0617, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.08346323668956757, "rewards/margins": 0.29316291213035583, "rewards/rejected": -0.376626193523407, "step": 1730 }, { "epoch": 0.46, "learning_rate": 3.2392701251101172e-06, "logits/chosen": -1.6395193338394165, "logits/rejected": -1.1502461433410645, "logps/chosen": -596.9259033203125, "logps/rejected": -1209.7242431640625, "loss": 0.0985, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13397231698036194, "rewards/margins": 0.2410469949245453, "rewards/rejected": -0.3750193417072296, "step": 1740 }, { "epoch": 0.47, "learning_rate": 3.217008081777726e-06, "logits/chosen": -1.6936429738998413, "logits/rejected": -1.1541160345077515, "logps/chosen": -687.9554443359375, "logps/rejected": -1373.260986328125, "loss": 0.0775, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19113728404045105, "rewards/margins": 0.3089086413383484, "rewards/rejected": -0.500045895576477, "step": 1750 }, { "epoch": 0.47, "learning_rate": 3.1946839124862873e-06, "logits/chosen": -1.499732255935669, "logits/rejected": -1.073246955871582, "logps/chosen": -632.1316528320312, "logps/rejected": -1340.867919921875, "loss": 0.055, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14432887732982635, "rewards/margins": 0.3110666275024414, "rewards/rejected": -0.45539551973342896, "step": 1760 }, { "epoch": 0.47, "learning_rate": 3.1722995515381644e-06, "logits/chosen": -1.6296335458755493, "logits/rejected": -0.9428736567497253, "logps/chosen": -613.3892822265625, "logps/rejected": -1241.071533203125, "loss": 0.0922, "rewards/accuracies": 0.875, "rewards/chosen": -0.1409432291984558, "rewards/margins": 0.28491806983947754, "rewards/rejected": -0.42586126923561096, "step": 1770 }, { "epoch": 0.47, "learning_rate": 3.149856938451094e-06, "logits/chosen": -1.9275051355361938, "logits/rejected": -0.9073979258537292, "logps/chosen": -685.4188842773438, "logps/rejected": -1194.86767578125, "loss": 0.0869, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15064950287342072, "rewards/margins": 0.25674349069595337, "rewards/rejected": -0.4073929786682129, "step": 1780 }, { "epoch": 0.48, "learning_rate": 3.127358017790132e-06, "logits/chosen": -1.5247657299041748, "logits/rejected": -0.8251350522041321, "logps/chosen": -658.7485961914062, "logps/rejected": -1302.989013671875, "loss": 0.0565, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15536119043827057, "rewards/margins": 0.306736022233963, "rewards/rejected": -0.46209725737571716, "step": 1790 }, { "epoch": 0.48, "learning_rate": 3.1048047389991693e-06, "logits/chosen": -1.5092096328735352, "logits/rejected": -0.8953585624694824, "logps/chosen": -563.54638671875, "logps/rejected": -1277.382080078125, "loss": 0.0635, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12220227718353271, "rewards/margins": 0.32350245118141174, "rewards/rejected": -0.44570469856262207, "step": 1800 }, { "epoch": 0.48, "learning_rate": 3.082199056232015e-06, "logits/chosen": -1.555259108543396, "logits/rejected": -1.2513010501861572, "logps/chosen": -576.5824584960938, "logps/rejected": -1193.6871337890625, "loss": 0.1014, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14716719090938568, "rewards/margins": 0.2701808214187622, "rewards/rejected": -0.4173479974269867, "step": 1810 }, { "epoch": 0.49, "learning_rate": 3.059542928183079e-06, "logits/chosen": -1.2370166778564453, "logits/rejected": -0.862767219543457, "logps/chosen": -630.3663330078125, "logps/rejected": -1336.5428466796875, "loss": 0.0544, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1622113287448883, "rewards/margins": 0.32105860114097595, "rewards/rejected": -0.48326998949050903, "step": 1820 }, { "epoch": 0.49, "learning_rate": 3.0368383179176584e-06, "logits/chosen": -1.385969877243042, "logits/rejected": -1.0176479816436768, "logps/chosen": -719.9691772460938, "logps/rejected": -1240.3895263671875, "loss": 0.1078, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.19016364216804504, "rewards/margins": 0.2406831681728363, "rewards/rejected": -0.43084684014320374, "step": 1830 }, { "epoch": 0.49, "learning_rate": 3.0140871927018466e-06, "logits/chosen": -1.6181665658950806, "logits/rejected": -1.2087422609329224, "logps/chosen": -704.0010986328125, "logps/rejected": -1382.859619140625, "loss": 0.0552, "rewards/accuracies": 0.875, "rewards/chosen": -0.19687633216381073, "rewards/margins": 0.28999894857406616, "rewards/rejected": -0.4868752360343933, "step": 1840 }, { "epoch": 0.49, "learning_rate": 2.9912915238320755e-06, "logits/chosen": -1.4043359756469727, "logits/rejected": -1.1318188905715942, "logps/chosen": -555.2379760742188, "logps/rejected": -1354.341796875, "loss": 0.0577, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16941991448402405, "rewards/margins": 0.33636412024497986, "rewards/rejected": -0.5057840347290039, "step": 1850 }, { "epoch": 0.5, "learning_rate": 2.9684532864643123e-06, "logits/chosen": -1.7377849817276, "logits/rejected": -1.16031014919281, "logps/chosen": -689.5167236328125, "logps/rejected": -1164.914794921875, "loss": 0.1023, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21689483523368835, "rewards/margins": 0.22367532551288605, "rewards/rejected": -0.440570205450058, "step": 1860 }, { "epoch": 0.5, "learning_rate": 2.945574459442917e-06, "logits/chosen": -1.6468284130096436, "logits/rejected": -1.190332055091858, "logps/chosen": -639.2093505859375, "logps/rejected": -1291.997802734375, "loss": 0.0592, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1794385313987732, "rewards/margins": 0.31004798412323, "rewards/rejected": -0.4894865155220032, "step": 1870 }, { "epoch": 0.5, "learning_rate": 2.922657025129185e-06, "logits/chosen": -1.5556018352508545, "logits/rejected": -1.0828845500946045, "logps/chosen": -718.0322875976562, "logps/rejected": -1318.2115478515625, "loss": 0.0983, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.27457764744758606, "rewards/margins": 0.23540663719177246, "rewards/rejected": -0.5099843144416809, "step": 1880 }, { "epoch": 0.5, "learning_rate": 2.8997029692295875e-06, "logits/chosen": -1.66329026222229, "logits/rejected": -1.0540707111358643, "logps/chosen": -647.1304931640625, "logps/rejected": -1394.345703125, "loss": 0.0644, "rewards/accuracies": 0.875, "rewards/chosen": -0.16953504085540771, "rewards/margins": 0.3324953019618988, "rewards/rejected": -0.5020303726196289, "step": 1890 }, { "epoch": 0.51, "learning_rate": 2.876714280623708e-06, "logits/chosen": -1.3675628900527954, "logits/rejected": -0.910226047039032, "logps/chosen": -656.5228881835938, "logps/rejected": -1325.244140625, "loss": 0.0585, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18668699264526367, "rewards/margins": 0.3124103248119354, "rewards/rejected": -0.4990972876548767, "step": 1900 }, { "epoch": 0.51, "learning_rate": 2.8536929511919227e-06, "logits/chosen": -1.5113346576690674, "logits/rejected": -0.917544960975647, "logps/chosen": -621.9835815429688, "logps/rejected": -1159.2508544921875, "loss": 0.0598, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17902755737304688, "rewards/margins": 0.25234436988830566, "rewards/rejected": -0.43137192726135254, "step": 1910 }, { "epoch": 0.51, "learning_rate": 2.8306409756428067e-06, "logits/chosen": -1.56842041015625, "logits/rejected": -0.9856363534927368, "logps/chosen": -565.5784301757812, "logps/rejected": -1276.6402587890625, "loss": 0.0583, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.158840149641037, "rewards/margins": 0.29064539074897766, "rewards/rejected": -0.4494854807853699, "step": 1920 }, { "epoch": 0.51, "learning_rate": 2.807560351340302e-06, "logits/chosen": -1.5603129863739014, "logits/rejected": -1.0442321300506592, "logps/chosen": -594.1895751953125, "logps/rejected": -1224.7371826171875, "loss": 0.0977, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17820130288600922, "rewards/margins": 0.2835688889026642, "rewards/rejected": -0.4617701470851898, "step": 1930 }, { "epoch": 0.52, "learning_rate": 2.7844530781306544e-06, "logits/chosen": -1.436647653579712, "logits/rejected": -0.8475536108016968, "logps/chosen": -703.9013061523438, "logps/rejected": -1465.6171875, "loss": 0.0612, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22263050079345703, "rewards/margins": 0.3488808274269104, "rewards/rejected": -0.5715113282203674, "step": 1940 }, { "epoch": 0.52, "learning_rate": 2.761321158169134e-06, "logits/chosen": -1.6537196636199951, "logits/rejected": -1.1116868257522583, "logps/chosen": -717.2203369140625, "logps/rejected": -1459.016357421875, "loss": 0.0578, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19066976010799408, "rewards/margins": 0.35232046246528625, "rewards/rejected": -0.5429901480674744, "step": 1950 }, { "epoch": 0.52, "learning_rate": 2.738166595746554e-06, "logits/chosen": -1.709814429283142, "logits/rejected": -0.9134047627449036, "logps/chosen": -705.0501708984375, "logps/rejected": -1441.9495849609375, "loss": 0.0426, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.172596737742424, "rewards/margins": 0.34574776887893677, "rewards/rejected": -0.518344521522522, "step": 1960 }, { "epoch": 0.53, "learning_rate": 2.7149913971156105e-06, "logits/chosen": -1.6743762493133545, "logits/rejected": -1.134342908859253, "logps/chosen": -682.0380859375, "logps/rejected": -1288.8563232421875, "loss": 0.0783, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.21863889694213867, "rewards/margins": 0.30050721764564514, "rewards/rejected": -0.5191460847854614, "step": 1970 }, { "epoch": 0.53, "learning_rate": 2.6917975703170466e-06, "logits/chosen": -1.384412407875061, "logits/rejected": -0.9328360557556152, "logps/chosen": -756.0421752929688, "logps/rejected": -1321.971923828125, "loss": 0.0844, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.2494826316833496, "rewards/margins": 0.2922648787498474, "rewards/rejected": -0.541747510433197, "step": 1980 }, { "epoch": 0.53, "learning_rate": 2.668587125005663e-06, "logits/chosen": -1.573412299156189, "logits/rejected": -0.9849382638931274, "logps/chosen": -653.5593872070312, "logps/rejected": -1219.503173828125, "loss": 0.0674, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22859041392803192, "rewards/margins": 0.2729692757129669, "rewards/rejected": -0.5015596747398376, "step": 1990 }, { "epoch": 0.53, "learning_rate": 2.6453620722761897e-06, "logits/chosen": -1.6217581033706665, "logits/rejected": -0.829363226890564, "logps/chosen": -687.14794921875, "logps/rejected": -1367.03515625, "loss": 0.0515, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.2579112648963928, "rewards/margins": 0.3186108469963074, "rewards/rejected": -0.5765220522880554, "step": 2000 }, { "epoch": 0.54, "learning_rate": 2.6221244244890336e-06, "logits/chosen": -1.4822559356689453, "logits/rejected": -0.8854954838752747, "logps/chosen": -803.3583984375, "logps/rejected": -1373.9749755859375, "loss": 0.0823, "rewards/accuracies": 0.875, "rewards/chosen": -0.2859230637550354, "rewards/margins": 0.2797744870185852, "rewards/rejected": -0.5656975507736206, "step": 2010 }, { "epoch": 0.54, "learning_rate": 2.5988761950959133e-06, "logits/chosen": -1.405379056930542, "logits/rejected": -0.980889618396759, "logps/chosen": -653.2147216796875, "logps/rejected": -1412.9344482421875, "loss": 0.0489, "rewards/accuracies": 0.875, "rewards/chosen": -0.19988027215003967, "rewards/margins": 0.3394158184528351, "rewards/rejected": -0.5392960906028748, "step": 2020 }, { "epoch": 0.54, "learning_rate": 2.575619398465402e-06, "logits/chosen": -1.5412838459014893, "logits/rejected": -0.9163180589675903, "logps/chosen": -539.6043090820312, "logps/rejected": -1107.312255859375, "loss": 0.0771, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1628035455942154, "rewards/margins": 0.2565108835697174, "rewards/rejected": -0.419314444065094, "step": 2030 }, { "epoch": 0.54, "learning_rate": 2.5523560497083927e-06, "logits/chosen": -1.482742428779602, "logits/rejected": -1.0331344604492188, "logps/chosen": -751.7644653320312, "logps/rejected": -1281.9117431640625, "loss": 0.0847, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.21723918616771698, "rewards/margins": 0.26914697885513306, "rewards/rejected": -0.48638615012168884, "step": 2040 }, { "epoch": 0.55, "learning_rate": 2.5290881645034932e-06, "logits/chosen": -1.6871871948242188, "logits/rejected": -0.9969019889831543, "logps/chosen": -712.149169921875, "logps/rejected": -1383.6920166015625, "loss": 0.0813, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.2281813621520996, "rewards/margins": 0.3213092088699341, "rewards/rejected": -0.5494905710220337, "step": 2050 }, { "epoch": 0.55, "learning_rate": 2.5058177589223766e-06, "logits/chosen": -1.4727680683135986, "logits/rejected": -1.048156976699829, "logps/chosen": -657.0037231445312, "logps/rejected": -1363.59814453125, "loss": 0.0745, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2256421595811844, "rewards/margins": 0.31263765692710876, "rewards/rejected": -0.5382798314094543, "step": 2060 }, { "epoch": 0.55, "learning_rate": 2.482546849255096e-06, "logits/chosen": -1.5077242851257324, "logits/rejected": -0.8984912633895874, "logps/chosen": -722.7063598632812, "logps/rejected": -1281.8148193359375, "loss": 0.0741, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.23482950031757355, "rewards/margins": 0.291878879070282, "rewards/rejected": -0.5267083644866943, "step": 2070 }, { "epoch": 0.55, "learning_rate": 2.4592774518353858e-06, "logits/chosen": -1.6423757076263428, "logits/rejected": -0.9732748866081238, "logps/chosen": -625.0535888671875, "logps/rejected": -1348.394287109375, "loss": 0.0562, "rewards/accuracies": 0.875, "rewards/chosen": -0.19351795315742493, "rewards/margins": 0.3293379247188568, "rewards/rejected": -0.5228558778762817, "step": 2080 }, { "epoch": 0.56, "learning_rate": 2.436011582865945e-06, "logits/chosen": -1.7477773427963257, "logits/rejected": -1.0844796895980835, "logps/chosen": -786.804443359375, "logps/rejected": -1510.250732421875, "loss": 0.0565, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.24655666947364807, "rewards/margins": 0.34261250495910645, "rewards/rejected": -0.5891691446304321, "step": 2090 }, { "epoch": 0.56, "learning_rate": 2.4127512582437486e-06, "logits/chosen": -1.1730453968048096, "logits/rejected": -1.0297441482543945, "logps/chosen": -610.2826538085938, "logps/rejected": -1334.4935302734375, "loss": 0.075, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.21944193542003632, "rewards/margins": 0.28680866956710815, "rewards/rejected": -0.5062506198883057, "step": 2100 }, { "epoch": 0.56, "learning_rate": 2.3894984933853734e-06, "logits/chosen": -1.6909167766571045, "logits/rejected": -1.2207863330841064, "logps/chosen": -736.7841796875, "logps/rejected": -1436.5059814453125, "loss": 0.0715, "rewards/accuracies": 0.875, "rewards/chosen": -0.2114332616329193, "rewards/margins": 0.30715304613113403, "rewards/rejected": -0.518586277961731, "step": 2110 }, { "epoch": 0.57, "learning_rate": 2.366255303052377e-06, "logits/chosen": -1.7539796829223633, "logits/rejected": -1.0083894729614258, "logps/chosen": -702.0449829101562, "logps/rejected": -1368.424072265625, "loss": 0.0588, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17488941550254822, "rewards/margins": 0.3086225390434265, "rewards/rejected": -0.4835119843482971, "step": 2120 }, { "epoch": 0.57, "learning_rate": 2.3430237011767166e-06, "logits/chosen": -1.783517837524414, "logits/rejected": -0.9859122037887573, "logps/chosen": -631.6397705078125, "logps/rejected": -1418.53857421875, "loss": 0.0386, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.13998395204544067, "rewards/margins": 0.3510037064552307, "rewards/rejected": -0.490987628698349, "step": 2130 }, { "epoch": 0.57, "learning_rate": 2.319805700686257e-06, "logits/chosen": -1.5874156951904297, "logits/rejected": -1.0402486324310303, "logps/chosen": -647.2991943359375, "logps/rejected": -1264.6134033203125, "loss": 0.0663, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15432874858379364, "rewards/margins": 0.29131144285202026, "rewards/rejected": -0.4456402361392975, "step": 2140 }, { "epoch": 0.57, "learning_rate": 2.296603313330355e-06, "logits/chosen": -1.6740115880966187, "logits/rejected": -1.049090027809143, "logps/chosen": -650.9720458984375, "logps/rejected": -1069.4693603515625, "loss": 0.1228, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1620035022497177, "rewards/margins": 0.2206643521785736, "rewards/rejected": -0.3826678991317749, "step": 2150 }, { "epoch": 0.58, "learning_rate": 2.2734185495055503e-06, "logits/chosen": -1.4526993036270142, "logits/rejected": -1.0033073425292969, "logps/chosen": -464.20257568359375, "logps/rejected": -1249.659423828125, "loss": 0.0672, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11725373566150665, "rewards/margins": 0.32836082577705383, "rewards/rejected": -0.4456145763397217, "step": 2160 }, { "epoch": 0.58, "learning_rate": 2.250253418081373e-06, "logits/chosen": -1.6637146472930908, "logits/rejected": -1.0717463493347168, "logps/chosen": -662.2470703125, "logps/rejected": -1223.6536865234375, "loss": 0.0843, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1466962993144989, "rewards/margins": 0.2738208770751953, "rewards/rejected": -0.4205172061920166, "step": 2170 }, { "epoch": 0.58, "learning_rate": 2.22710992622628e-06, "logits/chosen": -1.7047134637832642, "logits/rejected": -0.7803869843482971, "logps/chosen": -681.6057739257812, "logps/rejected": -1309.5865478515625, "loss": 0.057, "rewards/accuracies": 0.875, "rewards/chosen": -0.15952114760875702, "rewards/margins": 0.309850811958313, "rewards/rejected": -0.4693719744682312, "step": 2180 }, { "epoch": 0.58, "learning_rate": 2.2039900792337477e-06, "logits/chosen": -1.5044130086898804, "logits/rejected": -0.787899374961853, "logps/chosen": -632.465576171875, "logps/rejected": -1316.4517822265625, "loss": 0.0515, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18237130343914032, "rewards/margins": 0.32328280806541443, "rewards/rejected": -0.5056540369987488, "step": 2190 }, { "epoch": 0.59, "learning_rate": 2.1808958803485134e-06, "logits/chosen": -1.6702648401260376, "logits/rejected": -0.990136444568634, "logps/chosen": -455.564208984375, "logps/rejected": -1168.5498046875, "loss": 0.0599, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12168209254741669, "rewards/margins": 0.3131280541419983, "rewards/rejected": -0.434810072183609, "step": 2200 }, { "epoch": 0.59, "learning_rate": 2.157829330593008e-06, "logits/chosen": -1.6868867874145508, "logits/rejected": -0.9523127675056458, "logps/chosen": -733.1408081054688, "logps/rejected": -1337.63818359375, "loss": 0.0715, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.21179482340812683, "rewards/margins": 0.30666384100914, "rewards/rejected": -0.5184586644172668, "step": 2210 }, { "epoch": 0.59, "learning_rate": 2.134792428593971e-06, "logits/chosen": -1.6914653778076172, "logits/rejected": -1.2282737493515015, "logps/chosen": -614.1090087890625, "logps/rejected": -1088.0286865234375, "loss": 0.1059, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1738576740026474, "rewards/margins": 0.21695072948932648, "rewards/rejected": -0.3908084034919739, "step": 2220 }, { "epoch": 0.59, "learning_rate": 2.1117871704092818e-06, "logits/chosen": -1.6489229202270508, "logits/rejected": -0.9123425483703613, "logps/chosen": -679.4093017578125, "logps/rejected": -1336.1690673828125, "loss": 0.0813, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1528438925743103, "rewards/margins": 0.3031119108200073, "rewards/rejected": -0.4559558033943176, "step": 2230 }, { "epoch": 0.6, "learning_rate": 2.0888155493550027e-06, "logits/chosen": -1.658093810081482, "logits/rejected": -1.2635515928268433, "logps/chosen": -563.3775634765625, "logps/rejected": -1217.8135986328125, "loss": 0.0686, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12988874316215515, "rewards/margins": 0.31268182396888733, "rewards/rejected": -0.44257062673568726, "step": 2240 }, { "epoch": 0.6, "learning_rate": 2.0658795558326745e-06, "logits/chosen": -1.5542079210281372, "logits/rejected": -0.9705120921134949, "logps/chosen": -641.7821655273438, "logps/rejected": -1335.944091796875, "loss": 0.0586, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1482844054698944, "rewards/margins": 0.3350493013858795, "rewards/rejected": -0.48333367705345154, "step": 2250 }, { "epoch": 0.6, "learning_rate": 2.0429811771568468e-06, "logits/chosen": -1.6625282764434814, "logits/rejected": -0.9575098752975464, "logps/chosen": -673.6990966796875, "logps/rejected": -1288.328369140625, "loss": 0.0408, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1419149488210678, "rewards/margins": 0.2989000678062439, "rewards/rejected": -0.4408150315284729, "step": 2260 }, { "epoch": 0.61, "learning_rate": 2.0201223973828917e-06, "logits/chosen": -1.7629448175430298, "logits/rejected": -1.1014915704727173, "logps/chosen": -609.2691650390625, "logps/rejected": -1323.014404296875, "loss": 0.0751, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12834301590919495, "rewards/margins": 0.31924059987068176, "rewards/rejected": -0.4475835859775543, "step": 2270 }, { "epoch": 0.61, "learning_rate": 1.997305197135089e-06, "logits/chosen": -1.5687427520751953, "logits/rejected": -0.9063301086425781, "logps/chosen": -674.9013671875, "logps/rejected": -1229.5550537109375, "loss": 0.0873, "rewards/accuracies": 0.875, "rewards/chosen": -0.12829624116420746, "rewards/margins": 0.24914589524269104, "rewards/rejected": -0.3774421215057373, "step": 2280 }, { "epoch": 0.61, "learning_rate": 1.9745315534350157e-06, "logits/chosen": -1.6142327785491943, "logits/rejected": -1.2041738033294678, "logps/chosen": -527.4874877929688, "logps/rejected": -1276.542724609375, "loss": 0.0724, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.11953765153884888, "rewards/margins": 0.3008989989757538, "rewards/rejected": -0.42043668031692505, "step": 2290 }, { "epoch": 0.61, "learning_rate": 1.9518034395302413e-06, "logits/chosen": -1.584478497505188, "logits/rejected": -0.8746352195739746, "logps/chosen": -507.75128173828125, "logps/rejected": -1205.164794921875, "loss": 0.0654, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.12713788449764252, "rewards/margins": 0.3278200626373291, "rewards/rejected": -0.4549580216407776, "step": 2300 }, { "epoch": 0.62, "learning_rate": 1.9291228247233607e-06, "logits/chosen": -1.452108383178711, "logits/rejected": -0.9603986740112305, "logps/chosen": -750.004638671875, "logps/rejected": -1415.546875, "loss": 0.0642, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.22353100776672363, "rewards/margins": 0.3011419475078583, "rewards/rejected": -0.5246729254722595, "step": 2310 }, { "epoch": 0.62, "learning_rate": 1.9064916742013515e-06, "logits/chosen": -1.5349475145339966, "logits/rejected": -0.8534983396530151, "logps/chosen": -691.6678466796875, "logps/rejected": -1337.88671875, "loss": 0.0564, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19090059399604797, "rewards/margins": 0.3062039911746979, "rewards/rejected": -0.49710458517074585, "step": 2320 }, { "epoch": 0.62, "learning_rate": 1.883911948865306e-06, "logits/chosen": -1.6528412103652954, "logits/rejected": -1.132430076599121, "logps/chosen": -626.3675537109375, "logps/rejected": -1352.5123291015625, "loss": 0.0702, "rewards/accuracies": 0.875, "rewards/chosen": -0.1741064339876175, "rewards/margins": 0.31939199566841125, "rewards/rejected": -0.49349841475486755, "step": 2330 }, { "epoch": 0.62, "learning_rate": 1.8613856051605242e-06, "logits/chosen": -1.8713703155517578, "logits/rejected": -1.2219023704528809, "logps/chosen": -479.8798828125, "logps/rejected": -1113.4814453125, "loss": 0.0658, "rewards/accuracies": 0.75, "rewards/chosen": -0.10413724184036255, "rewards/margins": 0.322685569524765, "rewards/rejected": -0.42682284116744995, "step": 2340 }, { "epoch": 0.63, "learning_rate": 1.8389145949069953e-06, "logits/chosen": -1.5313884019851685, "logits/rejected": -1.042482614517212, "logps/chosen": -540.1700439453125, "logps/rejected": -1220.364990234375, "loss": 0.0499, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13704395294189453, "rewards/margins": 0.29186248779296875, "rewards/rejected": -0.4289064407348633, "step": 2350 }, { "epoch": 0.63, "learning_rate": 1.816500865130279e-06, "logits/chosen": -1.495516300201416, "logits/rejected": -1.306896448135376, "logps/chosen": -562.7477416992188, "logps/rejected": -1185.534912109375, "loss": 0.1051, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14714586734771729, "rewards/margins": 0.26410627365112305, "rewards/rejected": -0.4112521708011627, "step": 2360 }, { "epoch": 0.63, "learning_rate": 1.7941463578928088e-06, "logits/chosen": -1.6695563793182373, "logits/rejected": -0.9721006155014038, "logps/chosen": -601.9846801757812, "logps/rejected": -1367.9365234375, "loss": 0.0603, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1521584689617157, "rewards/margins": 0.35412856936454773, "rewards/rejected": -0.5062869787216187, "step": 2370 }, { "epoch": 0.63, "learning_rate": 1.7718530101256115e-06, "logits/chosen": -1.6784439086914062, "logits/rejected": -1.0285909175872803, "logps/chosen": -690.5152587890625, "logps/rejected": -1287.234130859375, "loss": 0.083, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.17937234044075012, "rewards/margins": 0.27991387248039246, "rewards/rejected": -0.4592861533164978, "step": 2380 }, { "epoch": 0.64, "learning_rate": 1.7496227534604859e-06, "logits/chosen": -1.7998254299163818, "logits/rejected": -1.1072345972061157, "logps/chosen": -648.5956420898438, "logps/rejected": -1205.2398681640625, "loss": 0.0657, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1724121868610382, "rewards/margins": 0.29253289103507996, "rewards/rejected": -0.46494507789611816, "step": 2390 }, { "epoch": 0.64, "learning_rate": 1.7274575140626318e-06, "logits/chosen": -1.6273527145385742, "logits/rejected": -1.183037519454956, "logps/chosen": -620.6856689453125, "logps/rejected": -1215.266845703125, "loss": 0.0901, "rewards/accuracies": 0.75, "rewards/chosen": -0.17143599689006805, "rewards/margins": 0.2781030535697937, "rewards/rejected": -0.44953903555870056, "step": 2400 }, { "epoch": 0.64, "learning_rate": 1.7053592124637557e-06, "logits/chosen": -1.8067405223846436, "logits/rejected": -1.2858269214630127, "logps/chosen": -659.517822265625, "logps/rejected": -1250.8367919921875, "loss": 0.0949, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17256858944892883, "rewards/margins": 0.2582642436027527, "rewards/rejected": -0.43083280324935913, "step": 2410 }, { "epoch": 0.65, "learning_rate": 1.6833297633956647e-06, "logits/chosen": -1.443641185760498, "logits/rejected": -1.0768052339553833, "logps/chosen": -559.7144775390625, "logps/rejected": -1292.55859375, "loss": 0.0641, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.11608362197875977, "rewards/margins": 0.34168118238449097, "rewards/rejected": -0.45776480436325073, "step": 2420 }, { "epoch": 0.65, "learning_rate": 1.661371075624363e-06, "logits/chosen": -1.6469463109970093, "logits/rejected": -1.0461533069610596, "logps/chosen": -603.0146484375, "logps/rejected": -1207.7100830078125, "loss": 0.07, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.10989055782556534, "rewards/margins": 0.2882770299911499, "rewards/rejected": -0.39816758036613464, "step": 2430 }, { "epoch": 0.65, "learning_rate": 1.6394850517846621e-06, "logits/chosen": -1.6739981174468994, "logits/rejected": -1.2458163499832153, "logps/chosen": -687.8931884765625, "logps/rejected": -1236.5269775390625, "loss": 0.0977, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.148878276348114, "rewards/margins": 0.2945893704891205, "rewards/rejected": -0.4434676170349121, "step": 2440 }, { "epoch": 0.65, "learning_rate": 1.6176735882153284e-06, "logits/chosen": -1.6518714427947998, "logits/rejected": -0.9769365191459656, "logps/chosen": -496.71661376953125, "logps/rejected": -1090.7801513671875, "loss": 0.081, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.12074653804302216, "rewards/margins": 0.26993122696876526, "rewards/rejected": -0.3906777799129486, "step": 2450 }, { "epoch": 0.66, "learning_rate": 1.5959385747947697e-06, "logits/chosen": -1.5575945377349854, "logits/rejected": -1.1600332260131836, "logps/chosen": -541.9180908203125, "logps/rejected": -1182.4320068359375, "loss": 0.0797, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16873383522033691, "rewards/margins": 0.2602773606777191, "rewards/rejected": -0.42901119589805603, "step": 2460 }, { "epoch": 0.66, "learning_rate": 1.5742818947772875e-06, "logits/chosen": -1.6753427982330322, "logits/rejected": -0.9124045372009277, "logps/chosen": -678.1805419921875, "logps/rejected": -1311.1553955078125, "loss": 0.0578, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17256096005439758, "rewards/margins": 0.3086041510105133, "rewards/rejected": -0.4811651110649109, "step": 2470 }, { "epoch": 0.66, "learning_rate": 1.552705424629898e-06, "logits/chosen": -1.5884169340133667, "logits/rejected": -0.9173520803451538, "logps/chosen": -705.35009765625, "logps/rejected": -1284.0096435546875, "loss": 0.0683, "rewards/accuracies": 0.875, "rewards/chosen": -0.16716155409812927, "rewards/margins": 0.28985482454299927, "rewards/rejected": -0.4570164084434509, "step": 2480 }, { "epoch": 0.66, "learning_rate": 1.5312110338697427e-06, "logits/chosen": -1.533384084701538, "logits/rejected": -1.1220409870147705, "logps/chosen": -613.6298828125, "logps/rejected": -1082.766357421875, "loss": 0.1039, "rewards/accuracies": 0.75, "rewards/chosen": -0.17001187801361084, "rewards/margins": 0.20055198669433594, "rewards/rejected": -0.3705638647079468, "step": 2490 }, { "epoch": 0.67, "learning_rate": 1.509800584902108e-06, "logits/chosen": -1.4955171346664429, "logits/rejected": -0.8390592336654663, "logps/chosen": -611.5145263671875, "logps/rejected": -1197.4530029296875, "loss": 0.0597, "rewards/accuracies": 0.875, "rewards/chosen": -0.12041016668081284, "rewards/margins": 0.2896363139152527, "rewards/rejected": -0.4100464880466461, "step": 2500 }, { "epoch": 0.67, "learning_rate": 1.4884759328590476e-06, "logits/chosen": -1.914384126663208, "logits/rejected": -1.2168171405792236, "logps/chosen": -600.9666137695312, "logps/rejected": -1124.942626953125, "loss": 0.0888, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11489248275756836, "rewards/margins": 0.26221710443496704, "rewards/rejected": -0.3771095871925354, "step": 2510 }, { "epoch": 0.67, "learning_rate": 1.467238925438646e-06, "logits/chosen": -1.738040566444397, "logits/rejected": -0.8857895731925964, "logps/chosen": -679.0386962890625, "logps/rejected": -1410.933837890625, "loss": 0.0558, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16262464225292206, "rewards/margins": 0.339643657207489, "rewards/rejected": -0.5022683143615723, "step": 2520 }, { "epoch": 0.67, "learning_rate": 1.446091402744923e-06, "logits/chosen": -1.3981173038482666, "logits/rejected": -0.8543848991394043, "logps/chosen": -590.76513671875, "logps/rejected": -1206.71240234375, "loss": 0.0583, "rewards/accuracies": 0.875, "rewards/chosen": -0.15895266830921173, "rewards/margins": 0.2688734829425812, "rewards/rejected": -0.4278261661529541, "step": 2530 }, { "epoch": 0.68, "learning_rate": 1.4250351971283937e-06, "logits/chosen": -1.6803276538848877, "logits/rejected": -1.0602971315383911, "logps/chosen": -679.5074462890625, "logps/rejected": -1229.011962890625, "loss": 0.0871, "rewards/accuracies": 0.875, "rewards/chosen": -0.19001373648643494, "rewards/margins": 0.26307451725006104, "rewards/rejected": -0.45308828353881836, "step": 2540 }, { "epoch": 0.68, "learning_rate": 1.4040721330273063e-06, "logits/chosen": -1.5601686239242554, "logits/rejected": -0.9407947659492493, "logps/chosen": -616.7720336914062, "logps/rejected": -1353.0308837890625, "loss": 0.0501, "rewards/accuracies": 0.875, "rewards/chosen": -0.16183273494243622, "rewards/margins": 0.3334823250770569, "rewards/rejected": -0.4953150749206543, "step": 2550 }, { "epoch": 0.68, "learning_rate": 1.3832040268095589e-06, "logits/chosen": -1.52444589138031, "logits/rejected": -0.7717920541763306, "logps/chosen": -666.0655517578125, "logps/rejected": -1330.985595703125, "loss": 0.0645, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1790360063314438, "rewards/margins": 0.3166579306125641, "rewards/rejected": -0.49569398164749146, "step": 2560 }, { "epoch": 0.69, "learning_rate": 1.362432686615316e-06, "logits/chosen": -1.511685848236084, "logits/rejected": -1.2630524635314941, "logps/chosen": -488.4208068847656, "logps/rejected": -1233.045166015625, "loss": 0.0768, "rewards/accuracies": 0.875, "rewards/chosen": -0.12492994964122772, "rewards/margins": 0.30061617493629456, "rewards/rejected": -0.4255460798740387, "step": 2570 }, { "epoch": 0.69, "learning_rate": 1.3417599122003464e-06, "logits/chosen": -1.624969720840454, "logits/rejected": -0.8495733141899109, "logps/chosen": -633.3519897460938, "logps/rejected": -1253.824951171875, "loss": 0.0632, "rewards/accuracies": 0.875, "rewards/chosen": -0.18349340558052063, "rewards/margins": 0.2837851643562317, "rewards/rejected": -0.4672785699367523, "step": 2580 }, { "epoch": 0.69, "learning_rate": 1.3211874947800747e-06, "logits/chosen": -1.4943301677703857, "logits/rejected": -1.0596059560775757, "logps/chosen": -581.8253784179688, "logps/rejected": -1226.0418701171875, "loss": 0.061, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1564798802137375, "rewards/margins": 0.30731362104415894, "rewards/rejected": -0.4637935161590576, "step": 2590 }, { "epoch": 0.69, "learning_rate": 1.3007172168743854e-06, "logits/chosen": -1.4282827377319336, "logits/rejected": -1.0970618724822998, "logps/chosen": -622.9385375976562, "logps/rejected": -1321.9234619140625, "loss": 0.0718, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.18767477571964264, "rewards/margins": 0.32170000672340393, "rewards/rejected": -0.5093748569488525, "step": 2600 }, { "epoch": 0.7, "learning_rate": 1.280350852153168e-06, "logits/chosen": -1.4260807037353516, "logits/rejected": -0.8716268539428711, "logps/chosen": -704.5125732421875, "logps/rejected": -1340.787353515625, "loss": 0.0737, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.22893400490283966, "rewards/margins": 0.28585508465766907, "rewards/rejected": -0.5147891044616699, "step": 2610 }, { "epoch": 0.7, "learning_rate": 1.260090165282645e-06, "logits/chosen": -1.5481798648834229, "logits/rejected": -1.048405647277832, "logps/chosen": -643.6202392578125, "logps/rejected": -1242.826416015625, "loss": 0.0806, "rewards/accuracies": 0.875, "rewards/chosen": -0.2178986817598343, "rewards/margins": 0.2719099521636963, "rewards/rejected": -0.4898086488246918, "step": 2620 }, { "epoch": 0.7, "learning_rate": 1.2399369117724582e-06, "logits/chosen": -1.650472640991211, "logits/rejected": -0.9782799482345581, "logps/chosen": -707.5630493164062, "logps/rejected": -1298.345458984375, "loss": 0.0753, "rewards/accuracies": 0.875, "rewards/chosen": -0.16964152455329895, "rewards/margins": 0.2897980809211731, "rewards/rejected": -0.45943960547447205, "step": 2630 }, { "epoch": 0.7, "learning_rate": 1.2198928378235717e-06, "logits/chosen": -1.6851444244384766, "logits/rejected": -0.92662113904953, "logps/chosen": -619.037841796875, "logps/rejected": -1287.4412841796875, "loss": 0.0522, "rewards/accuracies": 0.875, "rewards/chosen": -0.13626527786254883, "rewards/margins": 0.3253551125526428, "rewards/rejected": -0.46162039041519165, "step": 2640 }, { "epoch": 0.71, "learning_rate": 1.1999596801769617e-06, "logits/chosen": -1.8352587223052979, "logits/rejected": -1.171582818031311, "logps/chosen": -616.7615966796875, "logps/rejected": -1335.4404296875, "loss": 0.049, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1257292926311493, "rewards/margins": 0.32309678196907043, "rewards/rejected": -0.4488261342048645, "step": 2650 }, { "epoch": 0.71, "learning_rate": 1.1801391659631423e-06, "logits/chosen": -1.6399614810943604, "logits/rejected": -1.0923852920532227, "logps/chosen": -635.6760864257812, "logps/rejected": -1325.152099609375, "loss": 0.0653, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16147668659687042, "rewards/margins": 0.3237590193748474, "rewards/rejected": -0.48523569107055664, "step": 2660 }, { "epoch": 0.71, "learning_rate": 1.160433012552508e-06, "logits/chosen": -1.5931923389434814, "logits/rejected": -1.2261526584625244, "logps/chosen": -603.7941284179688, "logps/rejected": -1107.156982421875, "loss": 0.1027, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.14636698365211487, "rewards/margins": 0.22241589426994324, "rewards/rejected": -0.3687829077243805, "step": 2670 }, { "epoch": 0.71, "learning_rate": 1.1408429274065418e-06, "logits/chosen": -1.481740117073059, "logits/rejected": -1.1304103136062622, "logps/chosen": -606.6870727539062, "logps/rejected": -1188.147216796875, "loss": 0.0763, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1330951750278473, "rewards/margins": 0.2732751667499542, "rewards/rejected": -0.4063703119754791, "step": 2680 }, { "epoch": 0.72, "learning_rate": 1.1213706079298566e-06, "logits/chosen": -1.6204341650009155, "logits/rejected": -1.0541805028915405, "logps/chosen": -554.864990234375, "logps/rejected": -1167.746826171875, "loss": 0.0617, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11585495620965958, "rewards/margins": 0.30027204751968384, "rewards/rejected": -0.4161270260810852, "step": 2690 }, { "epoch": 0.72, "learning_rate": 1.1020177413231334e-06, "logits/chosen": -1.6089589595794678, "logits/rejected": -1.0122666358947754, "logps/chosen": -685.2811889648438, "logps/rejected": -1211.408935546875, "loss": 0.0745, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16823996603488922, "rewards/margins": 0.2715032994747162, "rewards/rejected": -0.4397433400154114, "step": 2700 }, { "epoch": 0.72, "learning_rate": 1.0827860044369226e-06, "logits/chosen": -1.7601515054702759, "logits/rejected": -1.007900595664978, "logps/chosen": -704.5645751953125, "logps/rejected": -1274.189453125, "loss": 0.0737, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17606690526008606, "rewards/margins": 0.30847105383872986, "rewards/rejected": -0.48453792929649353, "step": 2710 }, { "epoch": 0.73, "learning_rate": 1.06367706362636e-06, "logits/chosen": -1.5721882581710815, "logits/rejected": -1.0723769664764404, "logps/chosen": -665.5977783203125, "logps/rejected": -1278.2569580078125, "loss": 0.0799, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18703334033489227, "rewards/margins": 0.2840210795402527, "rewards/rejected": -0.47105446457862854, "step": 2720 }, { "epoch": 0.73, "learning_rate": 1.0446925746067768e-06, "logits/chosen": -1.6236509084701538, "logits/rejected": -1.0335582494735718, "logps/chosen": -735.1541137695312, "logps/rejected": -1341.9544677734375, "loss": 0.0686, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1772700995206833, "rewards/margins": 0.31488290429115295, "rewards/rejected": -0.49215301871299744, "step": 2730 }, { "epoch": 0.73, "learning_rate": 1.0258341823102418e-06, "logits/chosen": -1.6834526062011719, "logits/rejected": -1.0599212646484375, "logps/chosen": -650.5093994140625, "logps/rejected": -1288.187744140625, "loss": 0.0623, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14554272592067719, "rewards/margins": 0.3072082996368408, "rewards/rejected": -0.4527510106563568, "step": 2740 }, { "epoch": 0.73, "learning_rate": 1.0071035207430352e-06, "logits/chosen": -1.7902591228485107, "logits/rejected": -1.289333462715149, "logps/chosen": -654.4462890625, "logps/rejected": -1320.8380126953125, "loss": 0.0673, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.16027647256851196, "rewards/margins": 0.28841906785964966, "rewards/rejected": -0.44869551062583923, "step": 2750 }, { "epoch": 0.74, "learning_rate": 9.88502212844063e-07, "logits/chosen": -1.528241515159607, "logits/rejected": -1.140417218208313, "logps/chosen": -584.2600708007812, "logps/rejected": -1204.6785888671875, "loss": 0.0822, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.12145284563302994, "rewards/margins": 0.2952510714530945, "rewards/rejected": -0.4167039394378662, "step": 2760 }, { "epoch": 0.74, "learning_rate": 9.700318703442437e-07, "logits/chosen": -1.6165825128555298, "logits/rejected": -1.2629040479660034, "logps/chosen": -692.536865234375, "logps/rejected": -1356.869384765625, "loss": 0.0861, "rewards/accuracies": 0.875, "rewards/chosen": -0.16812773048877716, "rewards/margins": 0.2822396159172058, "rewards/rejected": -0.4503673017024994, "step": 2770 }, { "epoch": 0.74, "learning_rate": 9.516940936268504e-07, "logits/chosen": -1.4886250495910645, "logits/rejected": -0.8492704629898071, "logps/chosen": -567.9114379882812, "logps/rejected": -1223.29931640625, "loss": 0.0907, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15036030113697052, "rewards/margins": 0.3028646409511566, "rewards/rejected": -0.45322495698928833, "step": 2780 }, { "epoch": 0.74, "learning_rate": 9.334904715888496e-07, "logits/chosen": -1.6458749771118164, "logits/rejected": -1.2635785341262817, "logps/chosen": -567.9539794921875, "logps/rejected": -1301.9921875, "loss": 0.0872, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1548917442560196, "rewards/margins": 0.31059738993644714, "rewards/rejected": -0.46548905968666077, "step": 2790 }, { "epoch": 0.75, "learning_rate": 9.154225815032242e-07, "logits/chosen": -1.5953037738800049, "logits/rejected": -1.0339276790618896, "logps/chosen": -624.7017822265625, "logps/rejected": -1236.400634765625, "loss": 0.0716, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.17240996658802032, "rewards/margins": 0.28697705268859863, "rewards/rejected": -0.45938700437545776, "step": 2800 }, { "epoch": 0.75, "learning_rate": 8.974919888823164e-07, "logits/chosen": -1.4576761722564697, "logits/rejected": -1.1351208686828613, "logps/chosen": -624.9636840820312, "logps/rejected": -1355.374755859375, "loss": 0.0514, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.19172172248363495, "rewards/margins": 0.3108896315097809, "rewards/rejected": -0.5026113390922546, "step": 2810 }, { "epoch": 0.75, "learning_rate": 8.797002473421729e-07, "logits/chosen": -1.5604914426803589, "logits/rejected": -0.9569327235221863, "logps/chosen": -538.5881958007812, "logps/rejected": -1219.4466552734375, "loss": 0.0683, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15165778994560242, "rewards/margins": 0.28516024351119995, "rewards/rejected": -0.4368179738521576, "step": 2820 }, { "epoch": 0.75, "learning_rate": 8.620488984679378e-07, "logits/chosen": -1.852301836013794, "logits/rejected": -1.1759949922561646, "logps/chosen": -659.7407836914062, "logps/rejected": -1306.58203125, "loss": 0.066, "rewards/accuracies": 0.875, "rewards/chosen": -0.16961313784122467, "rewards/margins": 0.3043574392795563, "rewards/rejected": -0.47397056221961975, "step": 2830 }, { "epoch": 0.76, "learning_rate": 8.445394716802754e-07, "logits/chosen": -1.6400655508041382, "logits/rejected": -1.1373300552368164, "logps/chosen": -735.5645751953125, "logps/rejected": -1331.341064453125, "loss": 0.0866, "rewards/accuracies": 0.875, "rewards/chosen": -0.1878480315208435, "rewards/margins": 0.27262082695961, "rewards/rejected": -0.4604688286781311, "step": 2840 }, { "epoch": 0.76, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.4932782649993896, "logits/rejected": -0.9225826263427734, "logps/chosen": -594.31982421875, "logps/rejected": -1214.7799072265625, "loss": 0.0686, "rewards/accuracies": 0.875, "rewards/chosen": -0.1466977298259735, "rewards/margins": 0.30178767442703247, "rewards/rejected": -0.4484853744506836, "step": 2850 }, { "epoch": 0.76, "learning_rate": 8.099524404308948e-07, "logits/chosen": -1.5658118724822998, "logits/rejected": -0.9562853574752808, "logps/chosen": -645.6381225585938, "logps/rejected": -1404.0450439453125, "loss": 0.0439, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.15565729141235352, "rewards/margins": 0.3485789895057678, "rewards/rejected": -0.5042362809181213, "step": 2860 }, { "epoch": 0.77, "learning_rate": 7.928778328007918e-07, "logits/chosen": -1.867717981338501, "logits/rejected": -0.9512616991996765, "logps/chosen": -674.2090454101562, "logps/rejected": -1350.9642333984375, "loss": 0.0573, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13902226090431213, "rewards/margins": 0.3449219763278961, "rewards/rejected": -0.48394423723220825, "step": 2870 }, { "epoch": 0.77, "learning_rate": 7.759511406608255e-07, "logits/chosen": -1.5874018669128418, "logits/rejected": -1.1192893981933594, "logps/chosen": -586.0441284179688, "logps/rejected": -1302.760986328125, "loss": 0.0788, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13455680012702942, "rewards/margins": 0.2815057635307312, "rewards/rejected": -0.416062593460083, "step": 2880 }, { "epoch": 0.77, "learning_rate": 7.591738306429769e-07, "logits/chosen": -1.5606791973114014, "logits/rejected": -1.0470914840698242, "logps/chosen": -744.95263671875, "logps/rejected": -1285.8818359375, "loss": 0.0792, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18279020488262177, "rewards/margins": 0.2564167082309723, "rewards/rejected": -0.43920689821243286, "step": 2890 }, { "epoch": 0.77, "learning_rate": 7.425473564358457e-07, "logits/chosen": -1.6006968021392822, "logits/rejected": -1.0099936723709106, "logps/chosen": -720.3036499023438, "logps/rejected": -1374.542236328125, "loss": 0.0453, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18247629702091217, "rewards/margins": 0.3243308663368225, "rewards/rejected": -0.5068072080612183, "step": 2900 }, { "epoch": 0.78, "learning_rate": 7.260731586586983e-07, "logits/chosen": -1.6800225973129272, "logits/rejected": -0.9249428510665894, "logps/chosen": -472.9820251464844, "logps/rejected": -1168.8466796875, "loss": 0.0482, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1155017763376236, "rewards/margins": 0.33621880412101746, "rewards/rejected": -0.45172062516212463, "step": 2910 }, { "epoch": 0.78, "learning_rate": 7.097526647366379e-07, "logits/chosen": -1.7830703258514404, "logits/rejected": -1.3318331241607666, "logps/chosen": -588.818359375, "logps/rejected": -1168.9097900390625, "loss": 0.0765, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1334531605243683, "rewards/margins": 0.28530946373939514, "rewards/rejected": -0.41876259446144104, "step": 2920 }, { "epoch": 0.78, "learning_rate": 6.935872887769299e-07, "logits/chosen": -1.593977928161621, "logits/rejected": -1.0323081016540527, "logps/chosen": -722.8409423828125, "logps/rejected": -1217.73681640625, "loss": 0.0888, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1773270219564438, "rewards/margins": 0.2691357731819153, "rewards/rejected": -0.4464627802371979, "step": 2930 }, { "epoch": 0.78, "learning_rate": 6.775784314464717e-07, "logits/chosen": -1.6172115802764893, "logits/rejected": -1.0769383907318115, "logps/chosen": -590.12939453125, "logps/rejected": -1277.718505859375, "loss": 0.0699, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1619381308555603, "rewards/margins": 0.30093225836753845, "rewards/rejected": -0.46287041902542114, "step": 2940 }, { "epoch": 0.79, "learning_rate": 6.617274798504286e-07, "logits/chosen": -1.748478651046753, "logits/rejected": -0.9003183245658875, "logps/chosen": -748.8468017578125, "logps/rejected": -1387.6318359375, "loss": 0.0683, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.17095907032489777, "rewards/margins": 0.3251902461051941, "rewards/rejected": -0.49614930152893066, "step": 2950 }, { "epoch": 0.79, "learning_rate": 6.460358074120518e-07, "logits/chosen": -1.5600354671478271, "logits/rejected": -1.1617937088012695, "logps/chosen": -635.3890380859375, "logps/rejected": -1242.238525390625, "loss": 0.0767, "rewards/accuracies": 0.875, "rewards/chosen": -0.16152323782444, "rewards/margins": 0.26770225167274475, "rewards/rejected": -0.42922544479370117, "step": 2960 }, { "epoch": 0.79, "learning_rate": 6.305047737536707e-07, "logits/chosen": -1.6008501052856445, "logits/rejected": -1.1396461725234985, "logps/chosen": -604.7008666992188, "logps/rejected": -1217.7623291015625, "loss": 0.0703, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1483077108860016, "rewards/margins": 0.29934391379356384, "rewards/rejected": -0.44765162467956543, "step": 2970 }, { "epoch": 0.79, "learning_rate": 6.151357245788917e-07, "logits/chosen": -1.6701332330703735, "logits/rejected": -0.9330341219902039, "logps/chosen": -628.5372924804688, "logps/rejected": -1414.3253173828125, "loss": 0.0577, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.154875710606575, "rewards/margins": 0.35905200242996216, "rewards/rejected": -0.513927698135376, "step": 2980 }, { "epoch": 0.8, "learning_rate": 5.999299915559956e-07, "logits/chosen": -1.4873206615447998, "logits/rejected": -0.9139396548271179, "logps/chosen": -534.3379516601562, "logps/rejected": -1303.0247802734375, "loss": 0.0602, "rewards/accuracies": 0.875, "rewards/chosen": -0.1485694944858551, "rewards/margins": 0.3279011845588684, "rewards/rejected": -0.4764706492424011, "step": 2990 }, { "epoch": 0.8, "learning_rate": 5.848888922025553e-07, "logits/chosen": -1.5426976680755615, "logits/rejected": -1.0285556316375732, "logps/chosen": -576.2730102539062, "logps/rejected": -1104.698974609375, "loss": 0.0913, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1463908702135086, "rewards/margins": 0.2579698860645294, "rewards/rejected": -0.40436071157455444, "step": 3000 }, { "epoch": 0.8, "learning_rate": 5.700137297712749e-07, "logits/chosen": -1.5507534742355347, "logits/rejected": -1.1646429300308228, "logps/chosen": -678.5272216796875, "logps/rejected": -1307.9720458984375, "loss": 0.0558, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.19729594886302948, "rewards/margins": 0.2910037338733673, "rewards/rejected": -0.488299697637558, "step": 3010 }, { "epoch": 0.81, "learning_rate": 5.553057931370729e-07, "logits/chosen": -1.7067492008209229, "logits/rejected": -1.3142516613006592, "logps/chosen": -630.6482543945312, "logps/rejected": -1377.3018798828125, "loss": 0.053, "rewards/accuracies": 0.875, "rewards/chosen": -0.1730308085680008, "rewards/margins": 0.33525392413139343, "rewards/rejected": -0.5082847476005554, "step": 3020 }, { "epoch": 0.81, "learning_rate": 5.407663566854008e-07, "logits/chosen": -1.5402835607528687, "logits/rejected": -0.9547150731086731, "logps/chosen": -717.29833984375, "logps/rejected": -1315.943115234375, "loss": 0.0595, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18772569298744202, "rewards/margins": 0.29532501101493835, "rewards/rejected": -0.48305076360702515, "step": 3030 }, { "epoch": 0.81, "learning_rate": 5.263966802018275e-07, "logits/chosen": -1.8520797491073608, "logits/rejected": -1.0401822328567505, "logps/chosen": -717.2581176757812, "logps/rejected": -1253.8289794921875, "loss": 0.0639, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.16455858945846558, "rewards/margins": 0.29715651273727417, "rewards/rejected": -0.4617151618003845, "step": 3040 }, { "epoch": 0.81, "learning_rate": 5.121980087628802e-07, "logits/chosen": -1.7092409133911133, "logits/rejected": -1.2182010412216187, "logps/chosen": -586.7152099609375, "logps/rejected": -1214.601318359375, "loss": 0.051, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.137488454580307, "rewards/margins": 0.3142798840999603, "rewards/rejected": -0.45176833868026733, "step": 3050 }, { "epoch": 0.82, "learning_rate": 4.981715726281666e-07, "logits/chosen": -1.5101208686828613, "logits/rejected": -0.8277866244316101, "logps/chosen": -569.005615234375, "logps/rejected": -1231.107177734375, "loss": 0.0616, "rewards/accuracies": 0.875, "rewards/chosen": -0.13659389317035675, "rewards/margins": 0.32126671075820923, "rewards/rejected": -0.45786052942276, "step": 3060 }, { "epoch": 0.82, "learning_rate": 4.843185871337722e-07, "logits/chosen": -1.4731261730194092, "logits/rejected": -1.047473669052124, "logps/chosen": -693.8050537109375, "logps/rejected": -1432.60009765625, "loss": 0.0881, "rewards/accuracies": 0.875, "rewards/chosen": -0.19450101256370544, "rewards/margins": 0.3186071217060089, "rewards/rejected": -0.5131081342697144, "step": 3070 }, { "epoch": 0.82, "learning_rate": 4.706402525869633e-07, "logits/chosen": -1.3473070859909058, "logits/rejected": -1.2000024318695068, "logps/chosen": -589.5697021484375, "logps/rejected": -1317.3638916015625, "loss": 0.072, "rewards/accuracies": 0.875, "rewards/chosen": -0.16462837159633636, "rewards/margins": 0.2750544846057892, "rewards/rejected": -0.43968287110328674, "step": 3080 }, { "epoch": 0.82, "learning_rate": 4.5713775416217884e-07, "logits/chosen": -1.6565711498260498, "logits/rejected": -1.1259280443191528, "logps/chosen": -551.4010009765625, "logps/rejected": -1178.7850341796875, "loss": 0.0716, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.13680130243301392, "rewards/margins": 0.30534160137176514, "rewards/rejected": -0.44214290380477905, "step": 3090 }, { "epoch": 0.83, "learning_rate": 4.438122617983442e-07, "logits/chosen": -1.5101524591445923, "logits/rejected": -0.8424856066703796, "logps/chosen": -591.2506103515625, "logps/rejected": -1273.046142578125, "loss": 0.0566, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13511498272418976, "rewards/margins": 0.33536994457244873, "rewards/rejected": -0.4704849123954773, "step": 3100 }, { "epoch": 0.83, "learning_rate": 4.3066493009749853e-07, "logits/chosen": -1.7471688985824585, "logits/rejected": -1.1047070026397705, "logps/chosen": -767.1864013671875, "logps/rejected": -1380.8409423828125, "loss": 0.053, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20385906100273132, "rewards/margins": 0.27676570415496826, "rewards/rejected": -0.4806247651576996, "step": 3110 }, { "epoch": 0.83, "learning_rate": 4.1769689822475147e-07, "logits/chosen": -1.4887802600860596, "logits/rejected": -1.02260422706604, "logps/chosen": -691.14990234375, "logps/rejected": -1373.417724609375, "loss": 0.0965, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.20843443274497986, "rewards/margins": 0.2790736258029938, "rewards/rejected": -0.4875081181526184, "step": 3120 }, { "epoch": 0.83, "learning_rate": 4.049092898095816e-07, "logits/chosen": -1.5980995893478394, "logits/rejected": -1.023189902305603, "logps/chosen": -595.4110107421875, "logps/rejected": -1312.7945556640625, "loss": 0.0653, "rewards/accuracies": 0.875, "rewards/chosen": -0.16371554136276245, "rewards/margins": 0.2928038537502289, "rewards/rejected": -0.4565194249153137, "step": 3130 }, { "epoch": 0.84, "learning_rate": 3.9230321284847856e-07, "logits/chosen": -1.5363506078720093, "logits/rejected": -1.0504465103149414, "logps/chosen": -489.7978515625, "logps/rejected": -1119.5257568359375, "loss": 0.0633, "rewards/accuracies": 0.875, "rewards/chosen": -0.11015214771032333, "rewards/margins": 0.2800517678260803, "rewards/rejected": -0.39020389318466187, "step": 3140 }, { "epoch": 0.84, "learning_rate": 3.798797596089351e-07, "logits/chosen": -1.6714694499969482, "logits/rejected": -1.125106692314148, "logps/chosen": -604.4114379882812, "logps/rejected": -1297.808349609375, "loss": 0.0516, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.12939824163913727, "rewards/margins": 0.35474586486816406, "rewards/rejected": -0.4841441214084625, "step": 3150 }, { "epoch": 0.84, "learning_rate": 3.6764000653481263e-07, "logits/chosen": -1.7083511352539062, "logits/rejected": -1.0139703750610352, "logps/chosen": -604.1871337890625, "logps/rejected": -1156.0162353515625, "loss": 0.0919, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1465051770210266, "rewards/margins": 0.27671653032302856, "rewards/rejected": -0.4232216775417328, "step": 3160 }, { "epoch": 0.85, "learning_rate": 3.555850141530659e-07, "logits/chosen": -1.7717567682266235, "logits/rejected": -1.1824371814727783, "logps/chosen": -566.564453125, "logps/rejected": -1101.685302734375, "loss": 0.0733, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.14669661223888397, "rewards/margins": 0.26966187357902527, "rewards/rejected": -0.41635847091674805, "step": 3170 }, { "epoch": 0.85, "learning_rate": 3.4371582698185636e-07, "logits/chosen": -1.4069181680679321, "logits/rejected": -1.0164799690246582, "logps/chosen": -516.7618408203125, "logps/rejected": -1247.140380859375, "loss": 0.0714, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1548655778169632, "rewards/margins": 0.2954062819480896, "rewards/rejected": -0.4502718448638916, "step": 3180 }, { "epoch": 0.85, "learning_rate": 3.3203347344004737e-07, "logits/chosen": -1.332287073135376, "logits/rejected": -1.1523934602737427, "logps/chosen": -562.6694946289062, "logps/rejected": -1197.0635986328125, "loss": 0.0921, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.19367225468158722, "rewards/margins": 0.24201972782611847, "rewards/rejected": -0.4356919825077057, "step": 3190 }, { "epoch": 0.85, "learning_rate": 3.2053896575809426e-07, "logits/chosen": -1.4262222051620483, "logits/rejected": -1.026829719543457, "logps/chosen": -569.9625854492188, "logps/rejected": -1329.806884765625, "loss": 0.0553, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.15987573564052582, "rewards/margins": 0.3234714865684509, "rewards/rejected": -0.48334717750549316, "step": 3200 }, { "epoch": 0.86, "learning_rate": 3.092332998903416e-07, "logits/chosen": -1.403305172920227, "logits/rejected": -1.228492021560669, "logps/chosen": -531.6251220703125, "logps/rejected": -1268.7862548828125, "loss": 0.0586, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14155253767967224, "rewards/margins": 0.29640236496925354, "rewards/rejected": -0.4379549026489258, "step": 3210 }, { "epoch": 0.86, "learning_rate": 2.981174554287239e-07, "logits/chosen": -1.4185243844985962, "logits/rejected": -1.2006936073303223, "logps/chosen": -635.041259765625, "logps/rejected": -1473.925537109375, "loss": 0.0452, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.16828233003616333, "rewards/margins": 0.3489801287651062, "rewards/rejected": -0.5172623991966248, "step": 3220 }, { "epoch": 0.86, "learning_rate": 2.871923955178918e-07, "logits/chosen": -1.4334993362426758, "logits/rejected": -0.9838508367538452, "logps/chosen": -607.728271484375, "logps/rejected": -1310.9564208984375, "loss": 0.0531, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.18386729061603546, "rewards/margins": 0.3068189322948456, "rewards/rejected": -0.49068623781204224, "step": 3230 }, { "epoch": 0.86, "learning_rate": 2.764590667717562e-07, "logits/chosen": -1.6019694805145264, "logits/rejected": -1.1419106721878052, "logps/chosen": -588.9683837890625, "logps/rejected": -1284.370849609375, "loss": 0.0586, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15091760456562042, "rewards/margins": 0.30733171105384827, "rewards/rejected": -0.4582493305206299, "step": 3240 }, { "epoch": 0.87, "learning_rate": 2.6591839919146963e-07, "logits/chosen": -1.6173986196517944, "logits/rejected": -1.1335291862487793, "logps/chosen": -654.0939331054688, "logps/rejected": -1284.885986328125, "loss": 0.0572, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1872612088918686, "rewards/margins": 0.3091946244239807, "rewards/rejected": -0.4964558482170105, "step": 3250 }, { "epoch": 0.87, "learning_rate": 2.555713060848433e-07, "logits/chosen": -1.4421133995056152, "logits/rejected": -1.1747627258300781, "logps/chosen": -585.655517578125, "logps/rejected": -1284.943115234375, "loss": 0.0449, "rewards/accuracies": 0.875, "rewards/chosen": -0.1650674194097519, "rewards/margins": 0.31287819147109985, "rewards/rejected": -0.47794562578201294, "step": 3260 }, { "epoch": 0.87, "learning_rate": 2.454186839872158e-07, "logits/chosen": -1.7081034183502197, "logits/rejected": -0.94395911693573, "logps/chosen": -646.4754028320312, "logps/rejected": -1342.545166015625, "loss": 0.0709, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18191269040107727, "rewards/margins": 0.326556921005249, "rewards/rejected": -0.5084696412086487, "step": 3270 }, { "epoch": 0.87, "learning_rate": 2.3546141258376786e-07, "logits/chosen": -1.3628816604614258, "logits/rejected": -0.9335969686508179, "logps/chosen": -603.8056640625, "logps/rejected": -1245.411376953125, "loss": 0.0594, "rewards/accuracies": 0.875, "rewards/chosen": -0.1723943054676056, "rewards/margins": 0.3125647008419037, "rewards/rejected": -0.48495903611183167, "step": 3280 }, { "epoch": 0.88, "learning_rate": 2.257003546333042e-07, "logits/chosen": -1.6881519556045532, "logits/rejected": -1.116393804550171, "logps/chosen": -680.4428100585938, "logps/rejected": -1417.369384765625, "loss": 0.0578, "rewards/accuracies": 0.875, "rewards/chosen": -0.21765998005867004, "rewards/margins": 0.3092319667339325, "rewards/rejected": -0.5268920063972473, "step": 3290 }, { "epoch": 0.88, "learning_rate": 2.1613635589349756e-07, "logits/chosen": -1.7809503078460693, "logits/rejected": -1.235079050064087, "logps/chosen": -521.1368408203125, "logps/rejected": -1202.243408203125, "loss": 0.0625, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.13693402707576752, "rewards/margins": 0.2993922233581543, "rewards/rejected": -0.43632620573043823, "step": 3300 }, { "epoch": 0.88, "learning_rate": 2.0677024504760752e-07, "logits/chosen": -1.5252535343170166, "logits/rejected": -1.2412452697753906, "logps/chosen": -526.3180541992188, "logps/rejected": -1315.9249267578125, "loss": 0.0633, "rewards/accuracies": 0.875, "rewards/chosen": -0.13880392909049988, "rewards/margins": 0.35248517990112305, "rewards/rejected": -0.4912891387939453, "step": 3310 }, { "epoch": 0.89, "learning_rate": 1.9760283363267684e-07, "logits/chosen": -1.778957724571228, "logits/rejected": -1.0381158590316772, "logps/chosen": -641.0303955078125, "logps/rejected": -1229.374267578125, "loss": 0.0791, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13701531291007996, "rewards/margins": 0.3019945025444031, "rewards/rejected": -0.43900981545448303, "step": 3320 }, { "epoch": 0.89, "learning_rate": 1.8863491596921745e-07, "logits/chosen": -1.4902263879776, "logits/rejected": -0.8488144874572754, "logps/chosen": -537.8624267578125, "logps/rejected": -1138.487060546875, "loss": 0.0651, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.12021678686141968, "rewards/margins": 0.3042137622833252, "rewards/rejected": -0.4244305491447449, "step": 3330 }, { "epoch": 0.89, "learning_rate": 1.798672690923828e-07, "logits/chosen": -1.604090929031372, "logits/rejected": -1.0566972494125366, "logps/chosen": -546.621826171875, "logps/rejected": -1305.0101318359375, "loss": 0.048, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1309491991996765, "rewards/margins": 0.3338525891304016, "rewards/rejected": -0.4648017883300781, "step": 3340 }, { "epoch": 0.89, "learning_rate": 1.713006526846439e-07, "logits/chosen": -1.6890567541122437, "logits/rejected": -1.0597703456878662, "logps/chosen": -638.17626953125, "logps/rejected": -1403.3868408203125, "loss": 0.0422, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.14803606271743774, "rewards/margins": 0.36922168731689453, "rewards/rejected": -0.517257809638977, "step": 3350 }, { "epoch": 0.9, "learning_rate": 1.629358090099639e-07, "logits/chosen": -1.772783637046814, "logits/rejected": -1.145141363143921, "logps/chosen": -645.2138671875, "logps/rejected": -1247.31005859375, "loss": 0.0995, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.16153082251548767, "rewards/margins": 0.27567344903945923, "rewards/rejected": -0.4372042715549469, "step": 3360 }, { "epoch": 0.9, "learning_rate": 1.5477346284948292e-07, "logits/chosen": -1.6137062311172485, "logits/rejected": -1.2323498725891113, "logps/chosen": -652.45068359375, "logps/rejected": -1297.9696044921875, "loss": 0.1, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18204265832901, "rewards/margins": 0.2792537212371826, "rewards/rejected": -0.4612963795661926, "step": 3370 }, { "epoch": 0.9, "learning_rate": 1.4681432143872133e-07, "logits/chosen": -1.5417966842651367, "logits/rejected": -0.92780601978302, "logps/chosen": -655.7986450195312, "logps/rejected": -1352.8719482421875, "loss": 0.0452, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.1652592271566391, "rewards/margins": 0.32993632555007935, "rewards/rejected": -0.49519556760787964, "step": 3380 }, { "epoch": 0.9, "learning_rate": 1.3905907440629752e-07, "logits/chosen": -1.7428719997406006, "logits/rejected": -1.234071969985962, "logps/chosen": -683.9173583984375, "logps/rejected": -1301.064208984375, "loss": 0.0749, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1897701770067215, "rewards/margins": 0.26576271653175354, "rewards/rejected": -0.45553287863731384, "step": 3390 }, { "epoch": 0.91, "learning_rate": 1.31508393714177e-07, "logits/chosen": -1.6525071859359741, "logits/rejected": -1.269195795059204, "logps/chosen": -573.1322631835938, "logps/rejected": -1224.781005859375, "loss": 0.0717, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.1454993188381195, "rewards/margins": 0.29666373133659363, "rewards/rejected": -0.44216299057006836, "step": 3400 }, { "epoch": 0.91, "learning_rate": 1.241629335994471e-07, "logits/chosen": -1.4832892417907715, "logits/rejected": -0.7595690488815308, "logps/chosen": -546.4700927734375, "logps/rejected": -1105.534912109375, "loss": 0.0961, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.12580379843711853, "rewards/margins": 0.2655871510505676, "rewards/rejected": -0.39139097929000854, "step": 3410 }, { "epoch": 0.91, "learning_rate": 1.1702333051763271e-07, "logits/chosen": -1.729821801185608, "logits/rejected": -1.004158854484558, "logps/chosen": -648.3139038085938, "logps/rejected": -1181.37939453125, "loss": 0.0907, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.17357492446899414, "rewards/margins": 0.2692447900772095, "rewards/rejected": -0.4428196847438812, "step": 3420 }, { "epoch": 0.91, "learning_rate": 1.1009020308754587e-07, "logits/chosen": -1.6045188903808594, "logits/rejected": -1.1483290195465088, "logps/chosen": -791.281494140625, "logps/rejected": -1432.56591796875, "loss": 0.0588, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.20312626659870148, "rewards/margins": 0.30023887753486633, "rewards/rejected": -0.503365159034729, "step": 3430 }, { "epoch": 0.92, "learning_rate": 1.0336415203768962e-07, "logits/chosen": -1.4623007774353027, "logits/rejected": -0.9743086695671082, "logps/chosen": -684.1002807617188, "logps/rejected": -1355.83935546875, "loss": 0.0854, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15205714106559753, "rewards/margins": 0.3153363764286041, "rewards/rejected": -0.4673934876918793, "step": 3440 }, { "epoch": 0.92, "learning_rate": 9.684576015420277e-08, "logits/chosen": -1.5630353689193726, "logits/rejected": -1.1269890069961548, "logps/chosen": -500.76934814453125, "logps/rejected": -1174.6031494140625, "loss": 0.0748, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13523253798484802, "rewards/margins": 0.2928512990474701, "rewards/rejected": -0.4280838370323181, "step": 3450 }, { "epoch": 0.92, "learning_rate": 9.053559223036746e-08, "logits/chosen": -1.4134055376052856, "logits/rejected": -0.9375116229057312, "logps/chosen": -684.4464111328125, "logps/rejected": -1184.0179443359375, "loss": 0.097, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1784881353378296, "rewards/margins": 0.25100329518318176, "rewards/rejected": -0.42949143052101135, "step": 3460 }, { "epoch": 0.93, "learning_rate": 8.44341950176683e-08, "logits/chosen": -1.7058902978897095, "logits/rejected": -1.1464042663574219, "logps/chosen": -518.6105346679688, "logps/rejected": -1139.312744140625, "loss": 0.0706, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.11180339008569717, "rewards/margins": 0.3046559691429138, "rewards/rejected": -0.4164593815803528, "step": 3470 }, { "epoch": 0.93, "learning_rate": 7.854209717842231e-08, "logits/chosen": -1.7066657543182373, "logits/rejected": -1.1052082777023315, "logps/chosen": -634.6749267578125, "logps/rejected": -1268.662353515625, "loss": 0.0549, "rewards/accuracies": 0.875, "rewards/chosen": -0.1490454524755478, "rewards/margins": 0.32269707322120667, "rewards/rejected": -0.4717424809932709, "step": 3480 }, { "epoch": 0.93, "learning_rate": 7.285980923996989e-08, "logits/chosen": -1.583821415901184, "logits/rejected": -0.9543322324752808, "logps/chosen": -620.9078979492188, "logps/rejected": -1256.222900390625, "loss": 0.0499, "rewards/accuracies": 0.875, "rewards/chosen": -0.14574182033538818, "rewards/margins": 0.3196600079536438, "rewards/rejected": -0.465401828289032, "step": 3490 }, { "epoch": 0.93, "learning_rate": 6.738782355044048e-08, "logits/chosen": -1.5047067403793335, "logits/rejected": -1.0737859010696411, "logps/chosen": -520.77294921875, "logps/rejected": -1155.83740234375, "loss": 0.0764, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1430043876171112, "rewards/margins": 0.28466781973838806, "rewards/rejected": -0.42767223715782166, "step": 3500 }, { "epoch": 0.94, "learning_rate": 6.212661423609184e-08, "logits/chosen": -1.3737636804580688, "logits/rejected": -1.1867458820343018, "logps/chosen": -623.9158935546875, "logps/rejected": -1239.9605712890625, "loss": 0.078, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.1589566022157669, "rewards/margins": 0.27907633781433105, "rewards/rejected": -0.43803295493125916, "step": 3510 }, { "epoch": 0.94, "learning_rate": 5.707663716023021e-08, "logits/chosen": -1.5748332738876343, "logits/rejected": -1.063622236251831, "logps/chosen": -616.1275634765625, "logps/rejected": -1155.8143310546875, "loss": 0.0835, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.15737947821617126, "rewards/margins": 0.2580162584781647, "rewards/rejected": -0.4153957962989807, "step": 3520 }, { "epoch": 0.94, "learning_rate": 5.22383298837098e-08, "logits/chosen": -1.6702196598052979, "logits/rejected": -0.9411935806274414, "logps/chosen": -713.735107421875, "logps/rejected": -1223.2811279296875, "loss": 0.0824, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.17610540986061096, "rewards/margins": 0.29836469888687134, "rewards/rejected": -0.4744700491428375, "step": 3530 }, { "epoch": 0.94, "learning_rate": 4.761211162702117e-08, "logits/chosen": -1.500201940536499, "logits/rejected": -1.2098219394683838, "logps/chosen": -633.7230224609375, "logps/rejected": -1120.678466796875, "loss": 0.0984, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.17905206978321075, "rewards/margins": 0.2320644110441208, "rewards/rejected": -0.41111645102500916, "step": 3540 }, { "epoch": 0.95, "learning_rate": 4.319838323396691e-08, "logits/chosen": -1.6483008861541748, "logits/rejected": -1.2190577983856201, "logps/chosen": -614.4102783203125, "logps/rejected": -1260.265625, "loss": 0.0958, "rewards/accuracies": 0.75, "rewards/chosen": -0.16128340363502502, "rewards/margins": 0.26488471031188965, "rewards/rejected": -0.4261681139469147, "step": 3550 }, { "epoch": 0.95, "learning_rate": 3.8997527136930004e-08, "logits/chosen": -1.429495930671692, "logits/rejected": -1.0348641872406006, "logps/chosen": -632.0252685546875, "logps/rejected": -1319.2584228515625, "loss": 0.0798, "rewards/accuracies": 0.875, "rewards/chosen": -0.17518481612205505, "rewards/margins": 0.2713126540184021, "rewards/rejected": -0.44649749994277954, "step": 3560 }, { "epoch": 0.95, "learning_rate": 3.5009907323737826e-08, "logits/chosen": -1.551414132118225, "logits/rejected": -1.2644479274749756, "logps/chosen": -570.0910034179688, "logps/rejected": -1210.5382080078125, "loss": 0.0803, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.1442852020263672, "rewards/margins": 0.26128098368644714, "rewards/rejected": -0.4055662155151367, "step": 3570 }, { "epoch": 0.95, "learning_rate": 3.1235869306123766e-08, "logits/chosen": -1.8781211376190186, "logits/rejected": -1.116159439086914, "logps/chosen": -654.27685546875, "logps/rejected": -1360.783447265625, "loss": 0.0586, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.15850645303726196, "rewards/margins": 0.32171380519866943, "rewards/rejected": -0.480220228433609, "step": 3580 }, { "epoch": 0.96, "learning_rate": 2.767574008979007e-08, "logits/chosen": -1.7552858591079712, "logits/rejected": -1.1240081787109375, "logps/chosen": -649.2583618164062, "logps/rejected": -1285.60986328125, "loss": 0.0746, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1459151804447174, "rewards/margins": 0.29276043176651, "rewards/rejected": -0.43867558240890503, "step": 3590 }, { "epoch": 0.96, "learning_rate": 2.4329828146074096e-08, "logits/chosen": -1.680267333984375, "logits/rejected": -0.990991473197937, "logps/chosen": -702.8885498046875, "logps/rejected": -1430.7728271484375, "loss": 0.0422, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.16736450791358948, "rewards/margins": 0.3372777998447418, "rewards/rejected": -0.5046423673629761, "step": 3600 }, { "epoch": 0.96, "learning_rate": 2.1198423385220822e-08, "logits/chosen": -1.5231233835220337, "logits/rejected": -0.983518123626709, "logps/chosen": -574.9891967773438, "logps/rejected": -1192.521728515625, "loss": 0.091, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.13129422068595886, "rewards/margins": 0.2925337255001068, "rewards/rejected": -0.42382797598838806, "step": 3610 }, { "epoch": 0.97, "learning_rate": 1.82817971312621e-08, "logits/chosen": -1.689100980758667, "logits/rejected": -1.1573827266693115, "logps/chosen": -602.2514038085938, "logps/rejected": -1330.6248779296875, "loss": 0.0539, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.1489819586277008, "rewards/margins": 0.32134801149368286, "rewards/rejected": -0.47032999992370605, "step": 3620 }, { "epoch": 0.97, "learning_rate": 1.5580202098509078e-08, "logits/chosen": -1.7418603897094727, "logits/rejected": -1.2287579774856567, "logps/chosen": -539.5288696289062, "logps/rejected": -1092.644775390625, "loss": 0.084, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.1297844648361206, "rewards/margins": 0.25757455825805664, "rewards/rejected": -0.38735905289649963, "step": 3630 }, { "epoch": 0.97, "learning_rate": 1.3093872369654148e-08, "logits/chosen": -1.6315152645111084, "logits/rejected": -0.7913056015968323, "logps/chosen": -566.9249267578125, "logps/rejected": -1232.651611328125, "loss": 0.0596, "rewards/accuracies": 0.875, "rewards/chosen": -0.15806104242801666, "rewards/margins": 0.31945645809173584, "rewards/rejected": -0.4775174558162689, "step": 3640 }, { "epoch": 0.97, "learning_rate": 1.0823023375489128e-08, "logits/chosen": -1.8176482915878296, "logits/rejected": -1.307217001914978, "logps/chosen": -596.6251220703125, "logps/rejected": -1259.1907958984375, "loss": 0.0741, "rewards/accuracies": 0.875, "rewards/chosen": -0.13308800756931305, "rewards/margins": 0.29899168014526367, "rewards/rejected": -0.4320797026157379, "step": 3650 }, { "epoch": 0.98, "learning_rate": 8.767851876239075e-09, "logits/chosen": -1.3138879537582397, "logits/rejected": -1.0358647108078003, "logps/chosen": -606.9632568359375, "logps/rejected": -1166.726806640625, "loss": 0.126, "rewards/accuracies": 0.75, "rewards/chosen": -0.17711606621742249, "rewards/margins": 0.24026331305503845, "rewards/rejected": -0.41737937927246094, "step": 3660 }, { "epoch": 0.98, "learning_rate": 6.9285359445145366e-09, "logits/chosen": -1.6356595754623413, "logits/rejected": -1.0119235515594482, "logps/chosen": -606.1302490234375, "logps/rejected": -1273.985595703125, "loss": 0.0883, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15847013890743256, "rewards/margins": 0.29371243715286255, "rewards/rejected": -0.4521825909614563, "step": 3670 }, { "epoch": 0.98, "learning_rate": 5.305234949880001e-09, "logits/chosen": -1.659597396850586, "logits/rejected": -1.0647908449172974, "logps/chosen": -593.581787109375, "logps/rejected": -1240.505615234375, "loss": 0.0669, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.15421947836875916, "rewards/margins": 0.2849588096141815, "rewards/rejected": -0.4391782879829407, "step": 3680 }, { "epoch": 0.98, "learning_rate": 3.8980895450474455e-09, "logits/chosen": -1.4947640895843506, "logits/rejected": -1.1128860712051392, "logps/chosen": -551.9004516601562, "logps/rejected": -1167.104736328125, "loss": 0.053, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.13484862446784973, "rewards/margins": 0.2813241481781006, "rewards/rejected": -0.4161728024482727, "step": 3690 }, { "epoch": 0.99, "learning_rate": 2.7072216536885855e-09, "logits/chosen": -1.6360046863555908, "logits/rejected": -1.196569800376892, "logps/chosen": -659.7826538085938, "logps/rejected": -1305.6195068359375, "loss": 0.0578, "rewards/accuracies": 0.875, "rewards/chosen": -0.17655882239341736, "rewards/margins": 0.3209526538848877, "rewards/rejected": -0.49751147627830505, "step": 3700 }, { "epoch": 0.99, "learning_rate": 1.7327344598702667e-09, "logits/chosen": -1.4936860799789429, "logits/rejected": -1.0485936403274536, "logps/chosen": -584.678955078125, "logps/rejected": -1314.91650390625, "loss": 0.071, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.14625641703605652, "rewards/margins": 0.3208056390285492, "rewards/rejected": -0.4670620858669281, "step": 3710 }, { "epoch": 0.99, "learning_rate": 9.747123991141193e-10, "logits/chosen": -1.4201769828796387, "logits/rejected": -1.045179009437561, "logps/chosen": -620.8831787109375, "logps/rejected": -1206.128662109375, "loss": 0.0691, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.15646150708198547, "rewards/margins": 0.29528263211250305, "rewards/rejected": -0.4517441391944885, "step": 3720 }, { "epoch": 0.99, "learning_rate": 4.332211510807427e-10, "logits/chosen": -1.530029296875, "logits/rejected": -1.3592134714126587, "logps/chosen": -523.2725219726562, "logps/rejected": -1284.657958984375, "loss": 0.0589, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.13241766393184662, "rewards/margins": 0.30461427569389343, "rewards/rejected": -0.43703192472457886, "step": 3730 }, { "epoch": 1.0, "learning_rate": 1.0830763387897902e-10, "logits/chosen": -1.5114389657974243, "logits/rejected": -0.964741587638855, "logps/chosen": -626.4848022460938, "logps/rejected": -1381.5963134765625, "loss": 0.0456, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.16040828824043274, "rewards/margins": 0.3570694923400879, "rewards/rejected": -0.517477810382843, "step": 3740 }, { "epoch": 1.0, "learning_rate": 0.0, "logits/chosen": -1.877239465713501, "logits/rejected": -1.3499794006347656, "logps/chosen": -648.46875, "logps/rejected": -1231.8519287109375, "loss": 0.0717, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.18231454491615295, "rewards/margins": 0.2813524603843689, "rewards/rejected": -0.46366700530052185, "step": 3750 }, { "epoch": 1.0, "step": 3750, "total_flos": 0.0, "train_loss": 0.0800992045879364, "train_runtime": 15706.9626, "train_samples_per_second": 0.955, "train_steps_per_second": 0.239 } ], "logging_steps": 10, "max_steps": 3750, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }