{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 6573, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 7.598784194528875e-10, "logits/chosen": -2.901771306991577, "logits/rejected": -2.8884711265563965, "logps/chosen": -77.62923431396484, "logps/rejected": -64.06584167480469, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.0, "learning_rate": 7.598784194528875e-09, "logits/chosen": -2.9898242950439453, "logits/rejected": -2.947841167449951, "logps/chosen": -95.11986541748047, "logps/rejected": -74.35153198242188, "loss": 0.693, "rewards/accuracies": 0.5, "rewards/chosen": 0.0006865501054562628, "rewards/margins": -0.00023379885533358902, "rewards/rejected": 0.0009203488007187843, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.519756838905775e-08, "logits/chosen": -3.020481824874878, "logits/rejected": -2.9794812202453613, "logps/chosen": -91.61888122558594, "logps/rejected": -73.55238342285156, "loss": 0.6899, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0016287328908219934, "rewards/margins": 0.006309092044830322, "rewards/rejected": -0.004680359270423651, "step": 20 }, { "epoch": 0.01, "learning_rate": 2.2796352583586623e-08, "logits/chosen": -3.0296730995178223, "logits/rejected": -2.9928271770477295, "logps/chosen": -94.35389709472656, "logps/rejected": -70.74224853515625, "loss": 0.6781, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.02023407630622387, "rewards/margins": 0.029119813814759254, "rewards/rejected": -0.008885735645890236, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.03951367781155e-08, "logits/chosen": -3.002256393432617, "logits/rejected": -2.9691174030303955, "logps/chosen": -96.49156188964844, "logps/rejected": -69.25764465332031, "loss": 0.6513, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.05460807681083679, "rewards/margins": 0.0900125652551651, "rewards/rejected": -0.03540449216961861, "step": 40 }, { "epoch": 0.02, "learning_rate": 3.799392097264438e-08, "logits/chosen": -3.0232231616973877, "logits/rejected": -2.9752824306488037, "logps/chosen": -97.46116638183594, "logps/rejected": -74.12408447265625, "loss": 0.6124, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.09053327888250351, "rewards/margins": 0.17471307516098022, "rewards/rejected": -0.08417979627847672, "step": 50 }, { "epoch": 0.03, "learning_rate": 4.559270516717325e-08, "logits/chosen": -2.9987754821777344, "logits/rejected": -2.9764158725738525, "logps/chosen": -90.7174072265625, "logps/rejected": -77.50119018554688, "loss": 0.5669, "rewards/accuracies": 0.887499988079071, "rewards/chosen": 0.06676627695560455, "rewards/margins": 0.2405029535293579, "rewards/rejected": -0.17373664677143097, "step": 60 }, { "epoch": 0.03, "learning_rate": 5.3191489361702123e-08, "logits/chosen": -2.999079465866089, "logits/rejected": -2.957610607147217, "logps/chosen": -85.68513488769531, "logps/rejected": -73.17382049560547, "loss": 0.4937, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.11260080337524414, "rewards/margins": 0.46337103843688965, "rewards/rejected": -0.3507702052593231, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.0790273556231e-08, "logits/chosen": -2.9929611682891846, "logits/rejected": -2.9430298805236816, "logps/chosen": -95.99066925048828, "logps/rejected": -78.26673126220703, "loss": 0.4392, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.16532005369663239, "rewards/margins": 0.6958799362182617, "rewards/rejected": -0.5305598378181458, "step": 80 }, { "epoch": 0.04, "learning_rate": 6.838905775075987e-08, "logits/chosen": -2.9992611408233643, "logits/rejected": -2.9776453971862793, "logps/chosen": -92.69783020019531, "logps/rejected": -75.43290710449219, "loss": 0.419, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.025346357375383377, "rewards/margins": 0.6570864915847778, "rewards/rejected": -0.6824327707290649, "step": 90 }, { "epoch": 0.05, "learning_rate": 7.598784194528875e-08, "logits/chosen": -2.982177257537842, "logits/rejected": -2.935204029083252, "logps/chosen": -93.23124694824219, "logps/rejected": -81.80736541748047, "loss": 0.3601, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.016981299966573715, "rewards/margins": 1.017513632774353, "rewards/rejected": -1.0005323886871338, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -3.0686004161834717, "eval_logits/rejected": -3.0200653076171875, "eval_logps/chosen": -91.95072937011719, "eval_logps/rejected": -80.77359008789062, "eval_loss": 0.3409937024116516, "eval_rewards/accuracies": 0.9444444179534912, "eval_rewards/chosen": -0.07135287672281265, "eval_rewards/margins": 0.9955466985702515, "eval_rewards/rejected": -1.0668996572494507, "eval_runtime": 54.6773, "eval_samples_per_second": 52.343, "eval_steps_per_second": 1.646, "step": 100 }, { "epoch": 0.05, "learning_rate": 8.358662613981762e-08, "logits/chosen": -3.0181825160980225, "logits/rejected": -2.9785044193267822, "logps/chosen": -92.45232391357422, "logps/rejected": -82.97605895996094, "loss": 0.3187, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.013919335789978504, "rewards/margins": 1.1460493803024292, "rewards/rejected": -1.1321300268173218, "step": 110 }, { "epoch": 0.05, "learning_rate": 9.11854103343465e-08, "logits/chosen": -2.996326208114624, "logits/rejected": -2.960134506225586, "logps/chosen": -91.56250762939453, "logps/rejected": -86.30479431152344, "loss": 0.2826, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.2517639994621277, "rewards/margins": 1.317551612854004, "rewards/rejected": -1.5693156719207764, "step": 120 }, { "epoch": 0.06, "learning_rate": 9.878419452887538e-08, "logits/chosen": -2.9952170848846436, "logits/rejected": -2.970567226409912, "logps/chosen": -93.91593933105469, "logps/rejected": -90.50540161132812, "loss": 0.2448, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0845116376876831, "rewards/margins": 1.7149988412857056, "rewards/rejected": -1.7995105981826782, "step": 130 }, { "epoch": 0.06, "learning_rate": 1.0638297872340425e-07, "logits/chosen": -3.0097122192382812, "logits/rejected": -2.9792752265930176, "logps/chosen": -95.60907745361328, "logps/rejected": -92.10499572753906, "loss": 0.2252, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.3711767792701721, "rewards/margins": 1.8317139148712158, "rewards/rejected": -2.202890634536743, "step": 140 }, { "epoch": 0.07, "learning_rate": 1.1398176291793313e-07, "logits/chosen": -3.0108351707458496, "logits/rejected": -2.9584834575653076, "logps/chosen": -99.74345397949219, "logps/rejected": -95.83207702636719, "loss": 0.2017, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.43104758858680725, "rewards/margins": 1.9699010848999023, "rewards/rejected": -2.4009485244750977, "step": 150 }, { "epoch": 0.07, "learning_rate": 1.21580547112462e-07, "logits/chosen": -2.986536979675293, "logits/rejected": -2.9522886276245117, "logps/chosen": -101.01667785644531, "logps/rejected": -100.94587707519531, "loss": 0.1703, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.44724518060684204, "rewards/margins": 2.5366241931915283, "rewards/rejected": -2.9838690757751465, "step": 160 }, { "epoch": 0.08, "learning_rate": 1.2917933130699087e-07, "logits/chosen": -2.9869437217712402, "logits/rejected": -2.9626340866088867, "logps/chosen": -92.66294860839844, "logps/rejected": -100.02458190917969, "loss": 0.1652, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.6140026450157166, "rewards/margins": 2.5113110542297363, "rewards/rejected": -3.1253137588500977, "step": 170 }, { "epoch": 0.08, "learning_rate": 1.3677811550151974e-07, "logits/chosen": -2.9454994201660156, "logits/rejected": -2.95060396194458, "logps/chosen": -94.37382507324219, "logps/rejected": -105.94987487792969, "loss": 0.162, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.31808796525001526, "rewards/margins": 3.3045032024383545, "rewards/rejected": -3.622591495513916, "step": 180 }, { "epoch": 0.09, "learning_rate": 1.4437689969604864e-07, "logits/chosen": -2.9558322429656982, "logits/rejected": -2.9460699558258057, "logps/chosen": -96.69801330566406, "logps/rejected": -107.38899230957031, "loss": 0.1188, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.5944967269897461, "rewards/margins": 3.4235737323760986, "rewards/rejected": -4.018071174621582, "step": 190 }, { "epoch": 0.09, "learning_rate": 1.519756838905775e-07, "logits/chosen": -2.918544292449951, "logits/rejected": -2.918253183364868, "logps/chosen": -94.17301177978516, "logps/rejected": -117.73758697509766, "loss": 0.113, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.9829786419868469, "rewards/margins": 3.9101309776306152, "rewards/rejected": -4.8931097984313965, "step": 200 }, { "epoch": 0.09, "eval_logits/chosen": -3.029731273651123, "eval_logits/rejected": -3.0103673934936523, "eval_logps/chosen": -103.97273254394531, "eval_logps/rejected": -118.95235443115234, "eval_loss": 0.11712019145488739, "eval_rewards/accuracies": 0.9611111283302307, "eval_rewards/chosen": -1.2735543251037598, "eval_rewards/margins": 3.6112213134765625, "eval_rewards/rejected": -4.884775638580322, "eval_runtime": 46.8222, "eval_samples_per_second": 61.125, "eval_steps_per_second": 1.922, "step": 200 }, { "epoch": 0.1, "learning_rate": 1.5957446808510638e-07, "logits/chosen": -2.966219663619995, "logits/rejected": -2.940929889678955, "logps/chosen": -102.27266693115234, "logps/rejected": -124.5510482788086, "loss": 0.1011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.482550859451294, "rewards/margins": 3.7979836463928223, "rewards/rejected": -5.280534744262695, "step": 210 }, { "epoch": 0.1, "learning_rate": 1.6717325227963525e-07, "logits/chosen": -2.9579319953918457, "logits/rejected": -2.9625790119171143, "logps/chosen": -108.02095794677734, "logps/rejected": -129.6359100341797, "loss": 0.1094, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.5162633657455444, "rewards/margins": 4.2703537940979, "rewards/rejected": -5.786617279052734, "step": 220 }, { "epoch": 0.1, "learning_rate": 1.7477203647416414e-07, "logits/chosen": -2.9127144813537598, "logits/rejected": -2.913215398788452, "logps/chosen": -110.11048889160156, "logps/rejected": -133.09237670898438, "loss": 0.0955, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6744592189788818, "rewards/margins": 4.5804924964904785, "rewards/rejected": -6.254951000213623, "step": 230 }, { "epoch": 0.11, "learning_rate": 1.82370820668693e-07, "logits/chosen": -2.966217517852783, "logits/rejected": -2.9733166694641113, "logps/chosen": -105.95328521728516, "logps/rejected": -129.27061462402344, "loss": 0.1055, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3941030502319336, "rewards/margins": 4.414058208465576, "rewards/rejected": -5.80816125869751, "step": 240 }, { "epoch": 0.11, "learning_rate": 1.8996960486322188e-07, "logits/chosen": -2.907132148742676, "logits/rejected": -2.9205126762390137, "logps/chosen": -109.4215316772461, "logps/rejected": -137.55592346191406, "loss": 0.0969, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6198132038116455, "rewards/margins": 5.059004783630371, "rewards/rejected": -6.6788177490234375, "step": 250 }, { "epoch": 0.12, "learning_rate": 1.9756838905775075e-07, "logits/chosen": -2.8727142810821533, "logits/rejected": -2.857184886932373, "logps/chosen": -111.29930114746094, "logps/rejected": -142.2863311767578, "loss": 0.0981, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.1142921447753906, "rewards/margins": 5.329843997955322, "rewards/rejected": -7.444136142730713, "step": 260 }, { "epoch": 0.12, "learning_rate": 2.0516717325227962e-07, "logits/chosen": -2.8235843181610107, "logits/rejected": -2.819584608078003, "logps/chosen": -122.10441589355469, "logps/rejected": -152.1237335205078, "loss": 0.1112, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.893561601638794, "rewards/margins": 4.696758270263672, "rewards/rejected": -7.590319633483887, "step": 270 }, { "epoch": 0.13, "learning_rate": 2.127659574468085e-07, "logits/chosen": -2.894500255584717, "logits/rejected": -2.8776357173919678, "logps/chosen": -117.2232666015625, "logps/rejected": -143.18972778320312, "loss": 0.0812, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.439744710922241, "rewards/margins": 4.922734260559082, "rewards/rejected": -7.362478733062744, "step": 280 }, { "epoch": 0.13, "learning_rate": 2.2036474164133736e-07, "logits/chosen": -2.8788487911224365, "logits/rejected": -2.88078236579895, "logps/chosen": -117.56974029541016, "logps/rejected": -153.3238525390625, "loss": 0.0703, "rewards/accuracies": 1.0, "rewards/chosen": -2.690272092819214, "rewards/margins": 5.456068992614746, "rewards/rejected": -8.146341323852539, "step": 290 }, { "epoch": 0.14, "learning_rate": 2.2796352583586626e-07, "logits/chosen": -2.8312017917633057, "logits/rejected": -2.8483071327209473, "logps/chosen": -119.8138656616211, "logps/rejected": -156.4137725830078, "loss": 0.0734, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.0532338619232178, "rewards/margins": 5.62264347076416, "rewards/rejected": -8.675877571105957, "step": 300 }, { "epoch": 0.14, "eval_logits/chosen": -2.933035373687744, "eval_logits/rejected": -2.90442156791687, "eval_logps/chosen": -113.29481506347656, "eval_logps/rejected": -142.68540954589844, "eval_loss": 0.07679181545972824, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -2.2057607173919678, "eval_rewards/margins": 5.05232048034668, "eval_rewards/rejected": -7.258080959320068, "eval_runtime": 47.4353, "eval_samples_per_second": 60.335, "eval_steps_per_second": 1.897, "step": 300 }, { "epoch": 0.14, "learning_rate": 2.3556231003039513e-07, "logits/chosen": -2.80320405960083, "logits/rejected": -2.8362550735473633, "logps/chosen": -113.18946838378906, "logps/rejected": -147.78456115722656, "loss": 0.0763, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.022726058959961, "rewards/margins": 5.786845684051514, "rewards/rejected": -7.809571743011475, "step": 310 }, { "epoch": 0.15, "learning_rate": 2.43161094224924e-07, "logits/chosen": -2.795830488204956, "logits/rejected": -2.808887004852295, "logps/chosen": -118.27098083496094, "logps/rejected": -154.96951293945312, "loss": 0.0719, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.9285690784454346, "rewards/margins": 5.742952823638916, "rewards/rejected": -8.67152214050293, "step": 320 }, { "epoch": 0.15, "learning_rate": 2.507598784194529e-07, "logits/chosen": -2.8046109676361084, "logits/rejected": -2.817551612854004, "logps/chosen": -118.660888671875, "logps/rejected": -157.84669494628906, "loss": 0.0757, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.268319606781006, "rewards/margins": 6.382529258728027, "rewards/rejected": -8.650848388671875, "step": 330 }, { "epoch": 0.16, "learning_rate": 2.5835866261398174e-07, "logits/chosen": -2.766819477081299, "logits/rejected": -2.7905983924865723, "logps/chosen": -114.24540710449219, "logps/rejected": -155.21456909179688, "loss": 0.0639, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.6502342224121094, "rewards/margins": 6.039946556091309, "rewards/rejected": -8.690180778503418, "step": 340 }, { "epoch": 0.16, "learning_rate": 2.659574468085106e-07, "logits/chosen": -2.7104740142822266, "logits/rejected": -2.7129950523376465, "logps/chosen": -118.3838882446289, "logps/rejected": -163.64157104492188, "loss": 0.0714, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.7546608448028564, "rewards/margins": 6.270176887512207, "rewards/rejected": -9.024836540222168, "step": 350 }, { "epoch": 0.16, "learning_rate": 2.735562310030395e-07, "logits/chosen": -2.6810834407806396, "logits/rejected": -2.6747491359710693, "logps/chosen": -117.26749420166016, "logps/rejected": -165.12850952148438, "loss": 0.0661, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -2.4232752323150635, "rewards/margins": 7.114753723144531, "rewards/rejected": -9.538028717041016, "step": 360 }, { "epoch": 0.17, "learning_rate": 2.811550151975684e-07, "logits/chosen": -2.7305426597595215, "logits/rejected": -2.7588391304016113, "logps/chosen": -119.80528259277344, "logps/rejected": -160.10391235351562, "loss": 0.0738, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.611847400665283, "rewards/margins": 5.952840328216553, "rewards/rejected": -8.564687728881836, "step": 370 }, { "epoch": 0.17, "learning_rate": 2.887537993920973e-07, "logits/chosen": -2.700817584991455, "logits/rejected": -2.7069475650787354, "logps/chosen": -127.48515319824219, "logps/rejected": -173.31446838378906, "loss": 0.0615, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6581039428710938, "rewards/margins": 6.759530544281006, "rewards/rejected": -10.417635917663574, "step": 380 }, { "epoch": 0.18, "learning_rate": 2.9635258358662614e-07, "logits/chosen": -2.72932767868042, "logits/rejected": -2.7426867485046387, "logps/chosen": -128.69351196289062, "logps/rejected": -170.44024658203125, "loss": 0.0596, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.062074899673462, "rewards/margins": 6.59927225112915, "rewards/rejected": -9.661347389221191, "step": 390 }, { "epoch": 0.18, "learning_rate": 3.03951367781155e-07, "logits/chosen": -2.737363815307617, "logits/rejected": -2.728015184402466, "logps/chosen": -118.87040710449219, "logps/rejected": -177.7161102294922, "loss": 0.0587, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.8841772079467773, "rewards/margins": 7.693885803222656, "rewards/rejected": -10.57806396484375, "step": 400 }, { "epoch": 0.18, "eval_logits/chosen": -2.727881669998169, "eval_logits/rejected": -2.6966190338134766, "eval_logps/chosen": -131.62547302246094, "eval_logps/rejected": -187.50515747070312, "eval_loss": 0.055923737585544586, "eval_rewards/accuracies": 0.9694444537162781, "eval_rewards/chosen": -4.0388264656066895, "eval_rewards/margins": 7.701231479644775, "eval_rewards/rejected": -11.740057945251465, "eval_runtime": 49.6576, "eval_samples_per_second": 57.635, "eval_steps_per_second": 1.812, "step": 400 }, { "epoch": 0.19, "learning_rate": 3.1155015197568383e-07, "logits/chosen": -2.699958086013794, "logits/rejected": -2.672335624694824, "logps/chosen": -121.584716796875, "logps/rejected": -185.4914093017578, "loss": 0.0693, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.9695181846618652, "rewards/margins": 8.059054374694824, "rewards/rejected": -11.028572082519531, "step": 410 }, { "epoch": 0.19, "learning_rate": 3.1914893617021275e-07, "logits/chosen": -2.724929094314575, "logits/rejected": -2.737008810043335, "logps/chosen": -131.0947265625, "logps/rejected": -174.7019805908203, "loss": 0.0579, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.8909332752227783, "rewards/margins": 6.2964396476745605, "rewards/rejected": -10.187372207641602, "step": 420 }, { "epoch": 0.2, "learning_rate": 3.267477203647416e-07, "logits/chosen": -2.742372512817383, "logits/rejected": -2.7384090423583984, "logps/chosen": -120.4808120727539, "logps/rejected": -170.55133056640625, "loss": 0.0618, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.03888201713562, "rewards/margins": 6.642469882965088, "rewards/rejected": -9.681352615356445, "step": 430 }, { "epoch": 0.2, "learning_rate": 3.343465045592705e-07, "logits/chosen": -2.6667733192443848, "logits/rejected": -2.596818447113037, "logps/chosen": -120.44290924072266, "logps/rejected": -168.67884826660156, "loss": 0.0558, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.524181842803955, "rewards/margins": 7.524356842041016, "rewards/rejected": -10.048540115356445, "step": 440 }, { "epoch": 0.21, "learning_rate": 3.4194528875379936e-07, "logits/chosen": -2.6612601280212402, "logits/rejected": -2.640688180923462, "logps/chosen": -136.09475708007812, "logps/rejected": -195.4031982421875, "loss": 0.0597, "rewards/accuracies": 1.0, "rewards/chosen": -4.248106002807617, "rewards/margins": 8.086824417114258, "rewards/rejected": -12.334931373596191, "step": 450 }, { "epoch": 0.21, "learning_rate": 3.495440729483283e-07, "logits/chosen": -2.6613001823425293, "logits/rejected": -2.640634298324585, "logps/chosen": -143.10586547851562, "logps/rejected": -187.25064086914062, "loss": 0.0674, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.273663520812988, "rewards/margins": 7.212340354919434, "rewards/rejected": -11.486001968383789, "step": 460 }, { "epoch": 0.21, "learning_rate": 3.5714285714285716e-07, "logits/chosen": -2.694119453430176, "logits/rejected": -2.6654534339904785, "logps/chosen": -135.16006469726562, "logps/rejected": -191.01541137695312, "loss": 0.0554, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.870119571685791, "rewards/margins": 8.046853065490723, "rewards/rejected": -11.916971206665039, "step": 470 }, { "epoch": 0.22, "learning_rate": 3.64741641337386e-07, "logits/chosen": -2.6668903827667236, "logits/rejected": -2.668224334716797, "logps/chosen": -135.64004516601562, "logps/rejected": -191.2487335205078, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/chosen": -3.9338130950927734, "rewards/margins": 8.0016450881958, "rewards/rejected": -11.935457229614258, "step": 480 }, { "epoch": 0.22, "learning_rate": 3.7234042553191484e-07, "logits/chosen": -2.7070086002349854, "logits/rejected": -2.6857943534851074, "logps/chosen": -123.0178451538086, "logps/rejected": -179.15554809570312, "loss": 0.0499, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.212670087814331, "rewards/margins": 7.310342311859131, "rewards/rejected": -10.5230131149292, "step": 490 }, { "epoch": 0.23, "learning_rate": 3.7993920972644377e-07, "logits/chosen": -2.6694531440734863, "logits/rejected": -2.672365665435791, "logps/chosen": -126.01292419433594, "logps/rejected": -187.0902557373047, "loss": 0.0379, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.865762710571289, "rewards/margins": 7.9018754959106445, "rewards/rejected": -11.767638206481934, "step": 500 }, { "epoch": 0.23, "eval_logits/chosen": -2.701693296432495, "eval_logits/rejected": -2.6612892150878906, "eval_logps/chosen": -130.73841857910156, "eval_logps/rejected": -192.8392791748047, "eval_loss": 0.04595184698700905, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -3.950122117996216, "eval_rewards/margins": 8.323347091674805, "eval_rewards/rejected": -12.273469924926758, "eval_runtime": 50.7966, "eval_samples_per_second": 56.342, "eval_steps_per_second": 1.772, "step": 500 }, { "epoch": 0.23, "learning_rate": 3.8753799392097264e-07, "logits/chosen": -2.6502652168273926, "logits/rejected": -2.642637014389038, "logps/chosen": -124.89344787597656, "logps/rejected": -187.1622314453125, "loss": 0.0445, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -3.712517499923706, "rewards/margins": 7.8403496742248535, "rewards/rejected": -11.55286693572998, "step": 510 }, { "epoch": 0.24, "learning_rate": 3.951367781155015e-07, "logits/chosen": -2.6770434379577637, "logits/rejected": -2.6957175731658936, "logps/chosen": -135.3203125, "logps/rejected": -196.0937957763672, "loss": 0.041, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.613642454147339, "rewards/margins": 8.412160873413086, "rewards/rejected": -12.025801658630371, "step": 520 }, { "epoch": 0.24, "learning_rate": 4.027355623100304e-07, "logits/chosen": -2.6510725021362305, "logits/rejected": -2.675741195678711, "logps/chosen": -118.3866958618164, "logps/rejected": -188.92434692382812, "loss": 0.0476, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.810616970062256, "rewards/margins": 9.09221076965332, "rewards/rejected": -11.902826309204102, "step": 530 }, { "epoch": 0.25, "learning_rate": 4.1033434650455925e-07, "logits/chosen": -2.644740581512451, "logits/rejected": -2.6704249382019043, "logps/chosen": -132.53656005859375, "logps/rejected": -188.7894287109375, "loss": 0.0484, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.122117042541504, "rewards/margins": 7.6324262619018555, "rewards/rejected": -11.754544258117676, "step": 540 }, { "epoch": 0.25, "learning_rate": 4.179331306990881e-07, "logits/chosen": -2.683001756668091, "logits/rejected": -2.6826956272125244, "logps/chosen": -139.59835815429688, "logps/rejected": -194.3646240234375, "loss": 0.037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.069397926330566, "rewards/margins": 8.341604232788086, "rewards/rejected": -12.411002159118652, "step": 550 }, { "epoch": 0.26, "learning_rate": 4.25531914893617e-07, "logits/chosen": -2.7073171138763428, "logits/rejected": -2.7193078994750977, "logps/chosen": -121.34649658203125, "logps/rejected": -194.57553100585938, "loss": 0.0433, "rewards/accuracies": 1.0, "rewards/chosen": -2.9804375171661377, "rewards/margins": 9.331927299499512, "rewards/rejected": -12.312365531921387, "step": 560 }, { "epoch": 0.26, "learning_rate": 4.3313069908814586e-07, "logits/chosen": -2.630943775177002, "logits/rejected": -2.6553235054016113, "logps/chosen": -128.59909057617188, "logps/rejected": -181.20632934570312, "loss": 0.0505, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.6925315856933594, "rewards/margins": 7.2038726806640625, "rewards/rejected": -10.896404266357422, "step": 570 }, { "epoch": 0.26, "learning_rate": 4.4072948328267473e-07, "logits/chosen": -2.540254831314087, "logits/rejected": -2.5618560314178467, "logps/chosen": -124.41963195800781, "logps/rejected": -184.33001708984375, "loss": 0.0367, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6125075817108154, "rewards/margins": 8.0702543258667, "rewards/rejected": -11.682764053344727, "step": 580 }, { "epoch": 0.27, "learning_rate": 4.4832826747720365e-07, "logits/chosen": -2.605494260787964, "logits/rejected": -2.5571651458740234, "logps/chosen": -130.73892211914062, "logps/rejected": -192.65866088867188, "loss": 0.0363, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.696895122528076, "rewards/margins": 8.656723976135254, "rewards/rejected": -12.353619575500488, "step": 590 }, { "epoch": 0.27, "learning_rate": 4.559270516717325e-07, "logits/chosen": -2.52860426902771, "logits/rejected": -2.4886059761047363, "logps/chosen": -126.6506118774414, "logps/rejected": -203.23599243164062, "loss": 0.0394, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.3456413745880127, "rewards/margins": 9.531149864196777, "rewards/rejected": -12.876792907714844, "step": 600 }, { "epoch": 0.27, "eval_logits/chosen": -2.478564739227295, "eval_logits/rejected": -2.390133857727051, "eval_logps/chosen": -139.76361083984375, "eval_logps/rejected": -215.0513916015625, "eval_loss": 0.04050706699490547, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -4.85264253616333, "eval_rewards/margins": 9.642037391662598, "eval_rewards/rejected": -14.494680404663086, "eval_runtime": 50.1674, "eval_samples_per_second": 57.049, "eval_steps_per_second": 1.794, "step": 600 }, { "epoch": 0.28, "learning_rate": 4.635258358662614e-07, "logits/chosen": -2.534883975982666, "logits/rejected": -2.4874258041381836, "logps/chosen": -131.05517578125, "logps/rejected": -187.19418334960938, "loss": 0.0537, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.8685455322265625, "rewards/margins": 7.740345001220703, "rewards/rejected": -11.60888957977295, "step": 610 }, { "epoch": 0.28, "learning_rate": 4.7112462006079026e-07, "logits/chosen": -2.5169684886932373, "logits/rejected": -2.4521279335021973, "logps/chosen": -126.732421875, "logps/rejected": -196.65960693359375, "loss": 0.0424, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.4762802124023438, "rewards/margins": 9.320673942565918, "rewards/rejected": -12.796956062316895, "step": 620 }, { "epoch": 0.29, "learning_rate": 4.787234042553192e-07, "logits/chosen": -2.539543867111206, "logits/rejected": -2.4839327335357666, "logps/chosen": -130.13665771484375, "logps/rejected": -192.86544799804688, "loss": 0.0569, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.813121795654297, "rewards/margins": 7.917851448059082, "rewards/rejected": -11.730974197387695, "step": 630 }, { "epoch": 0.29, "learning_rate": 4.86322188449848e-07, "logits/chosen": -2.5205817222595215, "logits/rejected": -2.4679293632507324, "logps/chosen": -114.6985092163086, "logps/rejected": -168.59475708007812, "loss": 0.0509, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.7615535259246826, "rewards/margins": 6.972909450531006, "rewards/rejected": -9.73446273803711, "step": 640 }, { "epoch": 0.3, "learning_rate": 4.939209726443769e-07, "logits/chosen": -2.5492513179779053, "logits/rejected": -2.473130941390991, "logps/chosen": -136.21096801757812, "logps/rejected": -197.6907958984375, "loss": 0.0363, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.211759090423584, "rewards/margins": 9.240852355957031, "rewards/rejected": -12.452611923217773, "step": 650 }, { "epoch": 0.3, "learning_rate": 4.998309382924767e-07, "logits/chosen": -2.629535675048828, "logits/rejected": -2.5611090660095215, "logps/chosen": -122.98139953613281, "logps/rejected": -183.54830932617188, "loss": 0.0477, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.009155750274658, "rewards/margins": 8.353796005249023, "rewards/rejected": -11.362951278686523, "step": 660 }, { "epoch": 0.31, "learning_rate": 4.989856297548605e-07, "logits/chosen": -2.580986738204956, "logits/rejected": -2.527766704559326, "logps/chosen": -131.6427001953125, "logps/rejected": -198.3678741455078, "loss": 0.0281, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6659603118896484, "rewards/margins": 8.929367065429688, "rewards/rejected": -12.59532642364502, "step": 670 }, { "epoch": 0.31, "learning_rate": 4.981403212172442e-07, "logits/chosen": -2.6364097595214844, "logits/rejected": -2.56335186958313, "logps/chosen": -131.08419799804688, "logps/rejected": -209.58523559570312, "loss": 0.0257, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.2999167442321777, "rewards/margins": 10.14362621307373, "rewards/rejected": -13.44354248046875, "step": 680 }, { "epoch": 0.31, "learning_rate": 4.97295012679628e-07, "logits/chosen": -2.5585570335388184, "logits/rejected": -2.560281276702881, "logps/chosen": -135.70570373535156, "logps/rejected": -207.83718872070312, "loss": 0.0497, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -4.453864097595215, "rewards/margins": 9.309991836547852, "rewards/rejected": -13.76385498046875, "step": 690 }, { "epoch": 0.32, "learning_rate": 4.964497041420119e-07, "logits/chosen": -2.6161398887634277, "logits/rejected": -2.57515811920166, "logps/chosen": -128.3286895751953, "logps/rejected": -198.37640380859375, "loss": 0.0375, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.2940287590026855, "rewards/margins": 9.220344543457031, "rewards/rejected": -12.514373779296875, "step": 700 }, { "epoch": 0.32, "eval_logits/chosen": -2.714183807373047, "eval_logits/rejected": -2.6707613468170166, "eval_logps/chosen": -128.3318634033203, "eval_logps/rejected": -196.30885314941406, "eval_loss": 0.037593573331832886, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -3.709465980529785, "eval_rewards/margins": 8.910959243774414, "eval_rewards/rejected": -12.6204252243042, "eval_runtime": 48.2423, "eval_samples_per_second": 59.326, "eval_steps_per_second": 1.866, "step": 700 }, { "epoch": 0.32, "learning_rate": 4.956043956043956e-07, "logits/chosen": -2.620093822479248, "logits/rejected": -2.5982906818389893, "logps/chosen": -123.15608978271484, "logps/rejected": -197.61636352539062, "loss": 0.0404, "rewards/accuracies": 1.0, "rewards/chosen": -3.266045331954956, "rewards/margins": 9.135406494140625, "rewards/rejected": -12.401453018188477, "step": 710 }, { "epoch": 0.33, "learning_rate": 4.947590870667794e-07, "logits/chosen": -2.595463275909424, "logits/rejected": -2.5666444301605225, "logps/chosen": -125.81497955322266, "logps/rejected": -189.8935546875, "loss": 0.0391, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.0596022605895996, "rewards/margins": 8.901065826416016, "rewards/rejected": -11.960668563842773, "step": 720 }, { "epoch": 0.33, "learning_rate": 4.939137785291631e-07, "logits/chosen": -2.632361650466919, "logits/rejected": -2.584230899810791, "logps/chosen": -112.73439025878906, "logps/rejected": -181.985595703125, "loss": 0.0396, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.658551812171936, "rewards/margins": 9.849242210388184, "rewards/rejected": -11.507793426513672, "step": 730 }, { "epoch": 0.34, "learning_rate": 4.930684699915469e-07, "logits/chosen": -2.561582565307617, "logits/rejected": -2.539332151412964, "logps/chosen": -106.41217041015625, "logps/rejected": -188.4742889404297, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": -2.267642021179199, "rewards/margins": 9.631690979003906, "rewards/rejected": -11.899332046508789, "step": 740 }, { "epoch": 0.34, "learning_rate": 4.922231614539306e-07, "logits/chosen": -2.5166029930114746, "logits/rejected": -2.4404571056365967, "logps/chosen": -111.54460144042969, "logps/rejected": -179.897216796875, "loss": 0.047, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.5172572135925293, "rewards/margins": 8.489733695983887, "rewards/rejected": -11.006990432739258, "step": 750 }, { "epoch": 0.35, "learning_rate": 4.913778529163144e-07, "logits/chosen": -2.363140106201172, "logits/rejected": -2.2299087047576904, "logps/chosen": -137.92788696289062, "logps/rejected": -216.5609893798828, "loss": 0.0331, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.3311238288879395, "rewards/margins": 9.85867691040039, "rewards/rejected": -14.189801216125488, "step": 760 }, { "epoch": 0.35, "learning_rate": 4.905325443786982e-07, "logits/chosen": -2.3814444541931152, "logits/rejected": -2.3055179119110107, "logps/chosen": -131.71412658691406, "logps/rejected": -202.88714599609375, "loss": 0.0316, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -4.11533260345459, "rewards/margins": 9.29047679901123, "rewards/rejected": -13.405810356140137, "step": 770 }, { "epoch": 0.36, "learning_rate": 4.896872358410819e-07, "logits/chosen": -2.4633522033691406, "logits/rejected": -2.408992290496826, "logps/chosen": -125.52821350097656, "logps/rejected": -182.40823364257812, "loss": 0.0554, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.523876667022705, "rewards/margins": 7.773106575012207, "rewards/rejected": -11.296982765197754, "step": 780 }, { "epoch": 0.36, "learning_rate": 4.888419273034658e-07, "logits/chosen": -2.465280055999756, "logits/rejected": -2.4070966243743896, "logps/chosen": -126.39984130859375, "logps/rejected": -201.7521514892578, "loss": 0.0206, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4645187854766846, "rewards/margins": 9.297203063964844, "rewards/rejected": -12.76172161102295, "step": 790 }, { "epoch": 0.37, "learning_rate": 4.879966187658495e-07, "logits/chosen": -2.2858943939208984, "logits/rejected": -2.219574451446533, "logps/chosen": -141.95816040039062, "logps/rejected": -227.12216186523438, "loss": 0.043, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.920865058898926, "rewards/margins": 10.735105514526367, "rewards/rejected": -15.655969619750977, "step": 800 }, { "epoch": 0.37, "eval_logits/chosen": -2.282928705215454, "eval_logits/rejected": -2.165402889251709, "eval_logps/chosen": -143.88894653320312, "eval_logps/rejected": -218.6447296142578, "eval_loss": 0.03752221167087555, "eval_rewards/accuracies": 0.9694444537162781, "eval_rewards/chosen": -5.26517391204834, "eval_rewards/margins": 9.588841438293457, "eval_rewards/rejected": -14.85401439666748, "eval_runtime": 49.0421, "eval_samples_per_second": 58.358, "eval_steps_per_second": 1.835, "step": 800 }, { "epoch": 0.37, "learning_rate": 4.871513102282333e-07, "logits/chosen": -2.3610994815826416, "logits/rejected": -2.256997585296631, "logps/chosen": -133.1490478515625, "logps/rejected": -206.186279296875, "loss": 0.0394, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.193153381347656, "rewards/margins": 9.370244026184082, "rewards/rejected": -13.563395500183105, "step": 810 }, { "epoch": 0.37, "learning_rate": 4.863060016906171e-07, "logits/chosen": -2.439469814300537, "logits/rejected": -2.413252115249634, "logps/chosen": -144.38931274414062, "logps/rejected": -226.8816680908203, "loss": 0.027, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.668347358703613, "rewards/margins": 10.250940322875977, "rewards/rejected": -15.919286727905273, "step": 820 }, { "epoch": 0.38, "learning_rate": 4.854606931530008e-07, "logits/chosen": -2.410789728164673, "logits/rejected": -2.3912742137908936, "logps/chosen": -149.9718017578125, "logps/rejected": -236.6019287109375, "loss": 0.0313, "rewards/accuracies": 0.9375, "rewards/chosen": -6.080855846405029, "rewards/margins": 10.09882926940918, "rewards/rejected": -16.179683685302734, "step": 830 }, { "epoch": 0.38, "learning_rate": 4.846153846153846e-07, "logits/chosen": -2.477858066558838, "logits/rejected": -2.388430118560791, "logps/chosen": -127.3630142211914, "logps/rejected": -198.08287048339844, "loss": 0.0423, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.9977195262908936, "rewards/margins": 9.791177749633789, "rewards/rejected": -12.788896560668945, "step": 840 }, { "epoch": 0.39, "learning_rate": 4.837700760777683e-07, "logits/chosen": -2.3127739429473877, "logits/rejected": -2.2208588123321533, "logps/chosen": -137.19290161132812, "logps/rejected": -220.7669677734375, "loss": 0.0374, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.068154811859131, "rewards/margins": 9.750088691711426, "rewards/rejected": -14.818242073059082, "step": 850 }, { "epoch": 0.39, "learning_rate": 4.829247675401522e-07, "logits/chosen": -2.288473606109619, "logits/rejected": -2.130735158920288, "logps/chosen": -147.62582397460938, "logps/rejected": -239.2997589111328, "loss": 0.0348, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.74985408782959, "rewards/margins": 11.903969764709473, "rewards/rejected": -16.653823852539062, "step": 860 }, { "epoch": 0.4, "learning_rate": 4.820794590025358e-07, "logits/chosen": -2.256855010986328, "logits/rejected": -2.172144651412964, "logps/chosen": -141.7345733642578, "logps/rejected": -226.32278442382812, "loss": 0.0351, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.722945213317871, "rewards/margins": 10.656365394592285, "rewards/rejected": -15.379310607910156, "step": 870 }, { "epoch": 0.4, "learning_rate": 4.812341504649197e-07, "logits/chosen": -2.3097877502441406, "logits/rejected": -2.2116963863372803, "logps/chosen": -128.104248046875, "logps/rejected": -195.17971801757812, "loss": 0.0512, "rewards/accuracies": 1.0, "rewards/chosen": -2.658078193664551, "rewards/margins": 9.50889778137207, "rewards/rejected": -12.166976928710938, "step": 880 }, { "epoch": 0.41, "learning_rate": 4.803888419273035e-07, "logits/chosen": -2.209960460662842, "logits/rejected": -2.1225123405456543, "logps/chosen": -127.14057922363281, "logps/rejected": -200.28134155273438, "loss": 0.0407, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.6977546215057373, "rewards/margins": 9.140750885009766, "rewards/rejected": -12.838505744934082, "step": 890 }, { "epoch": 0.41, "learning_rate": 4.795435333896872e-07, "logits/chosen": -2.12211275100708, "logits/rejected": -2.003075361251831, "logps/chosen": -134.07156372070312, "logps/rejected": -211.1444549560547, "loss": 0.0304, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.154332160949707, "rewards/margins": 9.661005973815918, "rewards/rejected": -13.815338134765625, "step": 900 }, { "epoch": 0.41, "eval_logits/chosen": -2.201496124267578, "eval_logits/rejected": -2.1041057109832764, "eval_logps/chosen": -133.43316650390625, "eval_logps/rejected": -207.507080078125, "eval_loss": 0.0380096472799778, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -4.219595432281494, "eval_rewards/margins": 9.520654678344727, "eval_rewards/rejected": -13.740249633789062, "eval_runtime": 48.7922, "eval_samples_per_second": 58.657, "eval_steps_per_second": 1.845, "step": 900 }, { "epoch": 0.42, "learning_rate": 4.78698224852071e-07, "logits/chosen": -2.263471841812134, "logits/rejected": -2.182316303253174, "logps/chosen": -127.5316162109375, "logps/rejected": -203.9865264892578, "loss": 0.0425, "rewards/accuracies": 1.0, "rewards/chosen": -3.45963978767395, "rewards/margins": 9.635172843933105, "rewards/rejected": -13.094813346862793, "step": 910 }, { "epoch": 0.42, "learning_rate": 4.778529163144547e-07, "logits/chosen": -2.2711265087127686, "logits/rejected": -2.161175489425659, "logps/chosen": -135.1485137939453, "logps/rejected": -213.77218627929688, "loss": 0.0296, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.044560432434082, "rewards/margins": 10.215932846069336, "rewards/rejected": -14.260492324829102, "step": 920 }, { "epoch": 0.42, "learning_rate": 4.770076077768385e-07, "logits/chosen": -2.209508180618286, "logits/rejected": -2.0596439838409424, "logps/chosen": -139.1815948486328, "logps/rejected": -226.0141143798828, "loss": 0.0457, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.9361088275909424, "rewards/margins": 11.679204940795898, "rewards/rejected": -15.615313529968262, "step": 930 }, { "epoch": 0.43, "learning_rate": 4.761622992392223e-07, "logits/chosen": -2.2528536319732666, "logits/rejected": -2.1429479122161865, "logps/chosen": -134.35842895507812, "logps/rejected": -205.593994140625, "loss": 0.0432, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7717106342315674, "rewards/margins": 9.324674606323242, "rewards/rejected": -13.096386909484863, "step": 940 }, { "epoch": 0.43, "learning_rate": 4.7531699070160606e-07, "logits/chosen": -2.2223353385925293, "logits/rejected": -2.1092305183410645, "logps/chosen": -128.02865600585938, "logps/rejected": -215.7021484375, "loss": 0.032, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7584633827209473, "rewards/margins": 10.545815467834473, "rewards/rejected": -14.304278373718262, "step": 950 }, { "epoch": 0.44, "learning_rate": 4.7447168216398987e-07, "logits/chosen": -2.3026297092437744, "logits/rejected": -2.2772367000579834, "logps/chosen": -122.90687561035156, "logps/rejected": -187.3572998046875, "loss": 0.0507, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.7807776927948, "rewards/margins": 8.745773315429688, "rewards/rejected": -11.526552200317383, "step": 960 }, { "epoch": 0.44, "learning_rate": 4.7362637362637357e-07, "logits/chosen": -2.32552170753479, "logits/rejected": -2.2577829360961914, "logps/chosen": -131.49630737304688, "logps/rejected": -190.88980102539062, "loss": 0.0398, "rewards/accuracies": 1.0, "rewards/chosen": -4.0015668869018555, "rewards/margins": 8.384259223937988, "rewards/rejected": -12.385825157165527, "step": 970 }, { "epoch": 0.45, "learning_rate": 4.727810650887574e-07, "logits/chosen": -2.3235411643981934, "logits/rejected": -2.289680004119873, "logps/chosen": -123.10699462890625, "logps/rejected": -198.24154663085938, "loss": 0.0344, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.2337048053741455, "rewards/margins": 9.35986328125, "rewards/rejected": -12.593567848205566, "step": 980 }, { "epoch": 0.45, "learning_rate": 4.7193575655114114e-07, "logits/chosen": -2.24157977104187, "logits/rejected": -2.1801581382751465, "logps/chosen": -118.62461853027344, "logps/rejected": -207.7017059326172, "loss": 0.0297, "rewards/accuracies": 1.0, "rewards/chosen": -3.0132336616516113, "rewards/margins": 10.910429954528809, "rewards/rejected": -13.923663139343262, "step": 990 }, { "epoch": 0.46, "learning_rate": 4.7109044801352495e-07, "logits/chosen": -2.2588751316070557, "logits/rejected": -2.164956569671631, "logps/chosen": -125.435546875, "logps/rejected": -215.0337371826172, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -3.2047088146209717, "rewards/margins": 11.243847846984863, "rewards/rejected": -14.44855785369873, "step": 1000 }, { "epoch": 0.46, "eval_logits/chosen": -2.1849193572998047, "eval_logits/rejected": -2.0859615802764893, "eval_logps/chosen": -140.97811889648438, "eval_logps/rejected": -231.99063110351562, "eval_loss": 0.03243358060717583, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/chosen": -4.974091529846191, "eval_rewards/margins": 11.21451187133789, "eval_rewards/rejected": -16.1886043548584, "eval_runtime": 48.3459, "eval_samples_per_second": 59.198, "eval_steps_per_second": 1.862, "step": 1000 }, { "epoch": 0.46, "learning_rate": 4.7024513947590865e-07, "logits/chosen": -2.1504714488983154, "logits/rejected": -2.056974411010742, "logps/chosen": -134.57516479492188, "logps/rejected": -228.6068878173828, "loss": 0.0434, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -4.5758376121521, "rewards/margins": 11.010690689086914, "rewards/rejected": -15.586526870727539, "step": 1010 }, { "epoch": 0.47, "learning_rate": 4.6939983093829246e-07, "logits/chosen": -2.0870413780212402, "logits/rejected": -2.02657151222229, "logps/chosen": -139.79901123046875, "logps/rejected": -235.48812866210938, "loss": 0.0333, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.881598949432373, "rewards/margins": 11.311357498168945, "rewards/rejected": -16.192956924438477, "step": 1020 }, { "epoch": 0.47, "learning_rate": 4.685545224006762e-07, "logits/chosen": -2.180068016052246, "logits/rejected": -2.1508913040161133, "logps/chosen": -133.02784729003906, "logps/rejected": -224.31314086914062, "loss": 0.0336, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.727643013000488, "rewards/margins": 10.623067855834961, "rewards/rejected": -15.35071086883545, "step": 1030 }, { "epoch": 0.47, "learning_rate": 4.6770921386306003e-07, "logits/chosen": -2.2807860374450684, "logits/rejected": -2.279236316680908, "logps/chosen": -123.20567321777344, "logps/rejected": -196.570068359375, "loss": 0.0382, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.346115827560425, "rewards/margins": 10.093769073486328, "rewards/rejected": -12.4398832321167, "step": 1040 }, { "epoch": 0.48, "learning_rate": 4.668639053254438e-07, "logits/chosen": -2.246729850769043, "logits/rejected": -2.1844513416290283, "logps/chosen": -132.6374969482422, "logps/rejected": -225.3860321044922, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -3.7644131183624268, "rewards/margins": 11.513562202453613, "rewards/rejected": -15.277974128723145, "step": 1050 }, { "epoch": 0.48, "learning_rate": 4.660185967878275e-07, "logits/chosen": -2.301955223083496, "logits/rejected": -2.2792880535125732, "logps/chosen": -125.2342758178711, "logps/rejected": -197.12930297851562, "loss": 0.0469, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.531769275665283, "rewards/margins": 9.205384254455566, "rewards/rejected": -12.737154006958008, "step": 1060 }, { "epoch": 0.49, "learning_rate": 4.651732882502113e-07, "logits/chosen": -2.2779886722564697, "logits/rejected": -2.2681050300598145, "logps/chosen": -126.11590576171875, "logps/rejected": -211.461181640625, "loss": 0.0292, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.280942440032959, "rewards/margins": 10.733200073242188, "rewards/rejected": -14.014142990112305, "step": 1070 }, { "epoch": 0.49, "learning_rate": 4.6432797971259506e-07, "logits/chosen": -2.2388012409210205, "logits/rejected": -2.249535083770752, "logps/chosen": -120.04434967041016, "logps/rejected": -208.3643341064453, "loss": 0.0316, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.521498203277588, "rewards/margins": 11.035587310791016, "rewards/rejected": -13.557085037231445, "step": 1080 }, { "epoch": 0.5, "learning_rate": 4.6348267117497887e-07, "logits/chosen": -2.1909823417663574, "logits/rejected": -2.1376616954803467, "logps/chosen": -125.14691162109375, "logps/rejected": -222.58547973632812, "loss": 0.0212, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6583189964294434, "rewards/margins": 11.045095443725586, "rewards/rejected": -14.703417778015137, "step": 1090 }, { "epoch": 0.5, "learning_rate": 4.626373626373626e-07, "logits/chosen": -2.2102527618408203, "logits/rejected": -2.206092119216919, "logps/chosen": -128.36289978027344, "logps/rejected": -216.8212432861328, "loss": 0.03, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.7434325218200684, "rewards/margins": 10.838982582092285, "rewards/rejected": -14.582415580749512, "step": 1100 }, { "epoch": 0.5, "eval_logits/chosen": -2.174874782562256, "eval_logits/rejected": -2.11321759223938, "eval_logps/chosen": -142.76707458496094, "eval_logps/rejected": -234.18118286132812, "eval_loss": 0.03423836827278137, "eval_rewards/accuracies": 0.9666666388511658, "eval_rewards/chosen": -5.152987480163574, "eval_rewards/margins": 11.254671096801758, "eval_rewards/rejected": -16.40765953063965, "eval_runtime": 48.9677, "eval_samples_per_second": 58.447, "eval_steps_per_second": 1.838, "step": 1100 }, { "epoch": 0.51, "learning_rate": 4.617920540997464e-07, "logits/chosen": -2.216888904571533, "logits/rejected": -2.1993260383605957, "logps/chosen": -136.57183837890625, "logps/rejected": -238.6022491455078, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/chosen": -3.9422149658203125, "rewards/margins": 12.420379638671875, "rewards/rejected": -16.362594604492188, "step": 1110 }, { "epoch": 0.51, "learning_rate": 4.6094674556213014e-07, "logits/chosen": -2.2214226722717285, "logits/rejected": -2.162144660949707, "logps/chosen": -121.65068054199219, "logps/rejected": -201.74705505371094, "loss": 0.0358, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.384767532348633, "rewards/margins": 10.979083061218262, "rewards/rejected": -13.363850593566895, "step": 1120 }, { "epoch": 0.52, "learning_rate": 4.6010143702451395e-07, "logits/chosen": -2.278418779373169, "logits/rejected": -2.2508838176727295, "logps/chosen": -119.28004455566406, "logps/rejected": -186.50210571289062, "loss": 0.0381, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -2.392752170562744, "rewards/margins": 8.599018096923828, "rewards/rejected": -10.991769790649414, "step": 1130 }, { "epoch": 0.52, "learning_rate": 4.592561284868977e-07, "logits/chosen": -2.179384469985962, "logits/rejected": -2.1243185997009277, "logps/chosen": -121.19415283203125, "logps/rejected": -204.47756958007812, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/chosen": -3.072291374206543, "rewards/margins": 10.187277793884277, "rewards/rejected": -13.25956916809082, "step": 1140 }, { "epoch": 0.52, "learning_rate": 4.584108199492815e-07, "logits/chosen": -2.1386260986328125, "logits/rejected": -2.0709643363952637, "logps/chosen": -129.10621643066406, "logps/rejected": -223.4142608642578, "loss": 0.0164, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.289857387542725, "rewards/margins": 11.014361381530762, "rewards/rejected": -15.304219245910645, "step": 1150 }, { "epoch": 0.53, "learning_rate": 4.575655114116652e-07, "logits/chosen": -2.069509744644165, "logits/rejected": -1.9732223749160767, "logps/chosen": -141.01705932617188, "logps/rejected": -229.3037109375, "loss": 0.0345, "rewards/accuracies": 1.0, "rewards/chosen": -4.0492401123046875, "rewards/margins": 11.49431324005127, "rewards/rejected": -15.543553352355957, "step": 1160 }, { "epoch": 0.53, "learning_rate": 4.56720202874049e-07, "logits/chosen": -2.0551540851593018, "logits/rejected": -1.969403862953186, "logps/chosen": -128.4068603515625, "logps/rejected": -243.1908416748047, "loss": 0.0324, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.475301742553711, "rewards/margins": 13.750211715698242, "rewards/rejected": -17.225513458251953, "step": 1170 }, { "epoch": 0.54, "learning_rate": 4.558748943364328e-07, "logits/chosen": -1.974999189376831, "logits/rejected": -1.8650972843170166, "logps/chosen": -133.85598754882812, "logps/rejected": -262.3040466308594, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -5.252058506011963, "rewards/margins": 14.034210205078125, "rewards/rejected": -19.286270141601562, "step": 1180 }, { "epoch": 0.54, "learning_rate": 4.5502958579881655e-07, "logits/chosen": -2.0906567573547363, "logits/rejected": -1.9362144470214844, "logps/chosen": -136.1277618408203, "logps/rejected": -231.7950897216797, "loss": 0.0395, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.7756996154785156, "rewards/margins": 12.124837875366211, "rewards/rejected": -15.900537490844727, "step": 1190 }, { "epoch": 0.55, "learning_rate": 4.5418427726120036e-07, "logits/chosen": -2.1612088680267334, "logits/rejected": -2.0685601234436035, "logps/chosen": -118.42198181152344, "logps/rejected": -212.84268188476562, "loss": 0.0339, "rewards/accuracies": 1.0, "rewards/chosen": -2.731152296066284, "rewards/margins": 11.037375450134277, "rewards/rejected": -13.768527030944824, "step": 1200 }, { "epoch": 0.55, "eval_logits/chosen": -2.1124844551086426, "eval_logits/rejected": -2.000330686569214, "eval_logps/chosen": -118.87854766845703, "eval_logps/rejected": -190.10202026367188, "eval_loss": 0.031096385791897774, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -2.764134407043457, "eval_rewards/margins": 9.23560619354248, "eval_rewards/rejected": -11.999740600585938, "eval_runtime": 48.5808, "eval_samples_per_second": 58.912, "eval_steps_per_second": 1.853, "step": 1200 }, { "epoch": 0.55, "learning_rate": 4.5333896872358406e-07, "logits/chosen": -2.116041898727417, "logits/rejected": -1.9751415252685547, "logps/chosen": -126.99690246582031, "logps/rejected": -211.7047576904297, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": -2.922422409057617, "rewards/margins": 10.941381454467773, "rewards/rejected": -13.863802909851074, "step": 1210 }, { "epoch": 0.56, "learning_rate": 4.5249366018596787e-07, "logits/chosen": -2.0528061389923096, "logits/rejected": -1.9215404987335205, "logps/chosen": -123.11860656738281, "logps/rejected": -220.2886199951172, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -3.4065489768981934, "rewards/margins": 11.671735763549805, "rewards/rejected": -15.078285217285156, "step": 1220 }, { "epoch": 0.56, "learning_rate": 4.5164835164835163e-07, "logits/chosen": -2.0894837379455566, "logits/rejected": -1.946913480758667, "logps/chosen": -125.61395263671875, "logps/rejected": -206.34445190429688, "loss": 0.0319, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.333749771118164, "rewards/margins": 10.101171493530273, "rewards/rejected": -13.434921264648438, "step": 1230 }, { "epoch": 0.57, "learning_rate": 4.5080304311073544e-07, "logits/chosen": -1.9018142223358154, "logits/rejected": -1.7255821228027344, "logps/chosen": -130.88623046875, "logps/rejected": -230.283935546875, "loss": 0.033, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.72715425491333, "rewards/margins": 12.118368148803711, "rewards/rejected": -15.845524787902832, "step": 1240 }, { "epoch": 0.57, "learning_rate": 4.4995773457311914e-07, "logits/chosen": -1.8912999629974365, "logits/rejected": -1.6973804235458374, "logps/chosen": -140.52346801757812, "logps/rejected": -240.0919189453125, "loss": 0.0254, "rewards/accuracies": 1.0, "rewards/chosen": -4.971592903137207, "rewards/margins": 11.932929992675781, "rewards/rejected": -16.904523849487305, "step": 1250 }, { "epoch": 0.58, "learning_rate": 4.491124260355029e-07, "logits/chosen": -2.0328290462493896, "logits/rejected": -1.9123872518539429, "logps/chosen": -135.49826049804688, "logps/rejected": -225.2454833984375, "loss": 0.0315, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.119205951690674, "rewards/margins": 11.104761123657227, "rewards/rejected": -15.223965644836426, "step": 1260 }, { "epoch": 0.58, "learning_rate": 4.482671174978867e-07, "logits/chosen": -2.009664297103882, "logits/rejected": -1.873490333557129, "logps/chosen": -130.07424926757812, "logps/rejected": -234.64404296875, "loss": 0.0303, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6427292823791504, "rewards/margins": 12.3717041015625, "rewards/rejected": -16.01443099975586, "step": 1270 }, { "epoch": 0.58, "learning_rate": 4.4742180896027047e-07, "logits/chosen": -1.9943698644638062, "logits/rejected": -1.8417961597442627, "logps/chosen": -128.2240447998047, "logps/rejected": -208.1425018310547, "loss": 0.0423, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.6072349548339844, "rewards/margins": 10.532011985778809, "rewards/rejected": -14.139246940612793, "step": 1280 }, { "epoch": 0.59, "learning_rate": 4.465765004226543e-07, "logits/chosen": -2.0972723960876465, "logits/rejected": -1.9810869693756104, "logps/chosen": -124.97412109375, "logps/rejected": -192.53623962402344, "loss": 0.0272, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.600572109222412, "rewards/margins": 8.653288841247559, "rewards/rejected": -12.253861427307129, "step": 1290 }, { "epoch": 0.59, "learning_rate": 4.45731191885038e-07, "logits/chosen": -2.0243687629699707, "logits/rejected": -1.8825792074203491, "logps/chosen": -124.19642639160156, "logps/rejected": -210.21359252929688, "loss": 0.0489, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.705894708633423, "rewards/margins": 10.930899620056152, "rewards/rejected": -13.636795043945312, "step": 1300 }, { "epoch": 0.59, "eval_logits/chosen": -2.2572124004364014, "eval_logits/rejected": -2.173858880996704, "eval_logps/chosen": -119.61163330078125, "eval_logps/rejected": -194.4359130859375, "eval_loss": 0.0271957665681839, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -2.837442398071289, "eval_rewards/margins": 9.59568977355957, "eval_rewards/rejected": -12.43313217163086, "eval_runtime": 48.8158, "eval_samples_per_second": 58.629, "eval_steps_per_second": 1.844, "step": 1300 }, { "epoch": 0.6, "learning_rate": 4.448858833474218e-07, "logits/chosen": -2.195492744445801, "logits/rejected": -2.124648094177246, "logps/chosen": -123.89532470703125, "logps/rejected": -201.10519409179688, "loss": 0.031, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.475634813308716, "rewards/margins": 9.291130065917969, "rewards/rejected": -12.766763687133789, "step": 1310 }, { "epoch": 0.6, "learning_rate": 4.4404057480980555e-07, "logits/chosen": -2.1314010620117188, "logits/rejected": -2.0682311058044434, "logps/chosen": -120.6261215209961, "logps/rejected": -214.7279815673828, "loss": 0.0371, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -3.261993885040283, "rewards/margins": 10.915969848632812, "rewards/rejected": -14.177961349487305, "step": 1320 }, { "epoch": 0.61, "learning_rate": 4.4319526627218936e-07, "logits/chosen": -2.127857208251953, "logits/rejected": -2.0334818363189697, "logps/chosen": -123.88873291015625, "logps/rejected": -227.59896850585938, "loss": 0.0286, "rewards/accuracies": 1.0, "rewards/chosen": -3.45524525642395, "rewards/margins": 12.025127410888672, "rewards/rejected": -15.480372428894043, "step": 1330 }, { "epoch": 0.61, "learning_rate": 4.423499577345731e-07, "logits/chosen": -2.1861257553100586, "logits/rejected": -2.081003427505493, "logps/chosen": -119.9664535522461, "logps/rejected": -222.65853881835938, "loss": 0.0317, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.040316343307495, "rewards/margins": 12.255244255065918, "rewards/rejected": -15.295560836791992, "step": 1340 }, { "epoch": 0.62, "learning_rate": 4.4150464919695687e-07, "logits/chosen": -2.072154998779297, "logits/rejected": -1.9294564723968506, "logps/chosen": -128.0748748779297, "logps/rejected": -225.40939331054688, "loss": 0.0233, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.229937553405762, "rewards/margins": 11.25472354888916, "rewards/rejected": -15.484660148620605, "step": 1350 }, { "epoch": 0.62, "learning_rate": 4.4065934065934063e-07, "logits/chosen": -2.095121383666992, "logits/rejected": -1.9508041143417358, "logps/chosen": -139.701904296875, "logps/rejected": -224.1685791015625, "loss": 0.0244, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.606893062591553, "rewards/margins": 10.666409492492676, "rewards/rejected": -15.273303031921387, "step": 1360 }, { "epoch": 0.63, "learning_rate": 4.398140321217244e-07, "logits/chosen": -2.2028756141662598, "logits/rejected": -2.0891079902648926, "logps/chosen": -124.32647705078125, "logps/rejected": -217.9824981689453, "loss": 0.0314, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.51623797416687, "rewards/margins": 11.21560287475586, "rewards/rejected": -14.731842041015625, "step": 1370 }, { "epoch": 0.63, "learning_rate": 4.389687235841082e-07, "logits/chosen": -2.18900990486145, "logits/rejected": -2.0879733562469482, "logps/chosen": -128.83123779296875, "logps/rejected": -219.22506713867188, "loss": 0.0356, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.9812655448913574, "rewards/margins": 10.82696533203125, "rewards/rejected": -14.80823040008545, "step": 1380 }, { "epoch": 0.63, "learning_rate": 4.3812341504649195e-07, "logits/chosen": -2.2501654624938965, "logits/rejected": -2.13492488861084, "logps/chosen": -117.8704833984375, "logps/rejected": -204.0887451171875, "loss": 0.0496, "rewards/accuracies": 1.0, "rewards/chosen": -2.5138015747070312, "rewards/margins": 10.41077995300293, "rewards/rejected": -12.924581527709961, "step": 1390 }, { "epoch": 0.64, "learning_rate": 4.372781065088757e-07, "logits/chosen": -2.152750253677368, "logits/rejected": -2.0153543949127197, "logps/chosen": -136.95753479003906, "logps/rejected": -223.27490234375, "loss": 0.0263, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.8503575325012207, "rewards/margins": 11.449549674987793, "rewards/rejected": -15.299906730651855, "step": 1400 }, { "epoch": 0.64, "eval_logits/chosen": -2.2102935314178467, "eval_logits/rejected": -2.081444263458252, "eval_logps/chosen": -126.17691802978516, "eval_logps/rejected": -202.63734436035156, "eval_loss": 0.029057901352643967, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -3.4939706325531006, "eval_rewards/margins": 9.75930404663086, "eval_rewards/rejected": -13.253273963928223, "eval_runtime": 49.2397, "eval_samples_per_second": 58.124, "eval_steps_per_second": 1.828, "step": 1400 }, { "epoch": 0.64, "learning_rate": 4.3643279797125947e-07, "logits/chosen": -2.223374128341675, "logits/rejected": -2.125884771347046, "logps/chosen": -130.22470092773438, "logps/rejected": -208.01644897460938, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -3.154832124710083, "rewards/margins": 10.32645320892334, "rewards/rejected": -13.481285095214844, "step": 1410 }, { "epoch": 0.65, "learning_rate": 4.355874894336433e-07, "logits/chosen": -2.1675026416778564, "logits/rejected": -2.0408825874328613, "logps/chosen": -129.0258331298828, "logps/rejected": -226.79629516601562, "loss": 0.0287, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7398815155029297, "rewards/margins": 11.839478492736816, "rewards/rejected": -15.579358100891113, "step": 1420 }, { "epoch": 0.65, "learning_rate": 4.3474218089602703e-07, "logits/chosen": -2.213987350463867, "logits/rejected": -2.105513572692871, "logps/chosen": -130.8446807861328, "logps/rejected": -215.7467041015625, "loss": 0.04, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.016310214996338, "rewards/margins": 10.332174301147461, "rewards/rejected": -14.348484992980957, "step": 1430 }, { "epoch": 0.66, "learning_rate": 4.3389687235841084e-07, "logits/chosen": -2.158055067062378, "logits/rejected": -2.044100284576416, "logps/chosen": -128.62557983398438, "logps/rejected": -219.5900115966797, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": -3.4990944862365723, "rewards/margins": 11.490110397338867, "rewards/rejected": -14.989204406738281, "step": 1440 }, { "epoch": 0.66, "learning_rate": 4.3305156382079455e-07, "logits/chosen": -2.1493687629699707, "logits/rejected": -2.0363144874572754, "logps/chosen": -134.43417358398438, "logps/rejected": -224.71237182617188, "loss": 0.0338, "rewards/accuracies": 1.0, "rewards/chosen": -3.8965301513671875, "rewards/margins": 11.341170310974121, "rewards/rejected": -15.237699508666992, "step": 1450 }, { "epoch": 0.67, "learning_rate": 4.3220625528317836e-07, "logits/chosen": -2.225033760070801, "logits/rejected": -2.1324281692504883, "logps/chosen": -142.65785217285156, "logps/rejected": -234.6016387939453, "loss": 0.0233, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.940446376800537, "rewards/margins": 11.09235954284668, "rewards/rejected": -16.03280258178711, "step": 1460 }, { "epoch": 0.67, "learning_rate": 4.313609467455621e-07, "logits/chosen": -2.2476277351379395, "logits/rejected": -2.195188045501709, "logps/chosen": -134.739990234375, "logps/rejected": -223.62112426757812, "loss": 0.0327, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.509375095367432, "rewards/margins": 11.092538833618164, "rewards/rejected": -15.601913452148438, "step": 1470 }, { "epoch": 0.68, "learning_rate": 4.3051563820794587e-07, "logits/chosen": -2.212526798248291, "logits/rejected": -2.1159298419952393, "logps/chosen": -132.11997985839844, "logps/rejected": -234.01931762695312, "loss": 0.0337, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.121611595153809, "rewards/margins": 12.537824630737305, "rewards/rejected": -16.659435272216797, "step": 1480 }, { "epoch": 0.68, "learning_rate": 4.2967032967032963e-07, "logits/chosen": -2.276444911956787, "logits/rejected": -2.1930670738220215, "logps/chosen": -121.98631286621094, "logps/rejected": -213.0667724609375, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/chosen": -2.8731472492218018, "rewards/margins": 11.492844581604004, "rewards/rejected": -14.365991592407227, "step": 1490 }, { "epoch": 0.68, "learning_rate": 4.288250211327134e-07, "logits/chosen": -2.245380163192749, "logits/rejected": -2.15441632270813, "logps/chosen": -124.43641662597656, "logps/rejected": -232.03225708007812, "loss": 0.0301, "rewards/accuracies": 1.0, "rewards/chosen": -3.0712151527404785, "rewards/margins": 12.98668098449707, "rewards/rejected": -16.05789566040039, "step": 1500 }, { "epoch": 0.68, "eval_logits/chosen": -2.1682088375091553, "eval_logits/rejected": -2.058032751083374, "eval_logps/chosen": -138.2372283935547, "eval_logps/rejected": -232.24818420410156, "eval_loss": 0.026560302823781967, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -4.700002670288086, "eval_rewards/margins": 11.514355659484863, "eval_rewards/rejected": -16.2143611907959, "eval_runtime": 48.5449, "eval_samples_per_second": 58.956, "eval_steps_per_second": 1.854, "step": 1500 }, { "epoch": 0.69, "learning_rate": 4.279797125950972e-07, "logits/chosen": -2.2608067989349365, "logits/rejected": -2.193211555480957, "logps/chosen": -124.4505386352539, "logps/rejected": -219.6403350830078, "loss": 0.0305, "rewards/accuracies": 1.0, "rewards/chosen": -3.85394549369812, "rewards/margins": 11.083495140075684, "rewards/rejected": -14.937440872192383, "step": 1510 }, { "epoch": 0.69, "learning_rate": 4.2713440405748095e-07, "logits/chosen": -2.330735206604004, "logits/rejected": -2.2386350631713867, "logps/chosen": -121.85264587402344, "logps/rejected": -214.347412109375, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -2.3249454498291016, "rewards/margins": 11.747182846069336, "rewards/rejected": -14.07213020324707, "step": 1520 }, { "epoch": 0.7, "learning_rate": 4.2628909551986476e-07, "logits/chosen": -2.280710458755493, "logits/rejected": -2.2095468044281006, "logps/chosen": -127.70649719238281, "logps/rejected": -224.08901977539062, "loss": 0.0268, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.8593363761901855, "rewards/margins": 11.323004722595215, "rewards/rejected": -15.182344436645508, "step": 1530 }, { "epoch": 0.7, "learning_rate": 4.2544378698224847e-07, "logits/chosen": -2.1825404167175293, "logits/rejected": -2.1189706325531006, "logps/chosen": -132.66131591796875, "logps/rejected": -222.04931640625, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -4.108658790588379, "rewards/margins": 10.970823287963867, "rewards/rejected": -15.07948112487793, "step": 1540 }, { "epoch": 0.71, "learning_rate": 4.245984784446323e-07, "logits/chosen": -2.1728508472442627, "logits/rejected": -2.0841972827911377, "logps/chosen": -140.6467742919922, "logps/rejected": -246.8183135986328, "loss": 0.0284, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.1096906661987305, "rewards/margins": 12.453695297241211, "rewards/rejected": -17.563385009765625, "step": 1550 }, { "epoch": 0.71, "learning_rate": 4.2375316990701604e-07, "logits/chosen": -2.0678842067718506, "logits/rejected": -1.8705580234527588, "logps/chosen": -127.19903564453125, "logps/rejected": -243.9872589111328, "loss": 0.037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.363131523132324, "rewards/margins": 13.238340377807617, "rewards/rejected": -17.60147476196289, "step": 1560 }, { "epoch": 0.72, "learning_rate": 4.2290786136939985e-07, "logits/chosen": -2.123220205307007, "logits/rejected": -1.9888830184936523, "logps/chosen": -130.83587646484375, "logps/rejected": -227.7192840576172, "loss": 0.0253, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.2909469604492188, "rewards/margins": 12.203306198120117, "rewards/rejected": -15.494253158569336, "step": 1570 }, { "epoch": 0.72, "learning_rate": 4.220625528317836e-07, "logits/chosen": -2.1660406589508057, "logits/rejected": -2.0749552249908447, "logps/chosen": -129.2559356689453, "logps/rejected": -239.82839965820312, "loss": 0.0281, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.8316562175750732, "rewards/margins": 12.731201171875, "rewards/rejected": -16.562856674194336, "step": 1580 }, { "epoch": 0.73, "learning_rate": 4.212172442941673e-07, "logits/chosen": -2.0430922508239746, "logits/rejected": -1.9348751306533813, "logps/chosen": -156.99769592285156, "logps/rejected": -265.02130126953125, "loss": 0.0288, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -6.00015926361084, "rewards/margins": 12.735962867736816, "rewards/rejected": -18.736122131347656, "step": 1590 }, { "epoch": 0.73, "learning_rate": 4.203719357565511e-07, "logits/chosen": -2.006765604019165, "logits/rejected": -1.8920835256576538, "logps/chosen": -159.15304565429688, "logps/rejected": -254.2825469970703, "loss": 0.0272, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.988160610198975, "rewards/margins": 12.050902366638184, "rewards/rejected": -18.0390625, "step": 1600 }, { "epoch": 0.73, "eval_logits/chosen": -1.9654126167297363, "eval_logits/rejected": -1.8091415166854858, "eval_logps/chosen": -152.25767517089844, "eval_logps/rejected": -251.39105224609375, "eval_loss": 0.028284309431910515, "eval_rewards/accuracies": 0.9666666388511658, "eval_rewards/chosen": -6.102046489715576, "eval_rewards/margins": 12.026601791381836, "eval_rewards/rejected": -18.128646850585938, "eval_runtime": 48.6434, "eval_samples_per_second": 58.836, "eval_steps_per_second": 1.85, "step": 1600 }, { "epoch": 0.73, "learning_rate": 4.195266272189349e-07, "logits/chosen": -1.9909400939941406, "logits/rejected": -1.8486502170562744, "logps/chosen": -139.93109130859375, "logps/rejected": -240.50790405273438, "loss": 0.0267, "rewards/accuracies": 1.0, "rewards/chosen": -4.681200981140137, "rewards/margins": 12.465145111083984, "rewards/rejected": -17.146345138549805, "step": 1610 }, { "epoch": 0.74, "learning_rate": 4.186813186813187e-07, "logits/chosen": -1.9522764682769775, "logits/rejected": -1.859828233718872, "logps/chosen": -135.3563995361328, "logps/rejected": -242.638916015625, "loss": 0.0324, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.077095985412598, "rewards/margins": 12.791072845458984, "rewards/rejected": -16.868167877197266, "step": 1620 }, { "epoch": 0.74, "learning_rate": 4.1783601014370244e-07, "logits/chosen": -2.0774741172790527, "logits/rejected": -1.9764686822891235, "logps/chosen": -127.30684661865234, "logps/rejected": -224.0484619140625, "loss": 0.028, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.535048723220825, "rewards/margins": 11.87446403503418, "rewards/rejected": -15.409512519836426, "step": 1630 }, { "epoch": 0.75, "learning_rate": 4.169907016060862e-07, "logits/chosen": -2.080463409423828, "logits/rejected": -1.9346845149993896, "logps/chosen": -139.84872436523438, "logps/rejected": -237.13134765625, "loss": 0.0275, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.80663800239563, "rewards/margins": 12.872383117675781, "rewards/rejected": -16.679019927978516, "step": 1640 }, { "epoch": 0.75, "learning_rate": 4.1614539306846996e-07, "logits/chosen": -2.105950355529785, "logits/rejected": -1.9851375818252563, "logps/chosen": -136.1564178466797, "logps/rejected": -218.47354125976562, "loss": 0.0251, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.8997530937194824, "rewards/margins": 10.47309398651123, "rewards/rejected": -14.372848510742188, "step": 1650 }, { "epoch": 0.76, "learning_rate": 4.1530008453085377e-07, "logits/chosen": -2.0644431114196777, "logits/rejected": -1.9539110660552979, "logps/chosen": -122.35018157958984, "logps/rejected": -213.6524200439453, "loss": 0.0352, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.2302792072296143, "rewards/margins": 11.085116386413574, "rewards/rejected": -14.315394401550293, "step": 1660 }, { "epoch": 0.76, "learning_rate": 4.144547759932375e-07, "logits/chosen": -2.0494117736816406, "logits/rejected": -1.943672776222229, "logps/chosen": -128.10084533691406, "logps/rejected": -206.52450561523438, "loss": 0.0257, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.70310640335083, "rewards/margins": 10.135705947875977, "rewards/rejected": -13.838810920715332, "step": 1670 }, { "epoch": 0.77, "learning_rate": 4.1360946745562133e-07, "logits/chosen": -2.0471606254577637, "logits/rejected": -1.9206764698028564, "logps/chosen": -134.4086456298828, "logps/rejected": -238.36083984375, "loss": 0.0248, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.129271984100342, "rewards/margins": 12.306023597717285, "rewards/rejected": -16.435293197631836, "step": 1680 }, { "epoch": 0.77, "learning_rate": 4.1276415891800504e-07, "logits/chosen": -2.1813762187957764, "logits/rejected": -2.0932717323303223, "logps/chosen": -117.71382904052734, "logps/rejected": -211.7388153076172, "loss": 0.0204, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.775641918182373, "rewards/margins": 11.458349227905273, "rewards/rejected": -14.233988761901855, "step": 1690 }, { "epoch": 0.78, "learning_rate": 4.119188503803888e-07, "logits/chosen": -2.187344789505005, "logits/rejected": -2.0936601161956787, "logps/chosen": -127.6551513671875, "logps/rejected": -225.92593383789062, "loss": 0.0278, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.2443504333496094, "rewards/margins": 11.978067398071289, "rewards/rejected": -15.222417831420898, "step": 1700 }, { "epoch": 0.78, "eval_logits/chosen": -2.244328260421753, "eval_logits/rejected": -2.165691375732422, "eval_logps/chosen": -121.15006256103516, "eval_logps/rejected": -200.86061096191406, "eval_loss": 0.02541803941130638, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -2.991286277770996, "eval_rewards/margins": 10.084315299987793, "eval_rewards/rejected": -13.075600624084473, "eval_runtime": 48.58, "eval_samples_per_second": 58.913, "eval_steps_per_second": 1.853, "step": 1700 }, { "epoch": 0.78, "learning_rate": 4.110735418427726e-07, "logits/chosen": -2.2411341667175293, "logits/rejected": -2.151401996612549, "logps/chosen": -121.0505142211914, "logps/rejected": -218.67831420898438, "loss": 0.0161, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.7752506732940674, "rewards/margins": 12.061071395874023, "rewards/rejected": -14.836321830749512, "step": 1710 }, { "epoch": 0.79, "learning_rate": 4.1022823330515636e-07, "logits/chosen": -2.269857883453369, "logits/rejected": -2.196040153503418, "logps/chosen": -112.02171325683594, "logps/rejected": -191.1605224609375, "loss": 0.0237, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.076202869415283, "rewards/margins": 10.068809509277344, "rewards/rejected": -12.145011901855469, "step": 1720 }, { "epoch": 0.79, "learning_rate": 4.0938292476754017e-07, "logits/chosen": -2.2377099990844727, "logits/rejected": -2.1565918922424316, "logps/chosen": -110.47419738769531, "logps/rejected": -200.7766571044922, "loss": 0.0311, "rewards/accuracies": 1.0, "rewards/chosen": -2.1801040172576904, "rewards/margins": 10.705169677734375, "rewards/rejected": -12.885273933410645, "step": 1730 }, { "epoch": 0.79, "learning_rate": 4.085376162299239e-07, "logits/chosen": -2.2225890159606934, "logits/rejected": -2.1323955059051514, "logps/chosen": -120.49271392822266, "logps/rejected": -214.49478149414062, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/chosen": -2.7662570476531982, "rewards/margins": 11.611419677734375, "rewards/rejected": -14.377676010131836, "step": 1740 }, { "epoch": 0.8, "learning_rate": 4.076923076923077e-07, "logits/chosen": -2.264892101287842, "logits/rejected": -2.1854147911071777, "logps/chosen": -111.61662292480469, "logps/rejected": -202.98321533203125, "loss": 0.0303, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.9836992025375366, "rewards/margins": 11.16712474822998, "rewards/rejected": -13.150823593139648, "step": 1750 }, { "epoch": 0.8, "learning_rate": 4.0684699915469144e-07, "logits/chosen": -2.3072378635406494, "logits/rejected": -2.242619037628174, "logps/chosen": -129.24737548828125, "logps/rejected": -198.75900268554688, "loss": 0.0347, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.685056209564209, "rewards/margins": 8.488093376159668, "rewards/rejected": -12.173150062561035, "step": 1760 }, { "epoch": 0.81, "learning_rate": 4.0600169061707525e-07, "logits/chosen": -2.2348296642303467, "logits/rejected": -2.1900715827941895, "logps/chosen": -114.0647964477539, "logps/rejected": -217.90072631835938, "loss": 0.0193, "rewards/accuracies": 1.0, "rewards/chosen": -2.479679584503174, "rewards/margins": 12.287931442260742, "rewards/rejected": -14.767611503601074, "step": 1770 }, { "epoch": 0.81, "learning_rate": 4.0515638207945896e-07, "logits/chosen": -2.238379716873169, "logits/rejected": -2.193352699279785, "logps/chosen": -128.66403198242188, "logps/rejected": -215.4800262451172, "loss": 0.0408, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.537277936935425, "rewards/margins": 10.639458656311035, "rewards/rejected": -14.176734924316406, "step": 1780 }, { "epoch": 0.82, "learning_rate": 4.043110735418427e-07, "logits/chosen": -2.1671791076660156, "logits/rejected": -2.1061270236968994, "logps/chosen": -123.48826599121094, "logps/rejected": -213.7099151611328, "loss": 0.019, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.847043514251709, "rewards/margins": 11.608076095581055, "rewards/rejected": -14.455119132995605, "step": 1790 }, { "epoch": 0.82, "learning_rate": 4.034657650042265e-07, "logits/chosen": -2.0503573417663574, "logits/rejected": -1.9893563985824585, "logps/chosen": -140.46481323242188, "logps/rejected": -256.7433166503906, "loss": 0.0291, "rewards/accuracies": 1.0, "rewards/chosen": -4.495582580566406, "rewards/margins": 13.987375259399414, "rewards/rejected": -18.48295783996582, "step": 1800 }, { "epoch": 0.82, "eval_logits/chosen": -2.036374807357788, "eval_logits/rejected": -1.9426956176757812, "eval_logps/chosen": -140.2257843017578, "eval_logps/rejected": -234.42259216308594, "eval_loss": 0.025952113792300224, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/chosen": -4.898858070373535, "eval_rewards/margins": 11.532942771911621, "eval_rewards/rejected": -16.43180274963379, "eval_runtime": 49.2187, "eval_samples_per_second": 58.149, "eval_steps_per_second": 1.829, "step": 1800 }, { "epoch": 0.83, "learning_rate": 4.026204564666103e-07, "logits/chosen": -1.964010238647461, "logits/rejected": -1.896442174911499, "logps/chosen": -146.27426147460938, "logps/rejected": -250.4436798095703, "loss": 0.0145, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.305529594421387, "rewards/margins": 12.814935684204102, "rewards/rejected": -18.120464324951172, "step": 1810 }, { "epoch": 0.83, "learning_rate": 4.017751479289941e-07, "logits/chosen": -2.0868825912475586, "logits/rejected": -1.9830644130706787, "logps/chosen": -137.53465270996094, "logps/rejected": -241.1505126953125, "loss": 0.0254, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.553969383239746, "rewards/margins": 12.338434219360352, "rewards/rejected": -16.892404556274414, "step": 1820 }, { "epoch": 0.84, "learning_rate": 4.009298393913778e-07, "logits/chosen": -2.0947585105895996, "logits/rejected": -2.0051965713500977, "logps/chosen": -123.03267669677734, "logps/rejected": -234.7122344970703, "loss": 0.0212, "rewards/accuracies": 1.0, "rewards/chosen": -3.8716492652893066, "rewards/margins": 12.589926719665527, "rewards/rejected": -16.46157455444336, "step": 1830 }, { "epoch": 0.84, "learning_rate": 4.000845308537616e-07, "logits/chosen": -2.092618465423584, "logits/rejected": -1.9781370162963867, "logps/chosen": -134.5900421142578, "logps/rejected": -260.53485107421875, "loss": 0.0173, "rewards/accuracies": 1.0, "rewards/chosen": -4.325318336486816, "rewards/margins": 13.917451858520508, "rewards/rejected": -18.242769241333008, "step": 1840 }, { "epoch": 0.84, "learning_rate": 3.9923922231614536e-07, "logits/chosen": -2.052762508392334, "logits/rejected": -1.9207212924957275, "logps/chosen": -128.0731658935547, "logps/rejected": -237.6293487548828, "loss": 0.0192, "rewards/accuracies": 1.0, "rewards/chosen": -4.202644348144531, "rewards/margins": 12.438554763793945, "rewards/rejected": -16.641199111938477, "step": 1850 }, { "epoch": 0.85, "learning_rate": 3.9839391377852917e-07, "logits/chosen": -2.051992416381836, "logits/rejected": -1.9083547592163086, "logps/chosen": -139.25662231445312, "logps/rejected": -266.41278076171875, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -4.40474271774292, "rewards/margins": 14.871357917785645, "rewards/rejected": -19.276100158691406, "step": 1860 }, { "epoch": 0.85, "learning_rate": 3.9754860524091293e-07, "logits/chosen": -1.98797607421875, "logits/rejected": -1.8896089792251587, "logps/chosen": -143.28067016601562, "logps/rejected": -239.6479949951172, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/chosen": -4.469711780548096, "rewards/margins": 12.147082328796387, "rewards/rejected": -16.61679458618164, "step": 1870 }, { "epoch": 0.86, "learning_rate": 3.967032967032967e-07, "logits/chosen": -2.1141982078552246, "logits/rejected": -2.009969711303711, "logps/chosen": -125.37733459472656, "logps/rejected": -214.2640380859375, "loss": 0.0455, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7521488666534424, "rewards/margins": 10.471065521240234, "rewards/rejected": -14.223215103149414, "step": 1880 }, { "epoch": 0.86, "learning_rate": 3.9585798816568044e-07, "logits/chosen": -2.096876621246338, "logits/rejected": -2.0146164894104004, "logps/chosen": -142.41055297851562, "logps/rejected": -233.24954223632812, "loss": 0.0222, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.191798686981201, "rewards/margins": 11.954326629638672, "rewards/rejected": -16.14612579345703, "step": 1890 }, { "epoch": 0.87, "learning_rate": 3.950126796280642e-07, "logits/chosen": -2.0792036056518555, "logits/rejected": -1.9863141775131226, "logps/chosen": -134.00045776367188, "logps/rejected": -232.57553100585938, "loss": 0.0253, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6598198413848877, "rewards/margins": 11.928800582885742, "rewards/rejected": -15.588618278503418, "step": 1900 }, { "epoch": 0.87, "eval_logits/chosen": -2.0360984802246094, "eval_logits/rejected": -1.9098594188690186, "eval_logps/chosen": -131.64044189453125, "eval_logps/rejected": -223.32122802734375, "eval_loss": 0.02517438866198063, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -4.0403242111206055, "eval_rewards/margins": 11.28133773803711, "eval_rewards/rejected": -15.321663856506348, "eval_runtime": 49.6332, "eval_samples_per_second": 57.663, "eval_steps_per_second": 1.813, "step": 1900 }, { "epoch": 0.87, "learning_rate": 3.94167371090448e-07, "logits/chosen": -2.0065677165985107, "logits/rejected": -1.8861503601074219, "logps/chosen": -125.51326751708984, "logps/rejected": -241.58425903320312, "loss": 0.0167, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.468381881713867, "rewards/margins": 13.503756523132324, "rewards/rejected": -16.972139358520508, "step": 1910 }, { "epoch": 0.88, "learning_rate": 3.9332206255283177e-07, "logits/chosen": -2.0069472789764404, "logits/rejected": -1.880659818649292, "logps/chosen": -126.3309555053711, "logps/rejected": -214.9152069091797, "loss": 0.0505, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.8646080493927, "rewards/margins": 10.696456909179688, "rewards/rejected": -14.561065673828125, "step": 1920 }, { "epoch": 0.88, "learning_rate": 3.924767540152155e-07, "logits/chosen": -2.0810904502868652, "logits/rejected": -1.9871727228164673, "logps/chosen": -123.64485931396484, "logps/rejected": -200.4116973876953, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/chosen": -2.4952170848846436, "rewards/margins": 10.431968688964844, "rewards/rejected": -12.92718505859375, "step": 1930 }, { "epoch": 0.89, "learning_rate": 3.916314454775993e-07, "logits/chosen": -1.965213418006897, "logits/rejected": -1.8638643026351929, "logps/chosen": -123.869384765625, "logps/rejected": -213.0116424560547, "loss": 0.026, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.5767924785614014, "rewards/margins": 11.060912132263184, "rewards/rejected": -14.63770580291748, "step": 1940 }, { "epoch": 0.89, "learning_rate": 3.907861369399831e-07, "logits/chosen": -2.027801036834717, "logits/rejected": -1.8885034322738647, "logps/chosen": -115.36368560791016, "logps/rejected": -218.797607421875, "loss": 0.0186, "rewards/accuracies": 1.0, "rewards/chosen": -2.2753968238830566, "rewards/margins": 12.602838516235352, "rewards/rejected": -14.87823486328125, "step": 1950 }, { "epoch": 0.89, "learning_rate": 3.8994082840236685e-07, "logits/chosen": -2.0216193199157715, "logits/rejected": -1.917438268661499, "logps/chosen": -136.36891174316406, "logps/rejected": -224.12875366210938, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -4.036121368408203, "rewards/margins": 11.403396606445312, "rewards/rejected": -15.4395170211792, "step": 1960 }, { "epoch": 0.9, "learning_rate": 3.8909551986475066e-07, "logits/chosen": -2.039957284927368, "logits/rejected": -1.9109203815460205, "logps/chosen": -123.08699798583984, "logps/rejected": -234.58975219726562, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -2.9215152263641357, "rewards/margins": 13.935195922851562, "rewards/rejected": -16.85671043395996, "step": 1970 }, { "epoch": 0.9, "learning_rate": 3.8825021132713436e-07, "logits/chosen": -2.0436453819274902, "logits/rejected": -1.9319490194320679, "logps/chosen": -127.397216796875, "logps/rejected": -237.54476928710938, "loss": 0.0271, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.8864715099334717, "rewards/margins": 12.645808219909668, "rewards/rejected": -16.53228187561035, "step": 1980 }, { "epoch": 0.91, "learning_rate": 3.874049027895182e-07, "logits/chosen": -2.097534656524658, "logits/rejected": -1.9975534677505493, "logps/chosen": -117.48333740234375, "logps/rejected": -218.18301391601562, "loss": 0.0189, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.8377158641815186, "rewards/margins": 11.74644660949707, "rewards/rejected": -14.584162712097168, "step": 1990 }, { "epoch": 0.91, "learning_rate": 3.8655959425190193e-07, "logits/chosen": -2.1223201751708984, "logits/rejected": -2.0484108924865723, "logps/chosen": -127.0828857421875, "logps/rejected": -223.9336395263672, "loss": 0.0235, "rewards/accuracies": 1.0, "rewards/chosen": -3.3377633094787598, "rewards/margins": 11.691206932067871, "rewards/rejected": -15.028970718383789, "step": 2000 }, { "epoch": 0.91, "eval_logits/chosen": -2.157291889190674, "eval_logits/rejected": -2.044750213623047, "eval_logps/chosen": -123.21009826660156, "eval_logps/rejected": -218.69635009765625, "eval_loss": 0.022263653576374054, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -3.1972897052764893, "eval_rewards/margins": 11.661887168884277, "eval_rewards/rejected": -14.859176635742188, "eval_runtime": 49.2513, "eval_samples_per_second": 58.11, "eval_steps_per_second": 1.827, "step": 2000 }, { "epoch": 0.92, "learning_rate": 3.857142857142857e-07, "logits/chosen": -2.145784378051758, "logits/rejected": -2.039353132247925, "logps/chosen": -118.86814880371094, "logps/rejected": -231.6697540283203, "loss": 0.0229, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.6437346935272217, "rewards/margins": 13.244104385375977, "rewards/rejected": -15.887837409973145, "step": 2010 }, { "epoch": 0.92, "learning_rate": 3.8486897717666945e-07, "logits/chosen": -2.0739400386810303, "logits/rejected": -1.9888814687728882, "logps/chosen": -115.21482849121094, "logps/rejected": -220.90774536132812, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -2.819551706314087, "rewards/margins": 11.673441886901855, "rewards/rejected": -14.49299144744873, "step": 2020 }, { "epoch": 0.93, "learning_rate": 3.840236686390532e-07, "logits/chosen": -2.0994973182678223, "logits/rejected": -2.0037684440612793, "logps/chosen": -115.40962219238281, "logps/rejected": -210.97677612304688, "loss": 0.028, "rewards/accuracies": 1.0, "rewards/chosen": -2.4768853187561035, "rewards/margins": 12.01233959197998, "rewards/rejected": -14.489225387573242, "step": 2030 }, { "epoch": 0.93, "learning_rate": 3.83178360101437e-07, "logits/chosen": -2.139526844024658, "logits/rejected": -2.0501785278320312, "logps/chosen": -122.64337158203125, "logps/rejected": -215.5990447998047, "loss": 0.0437, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.588408946990967, "rewards/margins": 11.165366172790527, "rewards/rejected": -14.753776550292969, "step": 2040 }, { "epoch": 0.94, "learning_rate": 3.8233305156382077e-07, "logits/chosen": -2.2497003078460693, "logits/rejected": -2.1790757179260254, "logps/chosen": -126.99018859863281, "logps/rejected": -205.12118530273438, "loss": 0.0373, "rewards/accuracies": 1.0, "rewards/chosen": -2.9164180755615234, "rewards/margins": 10.327306747436523, "rewards/rejected": -13.243725776672363, "step": 2050 }, { "epoch": 0.94, "learning_rate": 3.814877430262046e-07, "logits/chosen": -2.2753875255584717, "logits/rejected": -2.2370798587799072, "logps/chosen": -117.94771575927734, "logps/rejected": -200.80496215820312, "loss": 0.0271, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.391221523284912, "rewards/margins": 10.8762788772583, "rewards/rejected": -13.267499923706055, "step": 2060 }, { "epoch": 0.94, "learning_rate": 3.806424344885883e-07, "logits/chosen": -2.2422733306884766, "logits/rejected": -2.1572372913360596, "logps/chosen": -113.2877426147461, "logps/rejected": -220.81710815429688, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/chosen": -2.4801509380340576, "rewards/margins": 12.62053394317627, "rewards/rejected": -15.100683212280273, "step": 2070 }, { "epoch": 0.95, "learning_rate": 3.797971259509721e-07, "logits/chosen": -2.186211109161377, "logits/rejected": -2.132835865020752, "logps/chosen": -116.13663482666016, "logps/rejected": -209.317626953125, "loss": 0.0248, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.663703441619873, "rewards/margins": 11.38145637512207, "rewards/rejected": -14.045160293579102, "step": 2080 }, { "epoch": 0.95, "learning_rate": 3.7895181741335585e-07, "logits/chosen": -2.203897476196289, "logits/rejected": -2.1628737449645996, "logps/chosen": -118.30888366699219, "logps/rejected": -203.86724853515625, "loss": 0.0264, "rewards/accuracies": 1.0, "rewards/chosen": -2.612675428390503, "rewards/margins": 10.86390495300293, "rewards/rejected": -13.476580619812012, "step": 2090 }, { "epoch": 0.96, "learning_rate": 3.7810650887573966e-07, "logits/chosen": -2.103102922439575, "logits/rejected": -2.016601800918579, "logps/chosen": -119.75636291503906, "logps/rejected": -212.2992401123047, "loss": 0.0272, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.717388868331909, "rewards/margins": 11.71783447265625, "rewards/rejected": -14.435221672058105, "step": 2100 }, { "epoch": 0.96, "eval_logits/chosen": -2.089155673980713, "eval_logits/rejected": -1.9834376573562622, "eval_logps/chosen": -127.06487274169922, "eval_logps/rejected": -223.385498046875, "eval_loss": 0.02362261526286602, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -3.5827677249908447, "eval_rewards/margins": 11.745320320129395, "eval_rewards/rejected": -15.328089714050293, "eval_runtime": 48.1236, "eval_samples_per_second": 59.472, "eval_steps_per_second": 1.87, "step": 2100 }, { "epoch": 0.96, "learning_rate": 3.772612003381234e-07, "logits/chosen": -2.150611162185669, "logits/rejected": -2.0834269523620605, "logps/chosen": -117.68843078613281, "logps/rejected": -215.09017944335938, "loss": 0.0252, "rewards/accuracies": 1.0, "rewards/chosen": -2.300182342529297, "rewards/margins": 12.045984268188477, "rewards/rejected": -14.346165657043457, "step": 2110 }, { "epoch": 0.97, "learning_rate": 3.764158918005071e-07, "logits/chosen": -2.176175832748413, "logits/rejected": -2.113129138946533, "logps/chosen": -121.611083984375, "logps/rejected": -220.4056854248047, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -2.795717477798462, "rewards/margins": 12.352693557739258, "rewards/rejected": -15.148412704467773, "step": 2120 }, { "epoch": 0.97, "learning_rate": 3.7557058326289093e-07, "logits/chosen": -2.181874990463257, "logits/rejected": -2.119330883026123, "logps/chosen": -128.18763732910156, "logps/rejected": -238.7275848388672, "loss": 0.0187, "rewards/accuracies": 1.0, "rewards/chosen": -3.2725563049316406, "rewards/margins": 13.210433959960938, "rewards/rejected": -16.482990264892578, "step": 2130 }, { "epoch": 0.98, "learning_rate": 3.747252747252747e-07, "logits/chosen": -2.2190940380096436, "logits/rejected": -2.1502795219421387, "logps/chosen": -118.45751953125, "logps/rejected": -222.6544189453125, "loss": 0.0196, "rewards/accuracies": 1.0, "rewards/chosen": -2.5364670753479004, "rewards/margins": 12.805526733398438, "rewards/rejected": -15.34199333190918, "step": 2140 }, { "epoch": 0.98, "learning_rate": 3.738799661876585e-07, "logits/chosen": -2.198096990585327, "logits/rejected": -2.1333017349243164, "logps/chosen": -120.4395751953125, "logps/rejected": -215.0843963623047, "loss": 0.0327, "rewards/accuracies": 1.0, "rewards/chosen": -3.054147243499756, "rewards/margins": 11.440935134887695, "rewards/rejected": -14.495083808898926, "step": 2150 }, { "epoch": 0.99, "learning_rate": 3.7303465765004226e-07, "logits/chosen": -2.176610231399536, "logits/rejected": -2.1423356533050537, "logps/chosen": -126.62446594238281, "logps/rejected": -223.6035614013672, "loss": 0.0245, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.833204984664917, "rewards/margins": 12.73131275177002, "rewards/rejected": -15.564518928527832, "step": 2160 }, { "epoch": 0.99, "learning_rate": 3.72189349112426e-07, "logits/chosen": -2.1511826515197754, "logits/rejected": -2.072523355484009, "logps/chosen": -133.12416076660156, "logps/rejected": -233.90939331054688, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -3.2800636291503906, "rewards/margins": 12.904279708862305, "rewards/rejected": -16.184341430664062, "step": 2170 }, { "epoch": 0.99, "learning_rate": 3.7134404057480977e-07, "logits/chosen": -2.2596356868743896, "logits/rejected": -2.1797854900360107, "logps/chosen": -109.3218994140625, "logps/rejected": -208.5428009033203, "loss": 0.0244, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.163811206817627, "rewards/margins": 11.893083572387695, "rewards/rejected": -14.056894302368164, "step": 2180 }, { "epoch": 1.0, "learning_rate": 3.704987320371936e-07, "logits/chosen": -2.264486789703369, "logits/rejected": -2.2104756832122803, "logps/chosen": -116.32193756103516, "logps/rejected": -205.1827392578125, "loss": 0.0121, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.8635953664779663, "rewards/margins": 11.354402542114258, "rewards/rejected": -13.217997550964355, "step": 2190 }, { "epoch": 1.0, "learning_rate": 3.6965342349957734e-07, "logits/chosen": -2.2398998737335205, "logits/rejected": -2.144261121749878, "logps/chosen": -117.9054183959961, "logps/rejected": -221.7399139404297, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.8189895153045654, "rewards/margins": 12.08039665222168, "rewards/rejected": -14.899385452270508, "step": 2200 }, { "epoch": 1.0, "eval_logits/chosen": -2.265789270401001, "eval_logits/rejected": -2.1606945991516113, "eval_logps/chosen": -117.26538848876953, "eval_logps/rejected": -213.8473358154297, "eval_loss": 0.020574109628796577, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -2.6028175354003906, "eval_rewards/margins": 11.771455764770508, "eval_rewards/rejected": -14.374273300170898, "eval_runtime": 48.9012, "eval_samples_per_second": 58.526, "eval_steps_per_second": 1.84, "step": 2200 }, { "epoch": 1.01, "learning_rate": 3.6880811496196115e-07, "logits/chosen": -2.2239885330200195, "logits/rejected": -2.099027156829834, "logps/chosen": -117.2328872680664, "logps/rejected": -240.2711944580078, "loss": 0.0122, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -2.659773588180542, "rewards/margins": 13.949972152709961, "rewards/rejected": -16.6097469329834, "step": 2210 }, { "epoch": 1.01, "learning_rate": 3.6796280642434485e-07, "logits/chosen": -2.224212169647217, "logits/rejected": -2.1168534755706787, "logps/chosen": -119.9342269897461, "logps/rejected": -238.63882446289062, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -2.521542549133301, "rewards/margins": 13.85914134979248, "rewards/rejected": -16.38068199157715, "step": 2220 }, { "epoch": 1.02, "learning_rate": 3.671174978867286e-07, "logits/chosen": -2.223118543624878, "logits/rejected": -2.092538356781006, "logps/chosen": -116.990234375, "logps/rejected": -240.6658477783203, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -2.179953098297119, "rewards/margins": 14.794723510742188, "rewards/rejected": -16.974674224853516, "step": 2230 }, { "epoch": 1.02, "learning_rate": 3.662721893491124e-07, "logits/chosen": -2.176121234893799, "logits/rejected": -2.0341637134552, "logps/chosen": -129.12535095214844, "logps/rejected": -250.8475341796875, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -3.3982722759246826, "rewards/margins": 14.736839294433594, "rewards/rejected": -18.135112762451172, "step": 2240 }, { "epoch": 1.03, "learning_rate": 3.654268808114962e-07, "logits/chosen": -2.2421722412109375, "logits/rejected": -2.151844024658203, "logps/chosen": -111.7896957397461, "logps/rejected": -223.0345458984375, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -2.0418715476989746, "rewards/margins": 13.331483840942383, "rewards/rejected": -15.373356819152832, "step": 2250 }, { "epoch": 1.03, "learning_rate": 3.6458157227387994e-07, "logits/chosen": -2.268448829650879, "logits/rejected": -2.1708810329437256, "logps/chosen": -111.51778411865234, "logps/rejected": -217.6341552734375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -1.3873281478881836, "rewards/margins": 13.243179321289062, "rewards/rejected": -14.63050651550293, "step": 2260 }, { "epoch": 1.04, "learning_rate": 3.637362637362637e-07, "logits/chosen": -2.2276995182037354, "logits/rejected": -2.134836196899414, "logps/chosen": -118.00798034667969, "logps/rejected": -232.2769775390625, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -2.6417064666748047, "rewards/margins": 13.054730415344238, "rewards/rejected": -15.696436882019043, "step": 2270 }, { "epoch": 1.04, "learning_rate": 3.628909551986475e-07, "logits/chosen": -2.2070775032043457, "logits/rejected": -2.111219644546509, "logps/chosen": -123.40742492675781, "logps/rejected": -239.35726928710938, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -3.2174816131591797, "rewards/margins": 13.53497314453125, "rewards/rejected": -16.752452850341797, "step": 2280 }, { "epoch": 1.05, "learning_rate": 3.6204564666103126e-07, "logits/chosen": -2.104203701019287, "logits/rejected": -1.9640731811523438, "logps/chosen": -128.63992309570312, "logps/rejected": -239.8550262451172, "loss": 0.0094, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -3.4442787170410156, "rewards/margins": 13.817400932312012, "rewards/rejected": -17.261680603027344, "step": 2290 }, { "epoch": 1.05, "learning_rate": 3.6120033812341507e-07, "logits/chosen": -2.0865983963012695, "logits/rejected": -1.9611084461212158, "logps/chosen": -129.01255798339844, "logps/rejected": -271.47900390625, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -3.7618050575256348, "rewards/margins": 16.001571655273438, "rewards/rejected": -19.763378143310547, "step": 2300 }, { "epoch": 1.05, "eval_logits/chosen": -2.055070638656616, "eval_logits/rejected": -1.8950544595718384, "eval_logps/chosen": -133.615234375, "eval_logps/rejected": -254.9934844970703, "eval_loss": 0.020873844623565674, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -4.237802505493164, "eval_rewards/margins": 14.251086235046387, "eval_rewards/rejected": -18.488889694213867, "eval_runtime": 48.722, "eval_samples_per_second": 58.741, "eval_steps_per_second": 1.847, "step": 2300 }, { "epoch": 1.05, "learning_rate": 3.603550295857988e-07, "logits/chosen": -2.0648961067199707, "logits/rejected": -1.911163568496704, "logps/chosen": -124.85935974121094, "logps/rejected": -266.6103210449219, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6062381267547607, "rewards/margins": 15.78801155090332, "rewards/rejected": -19.394250869750977, "step": 2310 }, { "epoch": 1.06, "learning_rate": 3.5950972104818253e-07, "logits/chosen": -2.039515733718872, "logits/rejected": -1.8882137537002563, "logps/chosen": -128.62649536132812, "logps/rejected": -267.51885986328125, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.149783134460449, "rewards/margins": 15.50981616973877, "rewards/rejected": -19.65959930419922, "step": 2320 }, { "epoch": 1.06, "learning_rate": 3.5866441251056634e-07, "logits/chosen": -2.0585365295410156, "logits/rejected": -1.901302695274353, "logps/chosen": -135.6258087158203, "logps/rejected": -253.69482421875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.203586578369141, "rewards/margins": 14.143606185913086, "rewards/rejected": -18.347192764282227, "step": 2330 }, { "epoch": 1.07, "learning_rate": 3.578191039729501e-07, "logits/chosen": -2.0926547050476074, "logits/rejected": -1.9490468502044678, "logps/chosen": -123.04887390136719, "logps/rejected": -267.77203369140625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.0432724952697754, "rewards/margins": 16.452411651611328, "rewards/rejected": -19.495681762695312, "step": 2340 }, { "epoch": 1.07, "learning_rate": 3.569737954353339e-07, "logits/chosen": -2.0938773155212402, "logits/rejected": -1.9247863292694092, "logps/chosen": -127.2049331665039, "logps/rejected": -267.74725341796875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -2.875643730163574, "rewards/margins": 16.645126342773438, "rewards/rejected": -19.520769119262695, "step": 2350 }, { "epoch": 1.08, "learning_rate": 3.561284868977176e-07, "logits/chosen": -2.120603322982788, "logits/rejected": -1.9793163537979126, "logps/chosen": -119.56201171875, "logps/rejected": -280.1813659667969, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -3.0645883083343506, "rewards/margins": 17.966808319091797, "rewards/rejected": -21.03139877319336, "step": 2360 }, { "epoch": 1.08, "learning_rate": 3.552831783601014e-07, "logits/chosen": -2.135152578353882, "logits/rejected": -1.9979181289672852, "logps/chosen": -132.65444946289062, "logps/rejected": -299.92486572265625, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.212158679962158, "rewards/margins": 18.336050033569336, "rewards/rejected": -22.548208236694336, "step": 2370 }, { "epoch": 1.09, "learning_rate": 3.544378698224852e-07, "logits/chosen": -2.131582260131836, "logits/rejected": -1.9988138675689697, "logps/chosen": -138.9375457763672, "logps/rejected": -296.9496154785156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -5.4065937995910645, "rewards/margins": 17.19466781616211, "rewards/rejected": -22.601261138916016, "step": 2380 }, { "epoch": 1.09, "learning_rate": 3.53592561284869e-07, "logits/chosen": -2.23891282081604, "logits/rejected": -2.1590425968170166, "logps/chosen": -122.85921478271484, "logps/rejected": -241.72793579101562, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6361231803894043, "rewards/margins": 13.326749801635742, "rewards/rejected": -16.962873458862305, "step": 2390 }, { "epoch": 1.1, "learning_rate": 3.5274725274725275e-07, "logits/chosen": -2.228616714477539, "logits/rejected": -2.143519878387451, "logps/chosen": -121.30659484863281, "logps/rejected": -244.88333129882812, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.2300007343292236, "rewards/margins": 13.734130859375, "rewards/rejected": -16.964130401611328, "step": 2400 }, { "epoch": 1.1, "eval_logits/chosen": -2.257730722427368, "eval_logits/rejected": -2.151594877243042, "eval_logps/chosen": -129.5041961669922, "eval_logps/rejected": -237.86705017089844, "eval_loss": 0.022165490314364433, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -3.826699733734131, "eval_rewards/margins": 12.949548721313477, "eval_rewards/rejected": -16.776248931884766, "eval_runtime": 48.3089, "eval_samples_per_second": 59.244, "eval_steps_per_second": 1.863, "step": 2400 }, { "epoch": 1.1, "learning_rate": 3.519019442096365e-07, "logits/chosen": -2.211165428161621, "logits/rejected": -2.1267261505126953, "logps/chosen": -128.97445678710938, "logps/rejected": -244.82559204101562, "loss": 0.0184, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.3847079277038574, "rewards/margins": 13.957148551940918, "rewards/rejected": -17.341854095458984, "step": 2410 }, { "epoch": 1.1, "learning_rate": 3.5105663567202026e-07, "logits/chosen": -2.083887815475464, "logits/rejected": -1.9280351400375366, "logps/chosen": -133.95697021484375, "logps/rejected": -273.7509765625, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -4.235071659088135, "rewards/margins": 16.102216720581055, "rewards/rejected": -20.33728790283203, "step": 2420 }, { "epoch": 1.11, "learning_rate": 3.50211327134404e-07, "logits/chosen": -2.0661416053771973, "logits/rejected": -1.9433352947235107, "logps/chosen": -131.2800750732422, "logps/rejected": -283.74798583984375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -4.225616931915283, "rewards/margins": 16.740453720092773, "rewards/rejected": -20.9660701751709, "step": 2430 }, { "epoch": 1.11, "learning_rate": 3.4936601859678783e-07, "logits/chosen": -2.0689034461975098, "logits/rejected": -1.9006198644638062, "logps/chosen": -140.2222900390625, "logps/rejected": -282.52154541015625, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -4.639592170715332, "rewards/margins": 16.51576805114746, "rewards/rejected": -21.155359268188477, "step": 2440 }, { "epoch": 1.12, "learning_rate": 3.485207100591716e-07, "logits/chosen": -2.069180965423584, "logits/rejected": -1.8828113079071045, "logps/chosen": -132.38973999023438, "logps/rejected": -282.66888427734375, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.3987526893615723, "rewards/margins": 17.470693588256836, "rewards/rejected": -20.86944580078125, "step": 2450 }, { "epoch": 1.12, "learning_rate": 3.4767540152155534e-07, "logits/chosen": -2.0478618144989014, "logits/rejected": -1.8923364877700806, "logps/chosen": -137.94956970214844, "logps/rejected": -262.4928283691406, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -4.1068878173828125, "rewards/margins": 14.977930068969727, "rewards/rejected": -19.08481788635254, "step": 2460 }, { "epoch": 1.13, "learning_rate": 3.468300929839391e-07, "logits/chosen": -2.032320499420166, "logits/rejected": -1.8694692850112915, "logps/chosen": -129.439697265625, "logps/rejected": -256.06854248046875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -3.800882339477539, "rewards/margins": 14.736040115356445, "rewards/rejected": -18.53692054748535, "step": 2470 }, { "epoch": 1.13, "learning_rate": 3.459847844463229e-07, "logits/chosen": -1.9632114171981812, "logits/rejected": -1.772853136062622, "logps/chosen": -137.02362060546875, "logps/rejected": -271.421875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.54182243347168, "rewards/margins": 15.439663887023926, "rewards/rejected": -19.981487274169922, "step": 2480 }, { "epoch": 1.14, "learning_rate": 3.4513947590870667e-07, "logits/chosen": -1.9685713052749634, "logits/rejected": -1.7855579853057861, "logps/chosen": -116.3403549194336, "logps/rejected": -269.75408935546875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.9411520957946777, "rewards/margins": 16.898277282714844, "rewards/rejected": -19.839427947998047, "step": 2490 }, { "epoch": 1.14, "learning_rate": 3.442941673710904e-07, "logits/chosen": -1.9229958057403564, "logits/rejected": -1.7180604934692383, "logps/chosen": -141.33531188964844, "logps/rejected": -287.43255615234375, "loss": 0.0114, "rewards/accuracies": 1.0, "rewards/chosen": -4.679624080657959, "rewards/margins": 17.07419204711914, "rewards/rejected": -21.753812789916992, "step": 2500 }, { "epoch": 1.14, "eval_logits/chosen": -1.8552100658416748, "eval_logits/rejected": -1.660325288772583, "eval_logps/chosen": -147.8096923828125, "eval_logps/rejected": -279.1315002441406, "eval_loss": 0.022998971864581108, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -5.65725040435791, "eval_rewards/margins": 15.245442390441895, "eval_rewards/rejected": -20.902692794799805, "eval_runtime": 48.7103, "eval_samples_per_second": 58.756, "eval_steps_per_second": 1.848, "step": 2500 }, { "epoch": 1.15, "learning_rate": 3.434488588334742e-07, "logits/chosen": -1.945347785949707, "logits/rejected": -1.7307497262954712, "logps/chosen": -135.29379272460938, "logps/rejected": -286.13726806640625, "loss": 0.0109, "rewards/accuracies": 1.0, "rewards/chosen": -4.308348178863525, "rewards/margins": 17.14834976196289, "rewards/rejected": -21.45669937133789, "step": 2510 }, { "epoch": 1.15, "learning_rate": 3.42603550295858e-07, "logits/chosen": -1.9633537530899048, "logits/rejected": -1.753211259841919, "logps/chosen": -133.03085327148438, "logps/rejected": -293.79693603515625, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.341958999633789, "rewards/margins": 17.850200653076172, "rewards/rejected": -22.19215965270996, "step": 2520 }, { "epoch": 1.15, "learning_rate": 3.4175824175824175e-07, "logits/chosen": -1.9777822494506836, "logits/rejected": -1.8326003551483154, "logps/chosen": -136.5177459716797, "logps/rejected": -278.86993408203125, "loss": 0.0194, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.523541450500488, "rewards/margins": 16.020450592041016, "rewards/rejected": -20.543991088867188, "step": 2530 }, { "epoch": 1.16, "learning_rate": 3.409129332206255e-07, "logits/chosen": -2.049921989440918, "logits/rejected": -1.9096603393554688, "logps/chosen": -128.6142578125, "logps/rejected": -250.35678100585938, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -3.5399975776672363, "rewards/margins": 14.6093111038208, "rewards/rejected": -18.149309158325195, "step": 2540 }, { "epoch": 1.16, "learning_rate": 3.4006762468300926e-07, "logits/chosen": -2.027036666870117, "logits/rejected": -1.8620399236679077, "logps/chosen": -134.22491455078125, "logps/rejected": -270.4568786621094, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -3.9924380779266357, "rewards/margins": 16.017200469970703, "rewards/rejected": -20.009639739990234, "step": 2550 }, { "epoch": 1.17, "learning_rate": 3.39222316145393e-07, "logits/chosen": -1.9359347820281982, "logits/rejected": -1.7949869632720947, "logps/chosen": -141.54837036132812, "logps/rejected": -281.8461608886719, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -5.612332344055176, "rewards/margins": 15.11158561706543, "rewards/rejected": -20.723918914794922, "step": 2560 }, { "epoch": 1.17, "learning_rate": 3.3837700760777683e-07, "logits/chosen": -1.879448652267456, "logits/rejected": -1.6673691272735596, "logps/chosen": -160.62313842773438, "logps/rejected": -294.2522277832031, "loss": 0.0097, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.579423427581787, "rewards/margins": 15.896377563476562, "rewards/rejected": -22.47580337524414, "step": 2570 }, { "epoch": 1.18, "learning_rate": 3.375316990701606e-07, "logits/chosen": -2.0175633430480957, "logits/rejected": -1.8550834655761719, "logps/chosen": -142.2264404296875, "logps/rejected": -282.3786926269531, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.80888032913208, "rewards/margins": 16.41634178161621, "rewards/rejected": -21.225223541259766, "step": 2580 }, { "epoch": 1.18, "learning_rate": 3.366863905325444e-07, "logits/chosen": -1.9725208282470703, "logits/rejected": -1.8113142251968384, "logps/chosen": -134.61044311523438, "logps/rejected": -288.10003662109375, "loss": 0.0188, "rewards/accuracies": 1.0, "rewards/chosen": -5.315024375915527, "rewards/margins": 16.41974639892578, "rewards/rejected": -21.73476791381836, "step": 2590 }, { "epoch": 1.19, "learning_rate": 3.358410819949281e-07, "logits/chosen": -2.056762218475342, "logits/rejected": -1.913812279701233, "logps/chosen": -122.4278793334961, "logps/rejected": -275.90936279296875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -3.6767172813415527, "rewards/margins": 16.875553131103516, "rewards/rejected": -20.552268981933594, "step": 2600 }, { "epoch": 1.19, "eval_logits/chosen": -2.0171782970428467, "eval_logits/rejected": -1.8500170707702637, "eval_logps/chosen": -140.48916625976562, "eval_logps/rejected": -263.9971618652344, "eval_loss": 0.02173527143895626, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -4.925196170806885, "eval_rewards/margins": 14.46406078338623, "eval_rewards/rejected": -19.38925552368164, "eval_runtime": 48.2981, "eval_samples_per_second": 59.257, "eval_steps_per_second": 1.863, "step": 2600 }, { "epoch": 1.19, "learning_rate": 3.349957734573119e-07, "logits/chosen": -2.0181078910827637, "logits/rejected": -1.8825185298919678, "logps/chosen": -137.3406982421875, "logps/rejected": -271.9612121582031, "loss": 0.0104, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.693453788757324, "rewards/margins": 15.234161376953125, "rewards/rejected": -19.927616119384766, "step": 2610 }, { "epoch": 1.2, "learning_rate": 3.3415046491969567e-07, "logits/chosen": -2.041337251663208, "logits/rejected": -1.866619348526001, "logps/chosen": -137.9915313720703, "logps/rejected": -269.0408020019531, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.2950334548950195, "rewards/margins": 15.195487976074219, "rewards/rejected": -19.490522384643555, "step": 2620 }, { "epoch": 1.2, "learning_rate": 3.333051563820795e-07, "logits/chosen": -2.074596881866455, "logits/rejected": -1.9203710556030273, "logps/chosen": -132.4041748046875, "logps/rejected": -252.46218872070312, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -4.1531267166137695, "rewards/margins": 13.75012493133545, "rewards/rejected": -17.903249740600586, "step": 2630 }, { "epoch": 1.2, "learning_rate": 3.3245984784446324e-07, "logits/chosen": -2.0898518562316895, "logits/rejected": -1.9567184448242188, "logps/chosen": -140.36544799804688, "logps/rejected": -271.5333557128906, "loss": 0.0187, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7434005737304688, "rewards/margins": 15.86798095703125, "rewards/rejected": -19.611379623413086, "step": 2640 }, { "epoch": 1.21, "learning_rate": 3.3161453930684694e-07, "logits/chosen": -1.94447922706604, "logits/rejected": -1.8102020025253296, "logps/chosen": -150.39633178710938, "logps/rejected": -288.087158203125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/chosen": -5.932501792907715, "rewards/margins": 15.888578414916992, "rewards/rejected": -21.821081161499023, "step": 2650 }, { "epoch": 1.21, "learning_rate": 3.3076923076923075e-07, "logits/chosen": -1.962162733078003, "logits/rejected": -1.796449065208435, "logps/chosen": -137.46371459960938, "logps/rejected": -309.79241943359375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -4.807311534881592, "rewards/margins": 19.060232162475586, "rewards/rejected": -23.867542266845703, "step": 2660 }, { "epoch": 1.22, "learning_rate": 3.299239222316145e-07, "logits/chosen": -2.0989317893981934, "logits/rejected": -1.9328184127807617, "logps/chosen": -129.36871337890625, "logps/rejected": -271.81866455078125, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -3.052420139312744, "rewards/margins": 16.742918014526367, "rewards/rejected": -19.795337677001953, "step": 2670 }, { "epoch": 1.22, "learning_rate": 3.290786136939983e-07, "logits/chosen": -2.135300397872925, "logits/rejected": -2.0134975910186768, "logps/chosen": -128.8970489501953, "logps/rejected": -272.21417236328125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -3.772590160369873, "rewards/margins": 16.183759689331055, "rewards/rejected": -19.956350326538086, "step": 2680 }, { "epoch": 1.23, "learning_rate": 3.282333051563821e-07, "logits/chosen": -2.1228604316711426, "logits/rejected": -1.9986953735351562, "logps/chosen": -134.7543182373047, "logps/rejected": -265.08306884765625, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -4.4857401847839355, "rewards/margins": 15.023765563964844, "rewards/rejected": -19.509506225585938, "step": 2690 }, { "epoch": 1.23, "learning_rate": 3.2738799661876583e-07, "logits/chosen": -2.0504837036132812, "logits/rejected": -1.9117425680160522, "logps/chosen": -145.67308044433594, "logps/rejected": -270.6615905761719, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -5.501971244812012, "rewards/margins": 14.847882270812988, "rewards/rejected": -20.349851608276367, "step": 2700 }, { "epoch": 1.23, "eval_logits/chosen": -2.017770290374756, "eval_logits/rejected": -1.867299199104309, "eval_logps/chosen": -145.2077178955078, "eval_logps/rejected": -271.73480224609375, "eval_loss": 0.023520223796367645, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.397050380706787, "eval_rewards/margins": 14.765971183776855, "eval_rewards/rejected": -20.163022994995117, "eval_runtime": 49.0085, "eval_samples_per_second": 58.398, "eval_steps_per_second": 1.836, "step": 2700 }, { "epoch": 1.24, "learning_rate": 3.265426880811496e-07, "logits/chosen": -2.0202062129974365, "logits/rejected": -1.8841352462768555, "logps/chosen": -151.43978881835938, "logps/rejected": -286.70758056640625, "loss": 0.0055, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.582226753234863, "rewards/margins": 15.728716850280762, "rewards/rejected": -21.310945510864258, "step": 2710 }, { "epoch": 1.24, "learning_rate": 3.256973795435334e-07, "logits/chosen": -2.0473127365112305, "logits/rejected": -1.9340074062347412, "logps/chosen": -134.10830688476562, "logps/rejected": -270.43170166015625, "loss": 0.0137, "rewards/accuracies": 1.0, "rewards/chosen": -4.385322570800781, "rewards/margins": 15.43200969696045, "rewards/rejected": -19.817331314086914, "step": 2720 }, { "epoch": 1.25, "learning_rate": 3.2485207100591716e-07, "logits/chosen": -2.0043697357177734, "logits/rejected": -1.8764839172363281, "logps/chosen": -128.83837890625, "logps/rejected": -265.72991943359375, "loss": 0.0118, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.232174873352051, "rewards/margins": 15.495455741882324, "rewards/rejected": -19.727630615234375, "step": 2730 }, { "epoch": 1.25, "learning_rate": 3.2400676246830097e-07, "logits/chosen": -1.9958341121673584, "logits/rejected": -1.8539024591445923, "logps/chosen": -134.306884765625, "logps/rejected": -271.220458984375, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.187023162841797, "rewards/margins": 15.482475280761719, "rewards/rejected": -19.66950035095215, "step": 2740 }, { "epoch": 1.26, "learning_rate": 3.2316145393068467e-07, "logits/chosen": -1.9477897882461548, "logits/rejected": -1.8062279224395752, "logps/chosen": -127.55613708496094, "logps/rejected": -272.15313720703125, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -3.4938323497772217, "rewards/margins": 16.706642150878906, "rewards/rejected": -20.20047378540039, "step": 2750 }, { "epoch": 1.26, "learning_rate": 3.2231614539306843e-07, "logits/chosen": -1.957558035850525, "logits/rejected": -1.832824468612671, "logps/chosen": -131.24839782714844, "logps/rejected": -260.27166748046875, "loss": 0.0158, "rewards/accuracies": 1.0, "rewards/chosen": -4.663699150085449, "rewards/margins": 14.462875366210938, "rewards/rejected": -19.12657356262207, "step": 2760 }, { "epoch": 1.26, "learning_rate": 3.2147083685545224e-07, "logits/chosen": -2.097468852996826, "logits/rejected": -2.0030126571655273, "logps/chosen": -131.1923828125, "logps/rejected": -261.2097473144531, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -3.6247544288635254, "rewards/margins": 15.17779541015625, "rewards/rejected": -18.802549362182617, "step": 2770 }, { "epoch": 1.27, "learning_rate": 3.20625528317836e-07, "logits/chosen": -2.0422565937042236, "logits/rejected": -1.9468787908554077, "logps/chosen": -135.0767822265625, "logps/rejected": -260.1372985839844, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -4.394379615783691, "rewards/margins": 14.503211975097656, "rewards/rejected": -18.897592544555664, "step": 2780 }, { "epoch": 1.27, "learning_rate": 3.1978021978021975e-07, "logits/chosen": -2.0489416122436523, "logits/rejected": -1.9344393014907837, "logps/chosen": -137.5537109375, "logps/rejected": -256.15869140625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/chosen": -4.1104559898376465, "rewards/margins": 14.742477416992188, "rewards/rejected": -18.852933883666992, "step": 2790 }, { "epoch": 1.28, "learning_rate": 3.189349112426035e-07, "logits/chosen": -2.0865895748138428, "logits/rejected": -1.9760797023773193, "logps/chosen": -133.05752563476562, "logps/rejected": -261.711181640625, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/chosen": -4.011336326599121, "rewards/margins": 15.45531940460205, "rewards/rejected": -19.466655731201172, "step": 2800 }, { "epoch": 1.28, "eval_logits/chosen": -2.152179002761841, "eval_logits/rejected": -2.0417330265045166, "eval_logps/chosen": -135.65919494628906, "eval_logps/rejected": -252.37962341308594, "eval_loss": 0.025323208421468735, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -4.44219970703125, "eval_rewards/margins": 13.785304069519043, "eval_rewards/rejected": -18.22750473022461, "eval_runtime": 49.1425, "eval_samples_per_second": 58.239, "eval_steps_per_second": 1.831, "step": 2800 }, { "epoch": 1.28, "learning_rate": 3.180896027049873e-07, "logits/chosen": -2.1095643043518066, "logits/rejected": -2.021012783050537, "logps/chosen": -137.2529296875, "logps/rejected": -278.6671447753906, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.625119686126709, "rewards/margins": 16.213436126708984, "rewards/rejected": -20.83855628967285, "step": 2810 }, { "epoch": 1.29, "learning_rate": 3.172442941673711e-07, "logits/chosen": -2.064513683319092, "logits/rejected": -1.9486706256866455, "logps/chosen": -141.18331909179688, "logps/rejected": -278.5491638183594, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/chosen": -4.658184051513672, "rewards/margins": 16.239248275756836, "rewards/rejected": -20.897432327270508, "step": 2820 }, { "epoch": 1.29, "learning_rate": 3.163989856297549e-07, "logits/chosen": -2.0233511924743652, "logits/rejected": -1.8923966884613037, "logps/chosen": -143.33706665039062, "logps/rejected": -295.3087463378906, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -5.4150919914245605, "rewards/margins": 17.231006622314453, "rewards/rejected": -22.64609718322754, "step": 2830 }, { "epoch": 1.3, "learning_rate": 3.155536770921386e-07, "logits/chosen": -2.0582056045532227, "logits/rejected": -1.9745107889175415, "logps/chosen": -147.27874755859375, "logps/rejected": -292.4425354003906, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": -4.995936870574951, "rewards/margins": 16.957895278930664, "rewards/rejected": -21.95383071899414, "step": 2840 }, { "epoch": 1.3, "learning_rate": 3.147083685545224e-07, "logits/chosen": -2.301547050476074, "logits/rejected": -2.2333405017852783, "logps/chosen": -114.71829986572266, "logps/rejected": -219.6496124267578, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/chosen": -2.4453327655792236, "rewards/margins": 12.367315292358398, "rewards/rejected": -14.812649726867676, "step": 2850 }, { "epoch": 1.31, "learning_rate": 3.1386306001690616e-07, "logits/chosen": -2.29642391204834, "logits/rejected": -2.225308656692505, "logps/chosen": -117.47834777832031, "logps/rejected": -219.9888153076172, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -2.5083842277526855, "rewards/margins": 12.631601333618164, "rewards/rejected": -15.139986991882324, "step": 2860 }, { "epoch": 1.31, "learning_rate": 3.130177514792899e-07, "logits/chosen": -2.269688129425049, "logits/rejected": -2.1952171325683594, "logps/chosen": -123.7551498413086, "logps/rejected": -238.8106689453125, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.254749298095703, "rewards/margins": 13.318034172058105, "rewards/rejected": -16.572784423828125, "step": 2870 }, { "epoch": 1.31, "learning_rate": 3.121724429416737e-07, "logits/chosen": -2.225463628768921, "logits/rejected": -2.132333993911743, "logps/chosen": -126.89964294433594, "logps/rejected": -262.2653503417969, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.3202064037323, "rewards/margins": 15.432042121887207, "rewards/rejected": -18.752248764038086, "step": 2880 }, { "epoch": 1.32, "learning_rate": 3.1132713440405743e-07, "logits/chosen": -2.1518263816833496, "logits/rejected": -2.0612220764160156, "logps/chosen": -134.36441040039062, "logps/rejected": -267.93707275390625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -4.020511627197266, "rewards/margins": 15.682164192199707, "rewards/rejected": -19.70267677307129, "step": 2890 }, { "epoch": 1.32, "learning_rate": 3.1048182586644124e-07, "logits/chosen": -2.1560277938842773, "logits/rejected": -2.056459426879883, "logps/chosen": -131.1350555419922, "logps/rejected": -267.2361755371094, "loss": 0.0064, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.494834899902344, "rewards/margins": 14.966562271118164, "rewards/rejected": -19.461395263671875, "step": 2900 }, { "epoch": 1.32, "eval_logits/chosen": -2.141598701477051, "eval_logits/rejected": -2.027343273162842, "eval_logps/chosen": -134.41439819335938, "eval_logps/rejected": -257.9463195800781, "eval_loss": 0.02687516249716282, "eval_rewards/accuracies": 0.9750000238418579, "eval_rewards/chosen": -4.317718505859375, "eval_rewards/margins": 14.466452598571777, "eval_rewards/rejected": -18.784168243408203, "eval_runtime": 48.4811, "eval_samples_per_second": 59.033, "eval_steps_per_second": 1.856, "step": 2900 }, { "epoch": 1.33, "learning_rate": 3.09636517328825e-07, "logits/chosen": -2.116936683654785, "logits/rejected": -2.014040231704712, "logps/chosen": -132.79800415039062, "logps/rejected": -271.3309020996094, "loss": 0.0094, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.4362220764160156, "rewards/margins": 16.35221290588379, "rewards/rejected": -19.788434982299805, "step": 2910 }, { "epoch": 1.33, "learning_rate": 3.087912087912088e-07, "logits/chosen": -2.075852632522583, "logits/rejected": -2.0212950706481934, "logps/chosen": -141.65907287597656, "logps/rejected": -264.9969482421875, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.6742095947265625, "rewards/margins": 15.127553939819336, "rewards/rejected": -19.8017635345459, "step": 2920 }, { "epoch": 1.34, "learning_rate": 3.0794590025359256e-07, "logits/chosen": -2.0961661338806152, "logits/rejected": -1.9971954822540283, "logps/chosen": -130.35186767578125, "logps/rejected": -270.8241271972656, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/chosen": -4.06374454498291, "rewards/margins": 16.1806697845459, "rewards/rejected": -20.244413375854492, "step": 2930 }, { "epoch": 1.34, "learning_rate": 3.071005917159763e-07, "logits/chosen": -2.0451531410217285, "logits/rejected": -1.938254952430725, "logps/chosen": -130.60731506347656, "logps/rejected": -266.6194763183594, "loss": 0.0138, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.539628505706787, "rewards/margins": 15.338510513305664, "rewards/rejected": -19.878137588500977, "step": 2940 }, { "epoch": 1.35, "learning_rate": 3.062552831783601e-07, "logits/chosen": -2.068493366241455, "logits/rejected": -1.9677813053131104, "logps/chosen": -131.9779510498047, "logps/rejected": -259.90789794921875, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -4.384075164794922, "rewards/margins": 14.50419807434082, "rewards/rejected": -18.88827133178711, "step": 2950 }, { "epoch": 1.35, "learning_rate": 3.054099746407439e-07, "logits/chosen": -2.090090751647949, "logits/rejected": -1.969435691833496, "logps/chosen": -131.5757598876953, "logps/rejected": -254.5954132080078, "loss": 0.0119, "rewards/accuracies": 1.0, "rewards/chosen": -3.72786021232605, "rewards/margins": 14.49266529083252, "rewards/rejected": -18.220523834228516, "step": 2960 }, { "epoch": 1.36, "learning_rate": 3.0456466610312764e-07, "logits/chosen": -1.991431474685669, "logits/rejected": -1.859548568725586, "logps/chosen": -126.8050765991211, "logps/rejected": -244.7470245361328, "loss": 0.0064, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.15117883682251, "rewards/margins": 13.41639232635498, "rewards/rejected": -17.56757164001465, "step": 2970 }, { "epoch": 1.36, "learning_rate": 3.037193575655114e-07, "logits/chosen": -2.0137648582458496, "logits/rejected": -1.9008442163467407, "logps/chosen": -134.04664611816406, "logps/rejected": -252.976318359375, "loss": 0.0052, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.397458553314209, "rewards/margins": 13.81383228302002, "rewards/rejected": -18.211292266845703, "step": 2980 }, { "epoch": 1.36, "learning_rate": 3.0287404902789516e-07, "logits/chosen": -2.012768268585205, "logits/rejected": -1.8527495861053467, "logps/chosen": -140.04957580566406, "logps/rejected": -276.19427490234375, "loss": 0.0221, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.443761348724365, "rewards/margins": 15.549433708190918, "rewards/rejected": -19.993196487426758, "step": 2990 }, { "epoch": 1.37, "learning_rate": 3.020287404902789e-07, "logits/chosen": -2.1158039569854736, "logits/rejected": -1.957754373550415, "logps/chosen": -128.07528686523438, "logps/rejected": -259.60809326171875, "loss": 0.0093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.460177183151245, "rewards/margins": 15.643338203430176, "rewards/rejected": -19.103515625, "step": 3000 }, { "epoch": 1.37, "eval_logits/chosen": -2.078118324279785, "eval_logits/rejected": -1.9133172035217285, "eval_logps/chosen": -137.57186889648438, "eval_logps/rejected": -264.0636291503906, "eval_loss": 0.023424891754984856, "eval_rewards/accuracies": 0.9722222089767456, "eval_rewards/chosen": -4.633467674255371, "eval_rewards/margins": 14.76243782043457, "eval_rewards/rejected": -19.395904541015625, "eval_runtime": 49.0999, "eval_samples_per_second": 58.289, "eval_steps_per_second": 1.833, "step": 3000 }, { "epoch": 1.37, "learning_rate": 3.011834319526627e-07, "logits/chosen": -2.0927577018737793, "logits/rejected": -1.9399240016937256, "logps/chosen": -135.44723510742188, "logps/rejected": -270.5841979980469, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -3.9826979637145996, "rewards/margins": 16.10317611694336, "rewards/rejected": -20.085874557495117, "step": 3010 }, { "epoch": 1.38, "learning_rate": 3.003381234150465e-07, "logits/chosen": -1.9384254217147827, "logits/rejected": -1.738167405128479, "logps/chosen": -132.06727600097656, "logps/rejected": -276.01617431640625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -3.956315279006958, "rewards/margins": 16.69976043701172, "rewards/rejected": -20.65607452392578, "step": 3020 }, { "epoch": 1.38, "learning_rate": 2.9949281487743024e-07, "logits/chosen": -1.9904791116714478, "logits/rejected": -1.8166635036468506, "logps/chosen": -134.52444458007812, "logps/rejected": -280.89031982421875, "loss": 0.0134, "rewards/accuracies": 1.0, "rewards/chosen": -4.553272247314453, "rewards/margins": 16.27239990234375, "rewards/rejected": -20.82567024230957, "step": 3030 }, { "epoch": 1.39, "learning_rate": 2.98647506339814e-07, "logits/chosen": -1.996466040611267, "logits/rejected": -1.8200994729995728, "logps/chosen": -126.57682800292969, "logps/rejected": -277.4131774902344, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -3.318772554397583, "rewards/margins": 17.410900115966797, "rewards/rejected": -20.729673385620117, "step": 3040 }, { "epoch": 1.39, "learning_rate": 2.978021978021978e-07, "logits/chosen": -1.935706377029419, "logits/rejected": -1.7807499170303345, "logps/chosen": -138.53733825683594, "logps/rejected": -271.65582275390625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": -4.194025993347168, "rewards/margins": 15.971760749816895, "rewards/rejected": -20.16578483581543, "step": 3050 }, { "epoch": 1.4, "learning_rate": 2.9695688926458157e-07, "logits/chosen": -1.9109838008880615, "logits/rejected": -1.7679493427276611, "logps/chosen": -138.1450958251953, "logps/rejected": -284.1612243652344, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -4.85819149017334, "rewards/margins": 16.53154754638672, "rewards/rejected": -21.389739990234375, "step": 3060 }, { "epoch": 1.4, "learning_rate": 2.961115807269654e-07, "logits/chosen": -1.9031598567962646, "logits/rejected": -1.7178453207015991, "logps/chosen": -133.40499877929688, "logps/rejected": -313.7869567871094, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.607630729675293, "rewards/margins": 19.082489013671875, "rewards/rejected": -23.690120697021484, "step": 3070 }, { "epoch": 1.41, "learning_rate": 2.952662721893491e-07, "logits/chosen": -1.9240505695343018, "logits/rejected": -1.7546335458755493, "logps/chosen": -140.02825927734375, "logps/rejected": -313.56463623046875, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.939627647399902, "rewards/margins": 18.84872817993164, "rewards/rejected": -23.788354873657227, "step": 3080 }, { "epoch": 1.41, "learning_rate": 2.9442096365173284e-07, "logits/chosen": -1.9146785736083984, "logits/rejected": -1.750884771347046, "logps/chosen": -136.1601104736328, "logps/rejected": -272.26361083984375, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": -3.955554962158203, "rewards/margins": 15.811803817749023, "rewards/rejected": -19.767358779907227, "step": 3090 }, { "epoch": 1.41, "learning_rate": 2.9357565511411665e-07, "logits/chosen": -1.888639211654663, "logits/rejected": -1.7459430694580078, "logps/chosen": -144.42047119140625, "logps/rejected": -279.8617248535156, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -5.060923099517822, "rewards/margins": 15.9983549118042, "rewards/rejected": -21.059276580810547, "step": 3100 }, { "epoch": 1.41, "eval_logits/chosen": -1.909785270690918, "eval_logits/rejected": -1.7623956203460693, "eval_logps/chosen": -142.03558349609375, "eval_logps/rejected": -271.3341064453125, "eval_loss": 0.022984443232417107, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.079836845397949, "eval_rewards/margins": 15.043112754821777, "eval_rewards/rejected": -20.122955322265625, "eval_runtime": 49.0951, "eval_samples_per_second": 58.295, "eval_steps_per_second": 1.833, "step": 3100 }, { "epoch": 1.42, "learning_rate": 2.927303465765004e-07, "logits/chosen": -1.9462015628814697, "logits/rejected": -1.7958303689956665, "logps/chosen": -140.11074829101562, "logps/rejected": -295.42303466796875, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.818387508392334, "rewards/margins": 17.30405044555664, "rewards/rejected": -22.1224365234375, "step": 3110 }, { "epoch": 1.42, "learning_rate": 2.918850380388842e-07, "logits/chosen": -1.9005266427993774, "logits/rejected": -1.7481244802474976, "logps/chosen": -152.64340209960938, "logps/rejected": -297.1541442871094, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -5.619594573974609, "rewards/margins": 16.809993743896484, "rewards/rejected": -22.429584503173828, "step": 3120 }, { "epoch": 1.43, "learning_rate": 2.910397295012679e-07, "logits/chosen": -1.849311113357544, "logits/rejected": -1.6929523944854736, "logps/chosen": -148.63531494140625, "logps/rejected": -290.8329772949219, "loss": 0.0122, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.668116569519043, "rewards/margins": 16.208858489990234, "rewards/rejected": -21.87697410583496, "step": 3130 }, { "epoch": 1.43, "learning_rate": 2.9019442096365173e-07, "logits/chosen": -2.0505404472351074, "logits/rejected": -1.8951294422149658, "logps/chosen": -141.0372314453125, "logps/rejected": -290.4275817871094, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -4.243703842163086, "rewards/margins": 17.539813995361328, "rewards/rejected": -21.78351593017578, "step": 3140 }, { "epoch": 1.44, "learning_rate": 2.893491124260355e-07, "logits/chosen": -2.0169425010681152, "logits/rejected": -1.894471526145935, "logps/chosen": -141.3474578857422, "logps/rejected": -292.564453125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -5.002017974853516, "rewards/margins": 16.942005157470703, "rewards/rejected": -21.944019317626953, "step": 3150 }, { "epoch": 1.44, "learning_rate": 2.885038038884193e-07, "logits/chosen": -2.069235324859619, "logits/rejected": -1.9347785711288452, "logps/chosen": -137.05416870117188, "logps/rejected": -259.1018981933594, "loss": 0.0107, "rewards/accuracies": 1.0, "rewards/chosen": -4.348564147949219, "rewards/margins": 14.3115234375, "rewards/rejected": -18.66008949279785, "step": 3160 }, { "epoch": 1.45, "learning_rate": 2.8765849535080305e-07, "logits/chosen": -2.0588865280151367, "logits/rejected": -1.9449745416641235, "logps/chosen": -130.26327514648438, "logps/rejected": -272.8006286621094, "loss": 0.0085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.459763526916504, "rewards/margins": 15.614460945129395, "rewards/rejected": -20.0742244720459, "step": 3170 }, { "epoch": 1.45, "learning_rate": 2.8681318681318676e-07, "logits/chosen": -1.9862502813339233, "logits/rejected": -1.8353523015975952, "logps/chosen": -128.68417358398438, "logps/rejected": -297.4949645996094, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -4.29546594619751, "rewards/margins": 18.210203170776367, "rewards/rejected": -22.50567054748535, "step": 3180 }, { "epoch": 1.46, "learning_rate": 2.8596787827557057e-07, "logits/chosen": -2.0963692665100098, "logits/rejected": -1.99569833278656, "logps/chosen": -130.62600708007812, "logps/rejected": -252.66830444335938, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6897976398468018, "rewards/margins": 14.709360122680664, "rewards/rejected": -18.399158477783203, "step": 3190 }, { "epoch": 1.46, "learning_rate": 2.851225697379543e-07, "logits/chosen": -2.1021742820739746, "logits/rejected": -1.9880530834197998, "logps/chosen": -132.0804443359375, "logps/rejected": -262.18572998046875, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/chosen": -4.257116317749023, "rewards/margins": 14.728506088256836, "rewards/rejected": -18.98562240600586, "step": 3200 }, { "epoch": 1.46, "eval_logits/chosen": -2.074456214904785, "eval_logits/rejected": -1.949985384941101, "eval_logps/chosen": -130.15731811523438, "eval_logps/rejected": -251.4248504638672, "eval_loss": 0.02166706882417202, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -3.892010450363159, "eval_rewards/margins": 14.240015983581543, "eval_rewards/rejected": -18.13202667236328, "eval_runtime": 49.0701, "eval_samples_per_second": 58.325, "eval_steps_per_second": 1.834, "step": 3200 }, { "epoch": 1.47, "learning_rate": 2.8427726120033813e-07, "logits/chosen": -2.067147731781006, "logits/rejected": -1.9356224536895752, "logps/chosen": -124.56694030761719, "logps/rejected": -271.8417053222656, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -2.7113606929779053, "rewards/margins": 17.175228118896484, "rewards/rejected": -19.886587142944336, "step": 3210 }, { "epoch": 1.47, "learning_rate": 2.834319526627219e-07, "logits/chosen": -1.9816830158233643, "logits/rejected": -1.8282426595687866, "logps/chosen": -140.33413696289062, "logps/rejected": -292.79754638671875, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.944754123687744, "rewards/margins": 17.03341293334961, "rewards/rejected": -21.978168487548828, "step": 3220 }, { "epoch": 1.47, "learning_rate": 2.8258664412510565e-07, "logits/chosen": -1.9779627323150635, "logits/rejected": -1.847848892211914, "logps/chosen": -149.28030395507812, "logps/rejected": -304.99969482421875, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/chosen": -5.514216899871826, "rewards/margins": 18.037212371826172, "rewards/rejected": -23.551427841186523, "step": 3230 }, { "epoch": 1.48, "learning_rate": 2.817413355874894e-07, "logits/chosen": -1.9442542791366577, "logits/rejected": -1.8127539157867432, "logps/chosen": -143.69705200195312, "logps/rejected": -310.5235900878906, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -5.017391204833984, "rewards/margins": 18.894908905029297, "rewards/rejected": -23.91229820251465, "step": 3240 }, { "epoch": 1.48, "learning_rate": 2.808960270498732e-07, "logits/chosen": -1.882361650466919, "logits/rejected": -1.78091561794281, "logps/chosen": -144.84976196289062, "logps/rejected": -306.044921875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -5.687883377075195, "rewards/margins": 17.577857971191406, "rewards/rejected": -23.265743255615234, "step": 3250 }, { "epoch": 1.49, "learning_rate": 2.8005071851225697e-07, "logits/chosen": -1.921932578086853, "logits/rejected": -1.798011064529419, "logps/chosen": -145.02206420898438, "logps/rejected": -306.07464599609375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -5.535216331481934, "rewards/margins": 17.976417541503906, "rewards/rejected": -23.51163101196289, "step": 3260 }, { "epoch": 1.49, "learning_rate": 2.7920540997464073e-07, "logits/chosen": -1.9915612936019897, "logits/rejected": -1.8652572631835938, "logps/chosen": -144.52069091796875, "logps/rejected": -326.04132080078125, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.750706672668457, "rewards/margins": 19.223308563232422, "rewards/rejected": -24.974014282226562, "step": 3270 }, { "epoch": 1.5, "learning_rate": 2.783601014370245e-07, "logits/chosen": -2.0454087257385254, "logits/rejected": -1.9038234949111938, "logps/chosen": -143.76519775390625, "logps/rejected": -308.48480224609375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -5.117907524108887, "rewards/margins": 18.567800521850586, "rewards/rejected": -23.685707092285156, "step": 3280 }, { "epoch": 1.5, "learning_rate": 2.7751479289940824e-07, "logits/chosen": -2.01210355758667, "logits/rejected": -1.9171651601791382, "logps/chosen": -149.78817749023438, "logps/rejected": -302.19879150390625, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -5.596650123596191, "rewards/margins": 17.594165802001953, "rewards/rejected": -23.190814971923828, "step": 3290 }, { "epoch": 1.51, "learning_rate": 2.7666948436179205e-07, "logits/chosen": -2.067559003829956, "logits/rejected": -1.9251827001571655, "logps/chosen": -160.0213623046875, "logps/rejected": -330.7239074707031, "loss": 0.0091, "rewards/accuracies": 1.0, "rewards/chosen": -6.127202033996582, "rewards/margins": 19.1259822845459, "rewards/rejected": -25.253185272216797, "step": 3300 }, { "epoch": 1.51, "eval_logits/chosen": -2.0357284545898438, "eval_logits/rejected": -1.9011106491088867, "eval_logps/chosen": -148.20668029785156, "eval_logps/rejected": -288.69830322265625, "eval_loss": 0.022296199575066566, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.696948051452637, "eval_rewards/margins": 16.16242218017578, "eval_rewards/rejected": -21.8593692779541, "eval_runtime": 47.7021, "eval_samples_per_second": 59.997, "eval_steps_per_second": 1.887, "step": 3300 }, { "epoch": 1.51, "learning_rate": 2.758241758241758e-07, "logits/chosen": -2.0515849590301514, "logits/rejected": -1.942239761352539, "logps/chosen": -131.13009643554688, "logps/rejected": -284.80010986328125, "loss": 0.0073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.422455787658691, "rewards/margins": 16.477149963378906, "rewards/rejected": -20.899606704711914, "step": 3310 }, { "epoch": 1.52, "learning_rate": 2.7497886728655957e-07, "logits/chosen": -2.049119710922241, "logits/rejected": -1.9266822338104248, "logps/chosen": -127.2447509765625, "logps/rejected": -258.5926208496094, "loss": 0.0221, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.6637301445007324, "rewards/margins": 15.236650466918945, "rewards/rejected": -18.900381088256836, "step": 3320 }, { "epoch": 1.52, "learning_rate": 2.741335587489433e-07, "logits/chosen": -2.138721466064453, "logits/rejected": -2.068021535873413, "logps/chosen": -126.6335678100586, "logps/rejected": -244.0006103515625, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -3.2384860515594482, "rewards/margins": 13.476173400878906, "rewards/rejected": -16.714656829833984, "step": 3330 }, { "epoch": 1.52, "learning_rate": 2.7328825021132714e-07, "logits/chosen": -2.1016390323638916, "logits/rejected": -1.9984807968139648, "logps/chosen": -135.56008911132812, "logps/rejected": -260.299560546875, "loss": 0.0088, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.3465962409973145, "rewards/margins": 14.227472305297852, "rewards/rejected": -18.574068069458008, "step": 3340 }, { "epoch": 1.53, "learning_rate": 2.724429416737109e-07, "logits/chosen": -2.007490634918213, "logits/rejected": -1.8925546407699585, "logps/chosen": -142.90896606445312, "logps/rejected": -262.3196716308594, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/chosen": -4.897563457489014, "rewards/margins": 14.112276077270508, "rewards/rejected": -19.009838104248047, "step": 3350 }, { "epoch": 1.53, "learning_rate": 2.715976331360947e-07, "logits/chosen": -2.0170576572418213, "logits/rejected": -1.9034395217895508, "logps/chosen": -139.79205322265625, "logps/rejected": -262.28289794921875, "loss": 0.011, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.756741046905518, "rewards/margins": 14.202244758605957, "rewards/rejected": -18.958986282348633, "step": 3360 }, { "epoch": 1.54, "learning_rate": 2.707523245984784e-07, "logits/chosen": -1.954490065574646, "logits/rejected": -1.847299337387085, "logps/chosen": -137.61386108398438, "logps/rejected": -256.4851379394531, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.715554237365723, "rewards/margins": 13.87446403503418, "rewards/rejected": -18.59002113342285, "step": 3370 }, { "epoch": 1.54, "learning_rate": 2.699070160608622e-07, "logits/chosen": -1.930686593055725, "logits/rejected": -1.7932345867156982, "logps/chosen": -137.0919647216797, "logps/rejected": -268.186767578125, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.4739766120910645, "rewards/margins": 15.397375106811523, "rewards/rejected": -19.87135124206543, "step": 3380 }, { "epoch": 1.55, "learning_rate": 2.69061707523246e-07, "logits/chosen": -1.902661681175232, "logits/rejected": -1.7563111782073975, "logps/chosen": -132.42283630371094, "logps/rejected": -278.3194885253906, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -4.570847034454346, "rewards/margins": 16.12563133239746, "rewards/rejected": -20.69647979736328, "step": 3390 }, { "epoch": 1.55, "learning_rate": 2.6821639898562973e-07, "logits/chosen": -1.9157822132110596, "logits/rejected": -1.7864373922348022, "logps/chosen": -136.57540893554688, "logps/rejected": -260.8625793457031, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -4.147719383239746, "rewards/margins": 15.035512924194336, "rewards/rejected": -19.183231353759766, "step": 3400 }, { "epoch": 1.55, "eval_logits/chosen": -1.8729345798492432, "eval_logits/rejected": -1.7467317581176758, "eval_logps/chosen": -139.7764129638672, "eval_logps/rejected": -252.1146697998047, "eval_loss": 0.021492039784789085, "eval_rewards/accuracies": 0.9861111044883728, "eval_rewards/chosen": -4.8539228439331055, "eval_rewards/margins": 13.347084999084473, "eval_rewards/rejected": -18.20100975036621, "eval_runtime": 48.9371, "eval_samples_per_second": 58.483, "eval_steps_per_second": 1.839, "step": 3400 }, { "epoch": 1.56, "learning_rate": 2.6737109044801354e-07, "logits/chosen": -1.9103200435638428, "logits/rejected": -1.763331651687622, "logps/chosen": -137.3577117919922, "logps/rejected": -259.80828857421875, "loss": 0.0142, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.467274188995361, "rewards/margins": 14.363429069519043, "rewards/rejected": -18.830707550048828, "step": 3410 }, { "epoch": 1.56, "learning_rate": 2.6652578191039725e-07, "logits/chosen": -1.8430370092391968, "logits/rejected": -1.711909532546997, "logps/chosen": -148.16954040527344, "logps/rejected": -279.33135986328125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -5.628726482391357, "rewards/margins": 15.455856323242188, "rewards/rejected": -21.08458137512207, "step": 3420 }, { "epoch": 1.57, "learning_rate": 2.6568047337278106e-07, "logits/chosen": -1.8523824214935303, "logits/rejected": -1.7088029384613037, "logps/chosen": -142.4571990966797, "logps/rejected": -282.54547119140625, "loss": 0.0082, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.997100830078125, "rewards/margins": 15.815884590148926, "rewards/rejected": -20.81298828125, "step": 3430 }, { "epoch": 1.57, "learning_rate": 2.648351648351648e-07, "logits/chosen": -1.9678691625595093, "logits/rejected": -1.867297887802124, "logps/chosen": -135.94393920898438, "logps/rejected": -251.23452758789062, "loss": 0.0127, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.613978624343872, "rewards/margins": 13.765337944030762, "rewards/rejected": -17.379316329956055, "step": 3440 }, { "epoch": 1.57, "learning_rate": 2.639898562975486e-07, "logits/chosen": -1.9418270587921143, "logits/rejected": -1.833435297012329, "logps/chosen": -131.86508178710938, "logps/rejected": -259.078857421875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -3.501054286956787, "rewards/margins": 15.120953559875488, "rewards/rejected": -18.622005462646484, "step": 3450 }, { "epoch": 1.58, "learning_rate": 2.631445477599324e-07, "logits/chosen": -1.9227845668792725, "logits/rejected": -1.8239774703979492, "logps/chosen": -133.7466278076172, "logps/rejected": -259.53369140625, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/chosen": -4.523486614227295, "rewards/margins": 14.523590087890625, "rewards/rejected": -19.047077178955078, "step": 3460 }, { "epoch": 1.58, "learning_rate": 2.6229923922231614e-07, "logits/chosen": -2.066107988357544, "logits/rejected": -1.9604514837265015, "logps/chosen": -122.82148742675781, "logps/rejected": -241.93679809570312, "loss": 0.0126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.367501735687256, "rewards/margins": 13.92534065246582, "rewards/rejected": -17.2928409576416, "step": 3470 }, { "epoch": 1.59, "learning_rate": 2.614539306846999e-07, "logits/chosen": -2.0362019538879395, "logits/rejected": -1.9253215789794922, "logps/chosen": -126.20308685302734, "logps/rejected": -241.9541473388672, "loss": 0.0126, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -3.430163860321045, "rewards/margins": 13.962501525878906, "rewards/rejected": -17.39266586303711, "step": 3480 }, { "epoch": 1.59, "learning_rate": 2.606086221470837e-07, "logits/chosen": -2.0087404251098633, "logits/rejected": -1.8917499780654907, "logps/chosen": -138.81875610351562, "logps/rejected": -268.839111328125, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -4.0037407875061035, "rewards/margins": 15.63740348815918, "rewards/rejected": -19.641145706176758, "step": 3490 }, { "epoch": 1.6, "learning_rate": 2.5976331360946746e-07, "logits/chosen": -1.9867855310440063, "logits/rejected": -1.867781400680542, "logps/chosen": -122.08673095703125, "logps/rejected": -257.277587890625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -3.2326064109802246, "rewards/margins": 15.269567489624023, "rewards/rejected": -18.502174377441406, "step": 3500 }, { "epoch": 1.6, "eval_logits/chosen": -1.8975638151168823, "eval_logits/rejected": -1.7721211910247803, "eval_logps/chosen": -142.01966857910156, "eval_logps/rejected": -266.510009765625, "eval_loss": 0.021842440590262413, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -5.078246116638184, "eval_rewards/margins": 14.562296867370605, "eval_rewards/rejected": -19.64054298400879, "eval_runtime": 48.4963, "eval_samples_per_second": 59.015, "eval_steps_per_second": 1.856, "step": 3500 }, { "epoch": 1.6, "learning_rate": 2.5891800507185117e-07, "logits/chosen": -1.9047114849090576, "logits/rejected": -1.7761516571044922, "logps/chosen": -140.54165649414062, "logps/rejected": -272.0914611816406, "loss": 0.0052, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.641226768493652, "rewards/margins": 15.182680130004883, "rewards/rejected": -19.82390594482422, "step": 3510 }, { "epoch": 1.61, "learning_rate": 2.58072696534235e-07, "logits/chosen": -1.892962098121643, "logits/rejected": -1.7451508045196533, "logps/chosen": -150.09315490722656, "logps/rejected": -296.66632080078125, "loss": 0.0107, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.678755760192871, "rewards/margins": 16.952327728271484, "rewards/rejected": -22.63108253479004, "step": 3520 }, { "epoch": 1.61, "learning_rate": 2.5722738799661873e-07, "logits/chosen": -1.8864864110946655, "logits/rejected": -1.7408676147460938, "logps/chosen": -141.12594604492188, "logps/rejected": -289.6314697265625, "loss": 0.0091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.263436794281006, "rewards/margins": 16.858762741088867, "rewards/rejected": -22.122201919555664, "step": 3530 }, { "epoch": 1.62, "learning_rate": 2.5638207945900254e-07, "logits/chosen": -1.8301013708114624, "logits/rejected": -1.6557966470718384, "logps/chosen": -144.84866333007812, "logps/rejected": -302.3169860839844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.072390079498291, "rewards/margins": 17.961421966552734, "rewards/rejected": -23.033809661865234, "step": 3540 }, { "epoch": 1.62, "learning_rate": 2.555367709213863e-07, "logits/chosen": -1.874068021774292, "logits/rejected": -1.708491325378418, "logps/chosen": -133.3509063720703, "logps/rejected": -283.3302917480469, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.035571098327637, "rewards/margins": 17.044132232666016, "rewards/rejected": -21.079702377319336, "step": 3550 }, { "epoch": 1.62, "learning_rate": 2.5469146238377006e-07, "logits/chosen": -1.8753341436386108, "logits/rejected": -1.7035486698150635, "logps/chosen": -132.8619384765625, "logps/rejected": -260.8714294433594, "loss": 0.0084, "rewards/accuracies": 1.0, "rewards/chosen": -3.194260835647583, "rewards/margins": 15.806689262390137, "rewards/rejected": -19.00094985961914, "step": 3560 }, { "epoch": 1.63, "learning_rate": 2.538461538461538e-07, "logits/chosen": -1.8306610584259033, "logits/rejected": -1.6790813207626343, "logps/chosen": -129.2947540283203, "logps/rejected": -261.3028259277344, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.236842632293701, "rewards/margins": 14.93622875213623, "rewards/rejected": -19.173070907592773, "step": 3570 }, { "epoch": 1.63, "learning_rate": 2.530008453085376e-07, "logits/chosen": -1.8658215999603271, "logits/rejected": -1.695593237876892, "logps/chosen": -122.03929138183594, "logps/rejected": -287.39764404296875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/chosen": -2.4829065799713135, "rewards/margins": 19.046436309814453, "rewards/rejected": -21.529342651367188, "step": 3580 }, { "epoch": 1.64, "learning_rate": 2.521555367709214e-07, "logits/chosen": -1.784368872642517, "logits/rejected": -1.6300216913223267, "logps/chosen": -140.92726135253906, "logps/rejected": -296.0577087402344, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -4.633268356323242, "rewards/margins": 16.99161148071289, "rewards/rejected": -21.6248779296875, "step": 3590 }, { "epoch": 1.64, "learning_rate": 2.513102282333052e-07, "logits/chosen": -1.7988407611846924, "logits/rejected": -1.5915801525115967, "logps/chosen": -130.07949829101562, "logps/rejected": -284.1544494628906, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -3.352548122406006, "rewards/margins": 18.076284408569336, "rewards/rejected": -21.428834915161133, "step": 3600 }, { "epoch": 1.64, "eval_logits/chosen": -1.7237591743469238, "eval_logits/rejected": -1.5604692697525024, "eval_logps/chosen": -142.53948974609375, "eval_logps/rejected": -272.5372314453125, "eval_loss": 0.021309753879904747, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -5.13023042678833, "eval_rewards/margins": 15.113033294677734, "eval_rewards/rejected": -20.243263244628906, "eval_runtime": 48.3893, "eval_samples_per_second": 59.145, "eval_steps_per_second": 1.86, "step": 3600 }, { "epoch": 1.65, "learning_rate": 2.504649196956889e-07, "logits/chosen": -1.7095403671264648, "logits/rejected": -1.5458117723464966, "logps/chosen": -145.26849365234375, "logps/rejected": -284.03741455078125, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.109766960144043, "rewards/margins": 16.189342498779297, "rewards/rejected": -21.299108505249023, "step": 3610 }, { "epoch": 1.65, "learning_rate": 2.4961961115807265e-07, "logits/chosen": -1.779497504234314, "logits/rejected": -1.641000747680664, "logps/chosen": -131.517333984375, "logps/rejected": -281.7998352050781, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/chosen": -4.562431335449219, "rewards/margins": 16.709604263305664, "rewards/rejected": -21.272035598754883, "step": 3620 }, { "epoch": 1.66, "learning_rate": 2.4877430262045646e-07, "logits/chosen": -1.8060439825057983, "logits/rejected": -1.6531273126602173, "logps/chosen": -133.15786743164062, "logps/rejected": -289.7575988769531, "loss": 0.0101, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.9135138988494873, "rewards/margins": 17.889179229736328, "rewards/rejected": -21.802692413330078, "step": 3630 }, { "epoch": 1.66, "learning_rate": 2.479289940828402e-07, "logits/chosen": -1.8380343914031982, "logits/rejected": -1.681099534034729, "logps/chosen": -142.34280395507812, "logps/rejected": -281.3111267089844, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": -4.688026428222656, "rewards/margins": 16.42477035522461, "rewards/rejected": -21.112796783447266, "step": 3640 }, { "epoch": 1.67, "learning_rate": 2.47083685545224e-07, "logits/chosen": -1.7750627994537354, "logits/rejected": -1.6246888637542725, "logps/chosen": -138.1237335205078, "logps/rejected": -296.9304504394531, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -4.636274337768555, "rewards/margins": 17.649145126342773, "rewards/rejected": -22.285419464111328, "step": 3650 }, { "epoch": 1.67, "learning_rate": 2.462383770076078e-07, "logits/chosen": -1.7485411167144775, "logits/rejected": -1.5784364938735962, "logps/chosen": -146.9974365234375, "logps/rejected": -324.06549072265625, "loss": 0.0072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.838747978210449, "rewards/margins": 19.2270565032959, "rewards/rejected": -25.065805435180664, "step": 3660 }, { "epoch": 1.68, "learning_rate": 2.4539306846999154e-07, "logits/chosen": -1.648329496383667, "logits/rejected": -1.4489551782608032, "logps/chosen": -146.84127807617188, "logps/rejected": -308.2528381347656, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -5.316080570220947, "rewards/margins": 18.388378143310547, "rewards/rejected": -23.70446014404297, "step": 3670 }, { "epoch": 1.68, "learning_rate": 2.445477599323753e-07, "logits/chosen": -1.696155309677124, "logits/rejected": -1.4816488027572632, "logps/chosen": -144.35195922851562, "logps/rejected": -316.3251953125, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -4.834813594818115, "rewards/margins": 19.11313819885254, "rewards/rejected": -23.947948455810547, "step": 3680 }, { "epoch": 1.68, "learning_rate": 2.437024513947591e-07, "logits/chosen": -1.6801557540893555, "logits/rejected": -1.4912935495376587, "logps/chosen": -148.6354522705078, "logps/rejected": -324.12432861328125, "loss": 0.0073, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.01719856262207, "rewards/margins": 20.103734970092773, "rewards/rejected": -25.120933532714844, "step": 3690 }, { "epoch": 1.69, "learning_rate": 2.4285714285714287e-07, "logits/chosen": -1.7024446725845337, "logits/rejected": -1.4968335628509521, "logps/chosen": -130.99986267089844, "logps/rejected": -303.06231689453125, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.061335563659668, "rewards/margins": 18.91769027709961, "rewards/rejected": -22.979026794433594, "step": 3700 }, { "epoch": 1.69, "eval_logits/chosen": -1.6558696031570435, "eval_logits/rejected": -1.4704645872116089, "eval_logps/chosen": -147.81668090820312, "eval_logps/rejected": -295.7735290527344, "eval_loss": 0.022238925099372864, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -5.657946586608887, "eval_rewards/margins": 16.908945083618164, "eval_rewards/rejected": -22.566892623901367, "eval_runtime": 48.4889, "eval_samples_per_second": 59.024, "eval_steps_per_second": 1.856, "step": 3700 }, { "epoch": 1.69, "learning_rate": 2.420118343195266e-07, "logits/chosen": -1.7344181537628174, "logits/rejected": -1.5882803201675415, "logps/chosen": -140.1300506591797, "logps/rejected": -290.84539794921875, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -4.182340145111084, "rewards/margins": 17.883474349975586, "rewards/rejected": -22.065814971923828, "step": 3710 }, { "epoch": 1.7, "learning_rate": 2.411665257819104e-07, "logits/chosen": -1.741615653038025, "logits/rejected": -1.5667476654052734, "logps/chosen": -133.61981201171875, "logps/rejected": -274.44439697265625, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -4.086359977722168, "rewards/margins": 16.344524383544922, "rewards/rejected": -20.43088150024414, "step": 3720 }, { "epoch": 1.7, "learning_rate": 2.4032121724429414e-07, "logits/chosen": -1.8133243322372437, "logits/rejected": -1.6576799154281616, "logps/chosen": -137.39431762695312, "logps/rejected": -262.45318603515625, "loss": 0.0113, "rewards/accuracies": 1.0, "rewards/chosen": -4.237823486328125, "rewards/margins": 14.64301872253418, "rewards/rejected": -18.880840301513672, "step": 3730 }, { "epoch": 1.71, "learning_rate": 2.394759087066779e-07, "logits/chosen": -1.7763252258300781, "logits/rejected": -1.5898211002349854, "logps/chosen": -134.40628051757812, "logps/rejected": -280.9346008300781, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -3.767348527908325, "rewards/margins": 17.052318572998047, "rewards/rejected": -20.81966781616211, "step": 3740 }, { "epoch": 1.71, "learning_rate": 2.386306001690617e-07, "logits/chosen": -1.6497255563735962, "logits/rejected": -1.4708611965179443, "logps/chosen": -136.38125610351562, "logps/rejected": -289.7961730957031, "loss": 0.0054, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.592088222503662, "rewards/margins": 17.545528411865234, "rewards/rejected": -22.13762092590332, "step": 3750 }, { "epoch": 1.72, "learning_rate": 2.377852916314455e-07, "logits/chosen": -1.7078943252563477, "logits/rejected": -1.5805634260177612, "logps/chosen": -135.6635284423828, "logps/rejected": -278.7815856933594, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/chosen": -4.852513313293457, "rewards/margins": 15.829513549804688, "rewards/rejected": -20.68202781677246, "step": 3760 }, { "epoch": 1.72, "learning_rate": 2.3693998309382922e-07, "logits/chosen": -1.7141424417495728, "logits/rejected": -1.585723638534546, "logps/chosen": -130.74488830566406, "logps/rejected": -281.3462829589844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -3.7584216594696045, "rewards/margins": 16.653173446655273, "rewards/rejected": -20.41159439086914, "step": 3770 }, { "epoch": 1.73, "learning_rate": 2.36094674556213e-07, "logits/chosen": -1.7240254878997803, "logits/rejected": -1.5711407661437988, "logps/chosen": -131.56781005859375, "logps/rejected": -273.98455810546875, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/chosen": -3.972919464111328, "rewards/margins": 16.38176155090332, "rewards/rejected": -20.354679107666016, "step": 3780 }, { "epoch": 1.73, "learning_rate": 2.3524936601859676e-07, "logits/chosen": -1.7031100988388062, "logits/rejected": -1.5339725017547607, "logps/chosen": -139.65846252441406, "logps/rejected": -275.16375732421875, "loss": 0.0161, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -4.4210100173950195, "rewards/margins": 15.863576889038086, "rewards/rejected": -20.284587860107422, "step": 3790 }, { "epoch": 1.73, "learning_rate": 2.3440405748098055e-07, "logits/chosen": -1.8004308938980103, "logits/rejected": -1.6649887561798096, "logps/chosen": -116.216552734375, "logps/rejected": -240.1760711669922, "loss": 0.0124, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -2.5760066509246826, "rewards/margins": 14.410638809204102, "rewards/rejected": -16.98664665222168, "step": 3800 }, { "epoch": 1.73, "eval_logits/chosen": -1.7606146335601807, "eval_logits/rejected": -1.6381984949111938, "eval_logps/chosen": -126.71410369873047, "eval_logps/rejected": -231.4264373779297, "eval_loss": 0.022137422114610672, "eval_rewards/accuracies": 0.9861111044883728, "eval_rewards/chosen": -3.5476911067962646, "eval_rewards/margins": 12.584492683410645, "eval_rewards/rejected": -16.132184982299805, "eval_runtime": 49.0971, "eval_samples_per_second": 58.293, "eval_steps_per_second": 1.833, "step": 3800 }, { "epoch": 1.74, "learning_rate": 2.3355874894336433e-07, "logits/chosen": -1.7343565225601196, "logits/rejected": -1.6265987157821655, "logps/chosen": -127.9572982788086, "logps/rejected": -241.6963653564453, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.091909408569336, "rewards/margins": 13.281129837036133, "rewards/rejected": -17.373037338256836, "step": 3810 }, { "epoch": 1.74, "learning_rate": 2.327134404057481e-07, "logits/chosen": -1.6919950246810913, "logits/rejected": -1.5495738983154297, "logps/chosen": -132.85983276367188, "logps/rejected": -267.4934387207031, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.089395046234131, "rewards/margins": 15.883015632629395, "rewards/rejected": -19.972410202026367, "step": 3820 }, { "epoch": 1.75, "learning_rate": 2.3186813186813187e-07, "logits/chosen": -1.7256600856781006, "logits/rejected": -1.553453803062439, "logps/chosen": -134.67343139648438, "logps/rejected": -279.3134765625, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.113102912902832, "rewards/margins": 16.799463272094727, "rewards/rejected": -20.912565231323242, "step": 3830 }, { "epoch": 1.75, "learning_rate": 2.3102282333051563e-07, "logits/chosen": -1.6534430980682373, "logits/rejected": -1.5118329524993896, "logps/chosen": -132.36349487304688, "logps/rejected": -284.98406982421875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/chosen": -4.512646198272705, "rewards/margins": 17.21428871154785, "rewards/rejected": -21.7269344329834, "step": 3840 }, { "epoch": 1.76, "learning_rate": 2.301775147928994e-07, "logits/chosen": -1.6359570026397705, "logits/rejected": -1.4759365320205688, "logps/chosen": -138.23097229003906, "logps/rejected": -283.275146484375, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -4.575328350067139, "rewards/margins": 16.573030471801758, "rewards/rejected": -21.148357391357422, "step": 3850 }, { "epoch": 1.76, "learning_rate": 2.2933220625528317e-07, "logits/chosen": -1.7314882278442383, "logits/rejected": -1.6037628650665283, "logps/chosen": -136.9398651123047, "logps/rejected": -272.9326171875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/chosen": -4.366036415100098, "rewards/margins": 15.990483283996582, "rewards/rejected": -20.35651969909668, "step": 3860 }, { "epoch": 1.77, "learning_rate": 2.2848689771766693e-07, "logits/chosen": -1.7622716426849365, "logits/rejected": -1.6511011123657227, "logps/chosen": -128.43710327148438, "logps/rejected": -265.1844787597656, "loss": 0.0087, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.14860200881958, "rewards/margins": 15.2080659866333, "rewards/rejected": -19.356666564941406, "step": 3870 }, { "epoch": 1.77, "learning_rate": 2.276415891800507e-07, "logits/chosen": -1.8153717517852783, "logits/rejected": -1.6824384927749634, "logps/chosen": -138.5544891357422, "logps/rejected": -276.2153015136719, "loss": 0.0031, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.317188739776611, "rewards/margins": 15.966781616210938, "rewards/rejected": -20.28396987915039, "step": 3880 }, { "epoch": 1.78, "learning_rate": 2.2679628064243447e-07, "logits/chosen": -1.8020213842391968, "logits/rejected": -1.6542917490005493, "logps/chosen": -123.3238754272461, "logps/rejected": -290.70306396484375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -3.0738091468811035, "rewards/margins": 18.742576599121094, "rewards/rejected": -21.81638526916504, "step": 3890 }, { "epoch": 1.78, "learning_rate": 2.2595097210481825e-07, "logits/chosen": -1.7502750158309937, "logits/rejected": -1.5834579467773438, "logps/chosen": -141.47828674316406, "logps/rejected": -303.58599853515625, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.480618476867676, "rewards/margins": 18.75547981262207, "rewards/rejected": -23.236099243164062, "step": 3900 }, { "epoch": 1.78, "eval_logits/chosen": -1.713238000869751, "eval_logits/rejected": -1.565711259841919, "eval_logps/chosen": -141.85250854492188, "eval_logps/rejected": -270.1767272949219, "eval_loss": 0.02007424458861351, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.06152868270874, "eval_rewards/margins": 14.945685386657715, "eval_rewards/rejected": -20.007213592529297, "eval_runtime": 48.854, "eval_samples_per_second": 58.583, "eval_steps_per_second": 1.842, "step": 3900 }, { "epoch": 1.78, "learning_rate": 2.25105663567202e-07, "logits/chosen": -1.744927167892456, "logits/rejected": -1.593875527381897, "logps/chosen": -132.0017547607422, "logps/rejected": -284.510498046875, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/chosen": -4.235764026641846, "rewards/margins": 17.11020278930664, "rewards/rejected": -21.345964431762695, "step": 3910 }, { "epoch": 1.79, "learning_rate": 2.242603550295858e-07, "logits/chosen": -1.7243611812591553, "logits/rejected": -1.575244426727295, "logps/chosen": -135.41140747070312, "logps/rejected": -294.58331298828125, "loss": 0.0116, "rewards/accuracies": 1.0, "rewards/chosen": -4.456456184387207, "rewards/margins": 17.783428192138672, "rewards/rejected": -22.23988151550293, "step": 3920 }, { "epoch": 1.79, "learning_rate": 2.2341504649196957e-07, "logits/chosen": -1.7868388891220093, "logits/rejected": -1.6776450872421265, "logps/chosen": -143.6422576904297, "logps/rejected": -281.7991638183594, "loss": 0.0085, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.448180198669434, "rewards/margins": 16.026264190673828, "rewards/rejected": -20.474443435668945, "step": 3930 }, { "epoch": 1.8, "learning_rate": 2.2256973795435333e-07, "logits/chosen": -1.7921279668807983, "logits/rejected": -1.6391985416412354, "logps/chosen": -136.37741088867188, "logps/rejected": -279.2124328613281, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -4.4318318367004395, "rewards/margins": 16.44356918334961, "rewards/rejected": -20.87540054321289, "step": 3940 }, { "epoch": 1.8, "learning_rate": 2.2172442941673711e-07, "logits/chosen": -1.7016105651855469, "logits/rejected": -1.5557249784469604, "logps/chosen": -137.5961151123047, "logps/rejected": -281.9116516113281, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -4.446589469909668, "rewards/margins": 16.51205825805664, "rewards/rejected": -20.958646774291992, "step": 3950 }, { "epoch": 1.81, "learning_rate": 2.2087912087912087e-07, "logits/chosen": -1.70087468624115, "logits/rejected": -1.5272462368011475, "logps/chosen": -134.16659545898438, "logps/rejected": -300.7137145996094, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -4.51452112197876, "rewards/margins": 18.287456512451172, "rewards/rejected": -22.80197525024414, "step": 3960 }, { "epoch": 1.81, "learning_rate": 2.2003381234150466e-07, "logits/chosen": -1.7029697895050049, "logits/rejected": -1.5384533405303955, "logps/chosen": -135.23904418945312, "logps/rejected": -311.7080993652344, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -4.424857139587402, "rewards/margins": 19.508625030517578, "rewards/rejected": -23.933483123779297, "step": 3970 }, { "epoch": 1.82, "learning_rate": 2.1918850380388839e-07, "logits/chosen": -1.7048301696777344, "logits/rejected": -1.5668028593063354, "logps/chosen": -155.3131103515625, "logps/rejected": -305.61492919921875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -6.269586086273193, "rewards/margins": 17.230533599853516, "rewards/rejected": -23.5001163482666, "step": 3980 }, { "epoch": 1.82, "learning_rate": 2.1834319526627217e-07, "logits/chosen": -1.716619849205017, "logits/rejected": -1.5665405988693237, "logps/chosen": -146.93238830566406, "logps/rejected": -297.7852783203125, "loss": 0.0243, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.378264427185059, "rewards/margins": 17.037546157836914, "rewards/rejected": -22.41581153869629, "step": 3990 }, { "epoch": 1.83, "learning_rate": 2.1749788672865595e-07, "logits/chosen": -1.7857425212860107, "logits/rejected": -1.641950011253357, "logps/chosen": -142.95773315429688, "logps/rejected": -291.2868347167969, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.872292518615723, "rewards/margins": 17.185726165771484, "rewards/rejected": -22.058019638061523, "step": 4000 }, { "epoch": 1.83, "eval_logits/chosen": -1.762734293937683, "eval_logits/rejected": -1.618021011352539, "eval_logps/chosen": -146.5255126953125, "eval_logps/rejected": -275.50830078125, "eval_loss": 0.019976630806922913, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.528830528259277, "eval_rewards/margins": 15.011541366577148, "eval_rewards/rejected": -20.540372848510742, "eval_runtime": 49.1871, "eval_samples_per_second": 58.186, "eval_steps_per_second": 1.83, "step": 4000 }, { "epoch": 1.83, "learning_rate": 2.166525781910397e-07, "logits/chosen": -1.8257217407226562, "logits/rejected": -1.7216644287109375, "logps/chosen": -149.7119598388672, "logps/rejected": -270.9111633300781, "loss": 0.0134, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.1475443840026855, "rewards/margins": 14.822622299194336, "rewards/rejected": -19.970165252685547, "step": 4010 }, { "epoch": 1.83, "learning_rate": 2.158072696534235e-07, "logits/chosen": -1.81096613407135, "logits/rejected": -1.7163082361221313, "logps/chosen": -126.8602294921875, "logps/rejected": -255.57363891601562, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/chosen": -4.170827865600586, "rewards/margins": 14.215136528015137, "rewards/rejected": -18.385963439941406, "step": 4020 }, { "epoch": 1.84, "learning_rate": 2.1496196111580725e-07, "logits/chosen": -1.9823904037475586, "logits/rejected": -1.87985360622406, "logps/chosen": -136.60366821289062, "logps/rejected": -257.38446044921875, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.11444091796875, "rewards/margins": 14.040242195129395, "rewards/rejected": -18.154682159423828, "step": 4030 }, { "epoch": 1.84, "learning_rate": 2.1411665257819104e-07, "logits/chosen": -1.9636681079864502, "logits/rejected": -1.8081839084625244, "logps/chosen": -132.60195922851562, "logps/rejected": -262.4747619628906, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -3.0300045013427734, "rewards/margins": 15.940587043762207, "rewards/rejected": -18.970592498779297, "step": 4040 }, { "epoch": 1.85, "learning_rate": 2.1327134404057482e-07, "logits/chosen": -1.9444208145141602, "logits/rejected": -1.8486173152923584, "logps/chosen": -131.70828247070312, "logps/rejected": -259.8726501464844, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -3.042513370513916, "rewards/margins": 16.066484451293945, "rewards/rejected": -19.108997344970703, "step": 4050 }, { "epoch": 1.85, "learning_rate": 2.1242603550295858e-07, "logits/chosen": -1.8983211517333984, "logits/rejected": -1.7571252584457397, "logps/chosen": -134.80789184570312, "logps/rejected": -263.4311218261719, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -3.634955644607544, "rewards/margins": 15.251360893249512, "rewards/rejected": -18.886316299438477, "step": 4060 }, { "epoch": 1.86, "learning_rate": 2.1158072696534236e-07, "logits/chosen": -1.8505229949951172, "logits/rejected": -1.7038662433624268, "logps/chosen": -133.12786865234375, "logps/rejected": -275.3601379394531, "loss": 0.0042, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.181855201721191, "rewards/margins": 15.866403579711914, "rewards/rejected": -20.04825782775879, "step": 4070 }, { "epoch": 1.86, "learning_rate": 2.107354184277261e-07, "logits/chosen": -1.8010523319244385, "logits/rejected": -1.6853234767913818, "logps/chosen": -131.32154846191406, "logps/rejected": -284.50250244140625, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.455392837524414, "rewards/margins": 16.990983963012695, "rewards/rejected": -21.44637680053711, "step": 4080 }, { "epoch": 1.87, "learning_rate": 2.0989010989010987e-07, "logits/chosen": -1.8266456127166748, "logits/rejected": -1.6853101253509521, "logps/chosen": -136.2552947998047, "logps/rejected": -295.13653564453125, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -4.681432723999023, "rewards/margins": 17.44306182861328, "rewards/rejected": -22.12449073791504, "step": 4090 }, { "epoch": 1.87, "learning_rate": 2.0904480135249363e-07, "logits/chosen": -1.8539117574691772, "logits/rejected": -1.6934545040130615, "logps/chosen": -141.95208740234375, "logps/rejected": -311.4937438964844, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -4.835737228393555, "rewards/margins": 18.804773330688477, "rewards/rejected": -23.64051055908203, "step": 4100 }, { "epoch": 1.87, "eval_logits/chosen": -1.799795150756836, "eval_logits/rejected": -1.637331247329712, "eval_logps/chosen": -147.08396911621094, "eval_logps/rejected": -292.2131652832031, "eval_loss": 0.020981300622224808, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.584676265716553, "eval_rewards/margins": 16.62618064880371, "eval_rewards/rejected": -22.21085548400879, "eval_runtime": 48.7877, "eval_samples_per_second": 58.662, "eval_steps_per_second": 1.845, "step": 4100 }, { "epoch": 1.88, "learning_rate": 2.0819949281487741e-07, "logits/chosen": -1.8189401626586914, "logits/rejected": -1.630281686782837, "logps/chosen": -142.0605010986328, "logps/rejected": -307.12725830078125, "loss": 0.0057, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.467617034912109, "rewards/margins": 19.189804077148438, "rewards/rejected": -23.657419204711914, "step": 4110 }, { "epoch": 1.88, "learning_rate": 2.073541842772612e-07, "logits/chosen": -1.814287781715393, "logits/rejected": -1.632960557937622, "logps/chosen": -142.54632568359375, "logps/rejected": -323.35992431640625, "loss": 0.005, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.713547706604004, "rewards/margins": 20.30032730102539, "rewards/rejected": -25.013872146606445, "step": 4120 }, { "epoch": 1.88, "learning_rate": 2.0650887573964496e-07, "logits/chosen": -1.8119704723358154, "logits/rejected": -1.6724140644073486, "logps/chosen": -145.4338836669922, "logps/rejected": -319.1186218261719, "loss": 0.009, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.1287946701049805, "rewards/margins": 18.313566207885742, "rewards/rejected": -24.44236183166504, "step": 4130 }, { "epoch": 1.89, "learning_rate": 2.0566356720202874e-07, "logits/chosen": -1.8369214534759521, "logits/rejected": -1.675672173500061, "logps/chosen": -154.06927490234375, "logps/rejected": -305.5340576171875, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.684229850769043, "rewards/margins": 17.68790626525879, "rewards/rejected": -23.372135162353516, "step": 4140 }, { "epoch": 1.89, "learning_rate": 2.048182586644125e-07, "logits/chosen": -1.8302981853485107, "logits/rejected": -1.6432266235351562, "logps/chosen": -141.11495971679688, "logps/rejected": -297.094482421875, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.820776462554932, "rewards/margins": 18.21160316467285, "rewards/rejected": -23.032377243041992, "step": 4150 }, { "epoch": 1.9, "learning_rate": 2.0397295012679628e-07, "logits/chosen": -1.8787815570831299, "logits/rejected": -1.7506415843963623, "logps/chosen": -139.04129028320312, "logps/rejected": -285.0088806152344, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -4.390732765197754, "rewards/margins": 17.23700523376465, "rewards/rejected": -21.627737045288086, "step": 4160 }, { "epoch": 1.9, "learning_rate": 2.0312764158918006e-07, "logits/chosen": -1.8781499862670898, "logits/rejected": -1.731313705444336, "logps/chosen": -134.88818359375, "logps/rejected": -275.73553466796875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/chosen": -3.5239157676696777, "rewards/margins": 16.483020782470703, "rewards/rejected": -20.006938934326172, "step": 4170 }, { "epoch": 1.91, "learning_rate": 2.0228233305156382e-07, "logits/chosen": -1.7998660802841187, "logits/rejected": -1.6408767700195312, "logps/chosen": -135.11972045898438, "logps/rejected": -277.9287414550781, "loss": 0.0073, "rewards/accuracies": 1.0, "rewards/chosen": -4.2441205978393555, "rewards/margins": 16.757963180541992, "rewards/rejected": -21.002086639404297, "step": 4180 }, { "epoch": 1.91, "learning_rate": 2.0143702451394758e-07, "logits/chosen": -1.7851566076278687, "logits/rejected": -1.6121336221694946, "logps/chosen": -137.66769409179688, "logps/rejected": -306.99420166015625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.519832134246826, "rewards/margins": 18.73788070678711, "rewards/rejected": -23.257715225219727, "step": 4190 }, { "epoch": 1.92, "learning_rate": 2.0059171597633133e-07, "logits/chosen": -1.7903058528900146, "logits/rejected": -1.6327577829360962, "logps/chosen": -142.868896484375, "logps/rejected": -303.55194091796875, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -5.210319519042969, "rewards/margins": 18.065744400024414, "rewards/rejected": -23.27606201171875, "step": 4200 }, { "epoch": 1.92, "eval_logits/chosen": -1.7379149198532104, "eval_logits/rejected": -1.5687103271484375, "eval_logps/chosen": -143.3500213623047, "eval_logps/rejected": -284.3860778808594, "eval_loss": 0.02061247080564499, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -5.211281776428223, "eval_rewards/margins": 16.216867446899414, "eval_rewards/rejected": -21.428150177001953, "eval_runtime": 49.1094, "eval_samples_per_second": 58.278, "eval_steps_per_second": 1.833, "step": 4200 }, { "epoch": 1.92, "learning_rate": 1.9974640743871512e-07, "logits/chosen": -1.810712456703186, "logits/rejected": -1.6843817234039307, "logps/chosen": -139.18353271484375, "logps/rejected": -281.2368469238281, "loss": 0.0172, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -5.270883560180664, "rewards/margins": 15.727676391601562, "rewards/rejected": -20.998559951782227, "step": 4210 }, { "epoch": 1.93, "learning_rate": 1.9890109890109888e-07, "logits/chosen": -1.8806030750274658, "logits/rejected": -1.7819697856903076, "logps/chosen": -130.38177490234375, "logps/rejected": -267.53607177734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.8572211265563965, "rewards/margins": 15.687589645385742, "rewards/rejected": -19.544809341430664, "step": 4220 }, { "epoch": 1.93, "learning_rate": 1.9805579036348266e-07, "logits/chosen": -1.7186689376831055, "logits/rejected": -1.5683701038360596, "logps/chosen": -141.64805603027344, "logps/rejected": -306.37347412109375, "loss": 0.0126, "rewards/accuracies": 1.0, "rewards/chosen": -5.315321922302246, "rewards/margins": 18.028160095214844, "rewards/rejected": -23.343482971191406, "step": 4230 }, { "epoch": 1.94, "learning_rate": 1.9721048182586644e-07, "logits/chosen": -1.793931007385254, "logits/rejected": -1.5868475437164307, "logps/chosen": -127.1364974975586, "logps/rejected": -309.6034240722656, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7159628868103027, "rewards/margins": 20.01531982421875, "rewards/rejected": -23.731281280517578, "step": 4240 }, { "epoch": 1.94, "learning_rate": 1.963651732882502e-07, "logits/chosen": -1.8754409551620483, "logits/rejected": -1.7193734645843506, "logps/chosen": -130.42030334472656, "logps/rejected": -281.035400390625, "loss": 0.0105, "rewards/accuracies": 1.0, "rewards/chosen": -3.504946231842041, "rewards/margins": 16.956892013549805, "rewards/rejected": -20.461841583251953, "step": 4250 }, { "epoch": 1.94, "learning_rate": 1.9551986475063398e-07, "logits/chosen": -1.868322730064392, "logits/rejected": -1.758927583694458, "logps/chosen": -137.1085968017578, "logps/rejected": -269.8552551269531, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -4.547211647033691, "rewards/margins": 14.988856315612793, "rewards/rejected": -19.536067962646484, "step": 4260 }, { "epoch": 1.95, "learning_rate": 1.9467455621301774e-07, "logits/chosen": -1.849692702293396, "logits/rejected": -1.6743018627166748, "logps/chosen": -134.50845336914062, "logps/rejected": -278.919677734375, "loss": 0.0112, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.260858058929443, "rewards/margins": 16.32442283630371, "rewards/rejected": -20.585281372070312, "step": 4270 }, { "epoch": 1.95, "learning_rate": 1.9382924767540152e-07, "logits/chosen": -1.8525829315185547, "logits/rejected": -1.713321328163147, "logps/chosen": -135.14071655273438, "logps/rejected": -287.2046813964844, "loss": 0.0037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.9108681678771973, "rewards/margins": 17.29202651977539, "rewards/rejected": -21.202892303466797, "step": 4280 }, { "epoch": 1.96, "learning_rate": 1.929839391377853e-07, "logits/chosen": -1.8243554830551147, "logits/rejected": -1.714613914489746, "logps/chosen": -139.86141967773438, "logps/rejected": -265.8287658691406, "loss": 0.0143, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.9251556396484375, "rewards/margins": 14.7702054977417, "rewards/rejected": -19.695362091064453, "step": 4290 }, { "epoch": 1.96, "learning_rate": 1.9213863060016904e-07, "logits/chosen": -1.7749605178833008, "logits/rejected": -1.6101394891738892, "logps/chosen": -127.2191162109375, "logps/rejected": -277.84893798828125, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.9290008544921875, "rewards/margins": 16.82949447631836, "rewards/rejected": -20.758495330810547, "step": 4300 }, { "epoch": 1.96, "eval_logits/chosen": -1.74306321144104, "eval_logits/rejected": -1.588865041732788, "eval_logps/chosen": -135.57318115234375, "eval_logps/rejected": -258.4649658203125, "eval_loss": 0.020533427596092224, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -4.433597087860107, "eval_rewards/margins": 14.402440071105957, "eval_rewards/rejected": -18.836036682128906, "eval_runtime": 48.6151, "eval_samples_per_second": 58.871, "eval_steps_per_second": 1.851, "step": 4300 }, { "epoch": 1.97, "learning_rate": 1.9129332206255282e-07, "logits/chosen": -1.7776132822036743, "logits/rejected": -1.624101996421814, "logps/chosen": -125.65428161621094, "logps/rejected": -264.5531005859375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -3.207595109939575, "rewards/margins": 16.06104850769043, "rewards/rejected": -19.26864242553711, "step": 4310 }, { "epoch": 1.97, "learning_rate": 1.9044801352493658e-07, "logits/chosen": -1.7687575817108154, "logits/rejected": -1.5868823528289795, "logps/chosen": -138.88534545898438, "logps/rejected": -266.2568359375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.900606632232666, "rewards/margins": 15.659704208374023, "rewards/rejected": -19.560312271118164, "step": 4320 }, { "epoch": 1.98, "learning_rate": 1.8960270498732036e-07, "logits/chosen": -1.6516854763031006, "logits/rejected": -1.4852774143218994, "logps/chosen": -138.44424438476562, "logps/rejected": -276.5455322265625, "loss": 0.0093, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.644902229309082, "rewards/margins": 15.883813858032227, "rewards/rejected": -20.528715133666992, "step": 4330 }, { "epoch": 1.98, "learning_rate": 1.8875739644970412e-07, "logits/chosen": -1.6602948904037476, "logits/rejected": -1.497868299484253, "logps/chosen": -138.5507354736328, "logps/rejected": -286.0603942871094, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": -4.921391487121582, "rewards/margins": 16.544239044189453, "rewards/rejected": -21.46563148498535, "step": 4340 }, { "epoch": 1.99, "learning_rate": 1.879120879120879e-07, "logits/chosen": -1.8231804370880127, "logits/rejected": -1.7026121616363525, "logps/chosen": -118.9980697631836, "logps/rejected": -241.5241241455078, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -2.846381187438965, "rewards/margins": 14.30328369140625, "rewards/rejected": -17.149662017822266, "step": 4350 }, { "epoch": 1.99, "learning_rate": 1.870667793744717e-07, "logits/chosen": -1.856925368309021, "logits/rejected": -1.760263442993164, "logps/chosen": -126.03316497802734, "logps/rejected": -230.318115234375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -3.0299785137176514, "rewards/margins": 12.88720417022705, "rewards/rejected": -15.917182922363281, "step": 4360 }, { "epoch": 1.99, "learning_rate": 1.8622147083685544e-07, "logits/chosen": -1.8362869024276733, "logits/rejected": -1.7145744562149048, "logps/chosen": -123.3587875366211, "logps/rejected": -238.42489624023438, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -3.1427197456359863, "rewards/margins": 13.606206893920898, "rewards/rejected": -16.74892807006836, "step": 4370 }, { "epoch": 2.0, "learning_rate": 1.8537616229923923e-07, "logits/chosen": -1.8129431009292603, "logits/rejected": -1.6960035562515259, "logps/chosen": -118.24918365478516, "logps/rejected": -240.0215606689453, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/chosen": -2.538170576095581, "rewards/margins": 14.578079223632812, "rewards/rejected": -17.116249084472656, "step": 4380 }, { "epoch": 2.0, "learning_rate": 1.8453085376162298e-07, "logits/chosen": -1.8822195529937744, "logits/rejected": -1.7461490631103516, "logps/chosen": -131.69345092773438, "logps/rejected": -253.98416137695312, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -3.098832845687866, "rewards/margins": 14.841659545898438, "rewards/rejected": -17.940494537353516, "step": 4390 }, { "epoch": 2.01, "learning_rate": 1.8368554522400674e-07, "logits/chosen": -1.83819580078125, "logits/rejected": -1.7222115993499756, "logps/chosen": -127.88763427734375, "logps/rejected": -243.81893920898438, "loss": 0.0038, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.604341983795166, "rewards/margins": 13.784266471862793, "rewards/rejected": -17.388608932495117, "step": 4400 }, { "epoch": 2.01, "eval_logits/chosen": -1.7572911977767944, "eval_logits/rejected": -1.6138116121292114, "eval_logps/chosen": -129.26815795898438, "eval_logps/rejected": -244.04730224609375, "eval_loss": 0.02128242887556553, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -3.803096294403076, "eval_rewards/margins": 13.591176986694336, "eval_rewards/rejected": -17.39427375793457, "eval_runtime": 48.6945, "eval_samples_per_second": 58.775, "eval_steps_per_second": 1.848, "step": 4400 }, { "epoch": 2.01, "learning_rate": 1.8284023668639053e-07, "logits/chosen": -1.8123576641082764, "logits/rejected": -1.6658599376678467, "logps/chosen": -133.7542266845703, "logps/rejected": -276.7223815917969, "loss": 0.0066, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.7751827239990234, "rewards/margins": 16.35031509399414, "rewards/rejected": -20.125499725341797, "step": 4410 }, { "epoch": 2.02, "learning_rate": 1.8199492814877428e-07, "logits/chosen": -1.8025636672973633, "logits/rejected": -1.6769447326660156, "logps/chosen": -130.52369689941406, "logps/rejected": -256.2034912109375, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -3.9366908073425293, "rewards/margins": 14.231219291687012, "rewards/rejected": -18.167909622192383, "step": 4420 }, { "epoch": 2.02, "learning_rate": 1.8114961961115807e-07, "logits/chosen": -1.7440084218978882, "logits/rejected": -1.6063076257705688, "logps/chosen": -114.4801025390625, "logps/rejected": -257.86651611328125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -2.5977623462677, "rewards/margins": 15.983970642089844, "rewards/rejected": -18.581729888916016, "step": 4430 }, { "epoch": 2.03, "learning_rate": 1.8030431107354182e-07, "logits/chosen": -1.795680284500122, "logits/rejected": -1.661131501197815, "logps/chosen": -127.7900619506836, "logps/rejected": -260.941162109375, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -3.44555401802063, "rewards/margins": 15.371556282043457, "rewards/rejected": -18.817108154296875, "step": 4440 }, { "epoch": 2.03, "learning_rate": 1.794590025359256e-07, "logits/chosen": -1.7703081369400024, "logits/rejected": -1.6057188510894775, "logps/chosen": -125.89506530761719, "logps/rejected": -273.585205078125, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -3.688451051712036, "rewards/margins": 16.332950592041016, "rewards/rejected": -20.02140235900879, "step": 4450 }, { "epoch": 2.04, "learning_rate": 1.7861369399830936e-07, "logits/chosen": -1.7694547176361084, "logits/rejected": -1.6058471202850342, "logps/chosen": -137.6060791015625, "logps/rejected": -278.2977600097656, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -4.031754970550537, "rewards/margins": 17.0496826171875, "rewards/rejected": -21.081439971923828, "step": 4460 }, { "epoch": 2.04, "learning_rate": 1.7776838546069315e-07, "logits/chosen": -1.7135499715805054, "logits/rejected": -1.5273189544677734, "logps/chosen": -123.12062072753906, "logps/rejected": -284.689208984375, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -3.752751588821411, "rewards/margins": 17.560134887695312, "rewards/rejected": -21.31288719177246, "step": 4470 }, { "epoch": 2.04, "learning_rate": 1.7692307692307693e-07, "logits/chosen": -1.724585771560669, "logits/rejected": -1.5345683097839355, "logps/chosen": -123.19392395019531, "logps/rejected": -282.0025939941406, "loss": 0.0006, "rewards/accuracies": 1.0, "rewards/chosen": -3.395287036895752, "rewards/margins": 17.836999893188477, "rewards/rejected": -21.23228645324707, "step": 4480 }, { "epoch": 2.05, "learning_rate": 1.760777683854607e-07, "logits/chosen": -1.7193084955215454, "logits/rejected": -1.5445727109909058, "logps/chosen": -131.0773468017578, "logps/rejected": -282.26812744140625, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -3.6276519298553467, "rewards/margins": 17.360347747802734, "rewards/rejected": -20.988000869750977, "step": 4490 }, { "epoch": 2.05, "learning_rate": 1.7523245984784447e-07, "logits/chosen": -1.7349720001220703, "logits/rejected": -1.5807268619537354, "logps/chosen": -132.5692596435547, "logps/rejected": -283.40283203125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -4.059118270874023, "rewards/margins": 17.672191619873047, "rewards/rejected": -21.73130989074707, "step": 4500 }, { "epoch": 2.05, "eval_logits/chosen": -1.6392802000045776, "eval_logits/rejected": -1.457102656364441, "eval_logps/chosen": -139.0337371826172, "eval_logps/rejected": -276.6232604980469, "eval_loss": 0.020971935242414474, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -4.779654502868652, "eval_rewards/margins": 15.872212409973145, "eval_rewards/rejected": -20.651866912841797, "eval_runtime": 49.0643, "eval_samples_per_second": 58.332, "eval_steps_per_second": 1.834, "step": 4500 }, { "epoch": 2.06, "learning_rate": 1.743871513102282e-07, "logits/chosen": -1.683262825012207, "logits/rejected": -1.528511643409729, "logps/chosen": -142.10989379882812, "logps/rejected": -294.67877197265625, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -4.435738563537598, "rewards/margins": 17.726200103759766, "rewards/rejected": -22.161941528320312, "step": 4510 }, { "epoch": 2.06, "learning_rate": 1.7354184277261199e-07, "logits/chosen": -1.707883596420288, "logits/rejected": -1.5181795358657837, "logps/chosen": -138.83139038085938, "logps/rejected": -298.7277526855469, "loss": 0.0061, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.555205345153809, "rewards/margins": 17.840625762939453, "rewards/rejected": -22.395830154418945, "step": 4520 }, { "epoch": 2.07, "learning_rate": 1.7269653423499577e-07, "logits/chosen": -1.706210732460022, "logits/rejected": -1.4901323318481445, "logps/chosen": -141.46340942382812, "logps/rejected": -306.2601623535156, "loss": 0.0063, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.152948379516602, "rewards/margins": 19.567630767822266, "rewards/rejected": -23.720579147338867, "step": 4530 }, { "epoch": 2.07, "learning_rate": 1.7185122569737953e-07, "logits/chosen": -1.6828571557998657, "logits/rejected": -1.473172903060913, "logps/chosen": -137.48602294921875, "logps/rejected": -301.6971740722656, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.706709861755371, "rewards/margins": 18.48426055908203, "rewards/rejected": -23.190969467163086, "step": 4540 }, { "epoch": 2.08, "learning_rate": 1.710059171597633e-07, "logits/chosen": -1.6979420185089111, "logits/rejected": -1.5147123336791992, "logps/chosen": -126.75444030761719, "logps/rejected": -299.8186340332031, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/chosen": -4.0670061111450195, "rewards/margins": 18.573415756225586, "rewards/rejected": -22.640424728393555, "step": 4550 }, { "epoch": 2.08, "learning_rate": 1.7016060862214707e-07, "logits/chosen": -1.7006571292877197, "logits/rejected": -1.5056321620941162, "logps/chosen": -123.4179916381836, "logps/rejected": -292.84478759765625, "loss": 0.0079, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -3.3246817588806152, "rewards/margins": 19.180789947509766, "rewards/rejected": -22.50547218322754, "step": 4560 }, { "epoch": 2.09, "learning_rate": 1.6931530008453085e-07, "logits/chosen": -1.684525489807129, "logits/rejected": -1.4895942211151123, "logps/chosen": -134.53253173828125, "logps/rejected": -290.8706359863281, "loss": 0.0056, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.204358100891113, "rewards/margins": 18.149574279785156, "rewards/rejected": -22.353931427001953, "step": 4570 }, { "epoch": 2.09, "learning_rate": 1.684699915469146e-07, "logits/chosen": -1.6476118564605713, "logits/rejected": -1.4635541439056396, "logps/chosen": -141.87469482421875, "logps/rejected": -301.1070251464844, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.55865478515625, "rewards/margins": 18.13406753540039, "rewards/rejected": -22.69272232055664, "step": 4580 }, { "epoch": 2.09, "learning_rate": 1.676246830092984e-07, "logits/chosen": -1.6086757183074951, "logits/rejected": -1.4354054927825928, "logps/chosen": -141.43751525878906, "logps/rejected": -300.6250305175781, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.488182544708252, "rewards/margins": 17.32658576965332, "rewards/rejected": -22.814769744873047, "step": 4590 }, { "epoch": 2.1, "learning_rate": 1.6677937447168218e-07, "logits/chosen": -1.6444076299667358, "logits/rejected": -1.440915584564209, "logps/chosen": -131.90924072265625, "logps/rejected": -313.8179016113281, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -3.731848955154419, "rewards/margins": 20.591495513916016, "rewards/rejected": -24.32334327697754, "step": 4600 }, { "epoch": 2.1, "eval_logits/chosen": -1.611390233039856, "eval_logits/rejected": -1.4136455059051514, "eval_logps/chosen": -144.26724243164062, "eval_logps/rejected": -291.9064025878906, "eval_loss": 0.022027108818292618, "eval_rewards/accuracies": 0.9833333492279053, "eval_rewards/chosen": -5.30300235748291, "eval_rewards/margins": 16.877180099487305, "eval_rewards/rejected": -22.18018341064453, "eval_runtime": 48.9227, "eval_samples_per_second": 58.5, "eval_steps_per_second": 1.84, "step": 4600 }, { "epoch": 2.1, "learning_rate": 1.6593406593406593e-07, "logits/chosen": -1.66204035282135, "logits/rejected": -1.4702497720718384, "logps/chosen": -149.71023559570312, "logps/rejected": -299.20025634765625, "loss": 0.0038, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.900949001312256, "rewards/margins": 16.8973388671875, "rewards/rejected": -22.79828643798828, "step": 4610 }, { "epoch": 2.11, "learning_rate": 1.650887573964497e-07, "logits/chosen": -1.6408029794692993, "logits/rejected": -1.4777195453643799, "logps/chosen": -144.4774627685547, "logps/rejected": -303.2586364746094, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.714861869812012, "rewards/margins": 17.49419593811035, "rewards/rejected": -23.20905876159668, "step": 4620 }, { "epoch": 2.11, "learning_rate": 1.6424344885883345e-07, "logits/chosen": -1.6856443881988525, "logits/rejected": -1.4800993204116821, "logps/chosen": -140.2975311279297, "logps/rejected": -325.49261474609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.508673667907715, "rewards/margins": 21.040714263916016, "rewards/rejected": -25.549386978149414, "step": 4630 }, { "epoch": 2.12, "learning_rate": 1.6339814032121723e-07, "logits/chosen": -1.6470167636871338, "logits/rejected": -1.4312283992767334, "logps/chosen": -144.17864990234375, "logps/rejected": -332.71923828125, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.611395835876465, "rewards/margins": 20.437816619873047, "rewards/rejected": -26.049213409423828, "step": 4640 }, { "epoch": 2.12, "learning_rate": 1.6255283178360101e-07, "logits/chosen": -1.6887588500976562, "logits/rejected": -1.4585940837860107, "logps/chosen": -145.76132202148438, "logps/rejected": -341.21124267578125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -5.22983455657959, "rewards/margins": 21.57925796508789, "rewards/rejected": -26.809091567993164, "step": 4650 }, { "epoch": 2.13, "learning_rate": 1.6170752324598477e-07, "logits/chosen": -1.6384716033935547, "logits/rejected": -1.452253818511963, "logps/chosen": -140.97543334960938, "logps/rejected": -318.2596130371094, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -4.9640793800354, "rewards/margins": 19.87236785888672, "rewards/rejected": -24.836448669433594, "step": 4660 }, { "epoch": 2.13, "learning_rate": 1.6086221470836856e-07, "logits/chosen": -1.6505107879638672, "logits/rejected": -1.484649658203125, "logps/chosen": -153.21498107910156, "logps/rejected": -319.28643798828125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.582076072692871, "rewards/margins": 19.079242706298828, "rewards/rejected": -24.661317825317383, "step": 4670 }, { "epoch": 2.14, "learning_rate": 1.600169061707523e-07, "logits/chosen": -1.6539497375488281, "logits/rejected": -1.4012236595153809, "logps/chosen": -136.5476837158203, "logps/rejected": -337.0199279785156, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.0985212326049805, "rewards/margins": 22.486196517944336, "rewards/rejected": -26.584720611572266, "step": 4680 }, { "epoch": 2.14, "learning_rate": 1.591715976331361e-07, "logits/chosen": -1.6659095287322998, "logits/rejected": -1.4670120477676392, "logps/chosen": -157.36988830566406, "logps/rejected": -344.1231384277344, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.303195476531982, "rewards/margins": 20.90297508239746, "rewards/rejected": -27.2061710357666, "step": 4690 }, { "epoch": 2.15, "learning_rate": 1.5832628909551985e-07, "logits/chosen": -1.6164276599884033, "logits/rejected": -1.414585828781128, "logps/chosen": -165.75399780273438, "logps/rejected": -359.6187438964844, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.301906585693359, "rewards/margins": 20.94976043701172, "rewards/rejected": -28.25166893005371, "step": 4700 }, { "epoch": 2.15, "eval_logits/chosen": -1.5344696044921875, "eval_logits/rejected": -1.310213327407837, "eval_logps/chosen": -160.243408203125, "eval_logps/rejected": -335.3616638183594, "eval_loss": 0.023973895236849785, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.900619029998779, "eval_rewards/margins": 19.625089645385742, "eval_rewards/rejected": -26.525711059570312, "eval_runtime": 48.6837, "eval_samples_per_second": 58.788, "eval_steps_per_second": 1.849, "step": 4700 }, { "epoch": 2.15, "learning_rate": 1.5748098055790364e-07, "logits/chosen": -1.5866243839263916, "logits/rejected": -1.3655788898468018, "logps/chosen": -151.45236206054688, "logps/rejected": -345.5313720703125, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -6.042023181915283, "rewards/margins": 21.149606704711914, "rewards/rejected": -27.19162940979004, "step": 4710 }, { "epoch": 2.15, "learning_rate": 1.5663567202028742e-07, "logits/chosen": -1.568950891494751, "logits/rejected": -1.3398112058639526, "logps/chosen": -144.99575805664062, "logps/rejected": -346.5789794921875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -5.015862464904785, "rewards/margins": 22.201229095458984, "rewards/rejected": -27.217090606689453, "step": 4720 }, { "epoch": 2.16, "learning_rate": 1.5579036348267115e-07, "logits/chosen": -1.6242564916610718, "logits/rejected": -1.4060070514678955, "logps/chosen": -163.28323364257812, "logps/rejected": -360.0334777832031, "loss": 0.0067, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -6.851768493652344, "rewards/margins": 21.580278396606445, "rewards/rejected": -28.43204689025879, "step": 4730 }, { "epoch": 2.16, "learning_rate": 1.5494505494505493e-07, "logits/chosen": -1.5484068393707275, "logits/rejected": -1.3234050273895264, "logps/chosen": -156.7251434326172, "logps/rejected": -364.34539794921875, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.725734710693359, "rewards/margins": 22.758655548095703, "rewards/rejected": -29.484386444091797, "step": 4740 }, { "epoch": 2.17, "learning_rate": 1.540997464074387e-07, "logits/chosen": -1.5197080373764038, "logits/rejected": -1.2975013256072998, "logps/chosen": -163.72894287109375, "logps/rejected": -360.9460754394531, "loss": 0.0058, "rewards/accuracies": 1.0, "rewards/chosen": -6.5403289794921875, "rewards/margins": 22.118616104125977, "rewards/rejected": -28.658945083618164, "step": 4750 }, { "epoch": 2.17, "learning_rate": 1.5325443786982248e-07, "logits/chosen": -1.5585333108901978, "logits/rejected": -1.3211743831634521, "logps/chosen": -158.20669555664062, "logps/rejected": -374.6647033691406, "loss": 0.0015, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.051785945892334, "rewards/margins": 24.1824893951416, "rewards/rejected": -30.234272003173828, "step": 4760 }, { "epoch": 2.18, "learning_rate": 1.5240912933220626e-07, "logits/chosen": -1.5456629991531372, "logits/rejected": -1.304001808166504, "logps/chosen": -154.15481567382812, "logps/rejected": -383.1095886230469, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.04030179977417, "rewards/margins": 24.318885803222656, "rewards/rejected": -30.35919189453125, "step": 4770 }, { "epoch": 2.18, "learning_rate": 1.5156382079459002e-07, "logits/chosen": -1.554931402206421, "logits/rejected": -1.3219283819198608, "logps/chosen": -156.97434997558594, "logps/rejected": -355.1632995605469, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -5.934172630310059, "rewards/margins": 22.271665573120117, "rewards/rejected": -28.205841064453125, "step": 4780 }, { "epoch": 2.19, "learning_rate": 1.507185122569738e-07, "logits/chosen": -1.5992720127105713, "logits/rejected": -1.357313871383667, "logps/chosen": -159.3846893310547, "logps/rejected": -356.4526062011719, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.093033790588379, "rewards/margins": 22.349348068237305, "rewards/rejected": -28.4423828125, "step": 4790 }, { "epoch": 2.19, "learning_rate": 1.4987320371935756e-07, "logits/chosen": -1.603276014328003, "logits/rejected": -1.3721181154251099, "logps/chosen": -150.90286254882812, "logps/rejected": -358.8915100097656, "loss": 0.0075, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.906771183013916, "rewards/margins": 22.509685516357422, "rewards/rejected": -28.416454315185547, "step": 4800 }, { "epoch": 2.19, "eval_logits/chosen": -1.4933305978775024, "eval_logits/rejected": -1.2593837976455688, "eval_logps/chosen": -157.8497314453125, "eval_logps/rejected": -338.1903076171875, "eval_loss": 0.025256937369704247, "eval_rewards/accuracies": 0.9777777791023254, "eval_rewards/chosen": -6.661252975463867, "eval_rewards/margins": 20.14731788635254, "eval_rewards/rejected": -26.80857276916504, "eval_runtime": 48.1166, "eval_samples_per_second": 59.481, "eval_steps_per_second": 1.87, "step": 4800 }, { "epoch": 2.2, "learning_rate": 1.4902789518174134e-07, "logits/chosen": -1.5837562084197998, "logits/rejected": -1.3809980154037476, "logps/chosen": -147.4069366455078, "logps/rejected": -335.81890869140625, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -5.58919620513916, "rewards/margins": 20.910274505615234, "rewards/rejected": -26.49947166442871, "step": 4810 }, { "epoch": 2.2, "learning_rate": 1.4818258664412512e-07, "logits/chosen": -1.6084327697753906, "logits/rejected": -1.3756760358810425, "logps/chosen": -147.60806274414062, "logps/rejected": -345.25799560546875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.7579145431518555, "rewards/margins": 21.470531463623047, "rewards/rejected": -27.22844886779785, "step": 4820 }, { "epoch": 2.2, "learning_rate": 1.4733727810650885e-07, "logits/chosen": -1.6319348812103271, "logits/rejected": -1.444595456123352, "logps/chosen": -136.5446014404297, "logps/rejected": -318.79443359375, "loss": 0.0028, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.821673393249512, "rewards/margins": 20.090484619140625, "rewards/rejected": -24.91215705871582, "step": 4830 }, { "epoch": 2.21, "learning_rate": 1.4649196956889264e-07, "logits/chosen": -1.6026424169540405, "logits/rejected": -1.3949694633483887, "logps/chosen": -138.6693572998047, "logps/rejected": -329.12579345703125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.085145473480225, "rewards/margins": 20.640195846557617, "rewards/rejected": -25.725341796875, "step": 4840 }, { "epoch": 2.21, "learning_rate": 1.456466610312764e-07, "logits/chosen": -1.6504093408584595, "logits/rejected": -1.434548020362854, "logps/chosen": -141.36990356445312, "logps/rejected": -325.2200622558594, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.669219017028809, "rewards/margins": 20.838899612426758, "rewards/rejected": -25.50812339782715, "step": 4850 }, { "epoch": 2.22, "learning_rate": 1.4480135249366018e-07, "logits/chosen": -1.621881127357483, "logits/rejected": -1.3952808380126953, "logps/chosen": -139.6595458984375, "logps/rejected": -318.80841064453125, "loss": 0.0014, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.290844917297363, "rewards/margins": 20.612688064575195, "rewards/rejected": -24.903533935546875, "step": 4860 }, { "epoch": 2.22, "learning_rate": 1.4395604395604394e-07, "logits/chosen": -1.5858741998672485, "logits/rejected": -1.3580384254455566, "logps/chosen": -136.5399627685547, "logps/rejected": -336.61773681640625, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.7228240966796875, "rewards/margins": 21.94614601135254, "rewards/rejected": -26.668970108032227, "step": 4870 }, { "epoch": 2.23, "learning_rate": 1.4311073541842772e-07, "logits/chosen": -1.5768780708312988, "logits/rejected": -1.3338980674743652, "logps/chosen": -148.2299346923828, "logps/rejected": -345.2416076660156, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.761619567871094, "rewards/margins": 21.782390594482422, "rewards/rejected": -27.54401206970215, "step": 4880 }, { "epoch": 2.23, "learning_rate": 1.422654268808115e-07, "logits/chosen": -1.598933458328247, "logits/rejected": -1.406333088874817, "logps/chosen": -157.4684295654297, "logps/rejected": -347.3194274902344, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.242190361022949, "rewards/margins": 21.24979591369629, "rewards/rejected": -27.49198341369629, "step": 4890 }, { "epoch": 2.24, "learning_rate": 1.4142011834319526e-07, "logits/chosen": -1.6200568675994873, "logits/rejected": -1.3976492881774902, "logps/chosen": -132.42913818359375, "logps/rejected": -337.7710876464844, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -3.944077968597412, "rewards/margins": 22.762954711914062, "rewards/rejected": -26.70703125, "step": 4900 }, { "epoch": 2.24, "eval_logits/chosen": -1.531797170639038, "eval_logits/rejected": -1.3038618564605713, "eval_logps/chosen": -150.62075805664062, "eval_logps/rejected": -323.0674133300781, "eval_loss": 0.023812316358089447, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.938355922698975, "eval_rewards/margins": 19.35792350769043, "eval_rewards/rejected": -25.296281814575195, "eval_runtime": 48.8333, "eval_samples_per_second": 58.608, "eval_steps_per_second": 1.843, "step": 4900 }, { "epoch": 2.24, "learning_rate": 1.4057480980557904e-07, "logits/chosen": -1.5462377071380615, "logits/rejected": -1.3417539596557617, "logps/chosen": -150.1999969482422, "logps/rejected": -328.15264892578125, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -5.9226484298706055, "rewards/margins": 19.603456497192383, "rewards/rejected": -25.526103973388672, "step": 4910 }, { "epoch": 2.25, "learning_rate": 1.397295012679628e-07, "logits/chosen": -1.648712158203125, "logits/rejected": -1.4413989782333374, "logps/chosen": -148.29783630371094, "logps/rejected": -334.0263977050781, "loss": 0.0073, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -4.927051544189453, "rewards/margins": 21.278085708618164, "rewards/rejected": -26.205135345458984, "step": 4920 }, { "epoch": 2.25, "learning_rate": 1.3888419273034658e-07, "logits/chosen": -1.710524320602417, "logits/rejected": -1.5135892629623413, "logps/chosen": -137.21377563476562, "logps/rejected": -329.8485412597656, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.232450008392334, "rewards/margins": 21.307376861572266, "rewards/rejected": -25.539825439453125, "step": 4930 }, { "epoch": 2.25, "learning_rate": 1.3803888419273034e-07, "logits/chosen": -1.6205447912216187, "logits/rejected": -1.3998703956604004, "logps/chosen": -141.4749755859375, "logps/rejected": -336.75555419921875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -4.847267150878906, "rewards/margins": 21.587642669677734, "rewards/rejected": -26.434911727905273, "step": 4940 }, { "epoch": 2.26, "learning_rate": 1.371935756551141e-07, "logits/chosen": -1.5820177793502808, "logits/rejected": -1.3905309438705444, "logps/chosen": -152.05067443847656, "logps/rejected": -321.4071960449219, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -6.205025672912598, "rewards/margins": 18.604568481445312, "rewards/rejected": -24.80959701538086, "step": 4950 }, { "epoch": 2.26, "learning_rate": 1.3634826711749788e-07, "logits/chosen": -1.5784461498260498, "logits/rejected": -1.381110429763794, "logps/chosen": -140.77227783203125, "logps/rejected": -335.337158203125, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -4.567250728607178, "rewards/margins": 21.762025833129883, "rewards/rejected": -26.329275131225586, "step": 4960 }, { "epoch": 2.27, "learning_rate": 1.3550295857988164e-07, "logits/chosen": -1.5645751953125, "logits/rejected": -1.3427592515945435, "logps/chosen": -148.5469207763672, "logps/rejected": -318.39874267578125, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -5.7441606521606445, "rewards/margins": 18.878780364990234, "rewards/rejected": -24.62293815612793, "step": 4970 }, { "epoch": 2.27, "learning_rate": 1.3465765004226542e-07, "logits/chosen": -1.573047161102295, "logits/rejected": -1.3629062175750732, "logps/chosen": -141.57052612304688, "logps/rejected": -312.4440002441406, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -4.663500785827637, "rewards/margins": 19.396190643310547, "rewards/rejected": -24.059690475463867, "step": 4980 }, { "epoch": 2.28, "learning_rate": 1.3381234150464918e-07, "logits/chosen": -1.5677963495254517, "logits/rejected": -1.3477671146392822, "logps/chosen": -147.5939483642578, "logps/rejected": -329.4576416015625, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/chosen": -5.594753742218018, "rewards/margins": 19.6628360748291, "rewards/rejected": -25.257587432861328, "step": 4990 }, { "epoch": 2.28, "learning_rate": 1.3296703296703296e-07, "logits/chosen": -1.5639418363571167, "logits/rejected": -1.344002604484558, "logps/chosen": -148.0439910888672, "logps/rejected": -328.4283752441406, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.538041591644287, "rewards/margins": 19.774883270263672, "rewards/rejected": -25.312923431396484, "step": 5000 }, { "epoch": 2.28, "eval_logits/chosen": -1.5276734828948975, "eval_logits/rejected": -1.3104116916656494, "eval_logps/chosen": -148.38333129882812, "eval_logps/rejected": -302.6257019042969, "eval_loss": 0.021709125488996506, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -5.71461296081543, "eval_rewards/margins": 17.53749656677246, "eval_rewards/rejected": -23.25210952758789, "eval_runtime": 48.0344, "eval_samples_per_second": 59.582, "eval_steps_per_second": 1.874, "step": 5000 }, { "epoch": 2.29, "learning_rate": 1.3212172442941675e-07, "logits/chosen": -1.5523064136505127, "logits/rejected": -1.3519313335418701, "logps/chosen": -147.11253356933594, "logps/rejected": -307.6585693359375, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.264976501464844, "rewards/margins": 18.00934600830078, "rewards/rejected": -23.27431869506836, "step": 5010 }, { "epoch": 2.29, "learning_rate": 1.312764158918005e-07, "logits/chosen": -1.6357170343399048, "logits/rejected": -1.4496716260910034, "logps/chosen": -143.47427368164062, "logps/rejected": -306.81475830078125, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.771214485168457, "rewards/margins": 19.028942108154297, "rewards/rejected": -23.800159454345703, "step": 5020 }, { "epoch": 2.3, "learning_rate": 1.304311073541843e-07, "logits/chosen": -1.6925318241119385, "logits/rejected": -1.476401925086975, "logps/chosen": -133.5230255126953, "logps/rejected": -321.890869140625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -4.030728340148926, "rewards/margins": 21.044105529785156, "rewards/rejected": -25.0748291015625, "step": 5030 }, { "epoch": 2.3, "learning_rate": 1.2958579881656802e-07, "logits/chosen": -1.63198983669281, "logits/rejected": -1.4392688274383545, "logps/chosen": -147.86672973632812, "logps/rejected": -322.14739990234375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.490933418273926, "rewards/margins": 19.4083194732666, "rewards/rejected": -24.899255752563477, "step": 5040 }, { "epoch": 2.3, "learning_rate": 1.287404902789518e-07, "logits/chosen": -1.5643590688705444, "logits/rejected": -1.3293625116348267, "logps/chosen": -137.88485717773438, "logps/rejected": -315.028564453125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.360368251800537, "rewards/margins": 19.42208480834961, "rewards/rejected": -24.782455444335938, "step": 5050 }, { "epoch": 2.31, "learning_rate": 1.2789518174133559e-07, "logits/chosen": -1.5989701747894287, "logits/rejected": -1.4192179441452026, "logps/chosen": -151.6392364501953, "logps/rejected": -300.0302734375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.988175392150879, "rewards/margins": 17.0905704498291, "rewards/rejected": -23.078744888305664, "step": 5060 }, { "epoch": 2.31, "learning_rate": 1.2704987320371934e-07, "logits/chosen": -1.6071436405181885, "logits/rejected": -1.3919312953948975, "logps/chosen": -153.93104553222656, "logps/rejected": -318.19659423828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.994771957397461, "rewards/margins": 18.597782135009766, "rewards/rejected": -24.592554092407227, "step": 5070 }, { "epoch": 2.32, "learning_rate": 1.2620456466610313e-07, "logits/chosen": -1.5697176456451416, "logits/rejected": -1.3543591499328613, "logps/chosen": -148.27279663085938, "logps/rejected": -330.57574462890625, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/chosen": -5.1298627853393555, "rewards/margins": 20.94051742553711, "rewards/rejected": -26.070384979248047, "step": 5080 }, { "epoch": 2.32, "learning_rate": 1.2535925612848688e-07, "logits/chosen": -1.525294542312622, "logits/rejected": -1.2792097330093384, "logps/chosen": -144.99293518066406, "logps/rejected": -362.4373474121094, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -5.554945945739746, "rewards/margins": 23.6368350982666, "rewards/rejected": -29.191781997680664, "step": 5090 }, { "epoch": 2.33, "learning_rate": 1.2451394759087067e-07, "logits/chosen": -1.530333399772644, "logits/rejected": -1.2804116010665894, "logps/chosen": -157.20462036132812, "logps/rejected": -360.80126953125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.6529669761657715, "rewards/margins": 22.147747039794922, "rewards/rejected": -28.800708770751953, "step": 5100 }, { "epoch": 2.33, "eval_logits/chosen": -1.4798411130905151, "eval_logits/rejected": -1.236836552619934, "eval_logps/chosen": -159.94859313964844, "eval_logps/rejected": -338.69610595703125, "eval_loss": 0.023448189720511436, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.871140003204346, "eval_rewards/margins": 19.988006591796875, "eval_rewards/rejected": -26.859148025512695, "eval_runtime": 48.5272, "eval_samples_per_second": 58.977, "eval_steps_per_second": 1.855, "step": 5100 }, { "epoch": 2.33, "learning_rate": 1.2366863905325443e-07, "logits/chosen": -1.570920705795288, "logits/rejected": -1.345963954925537, "logps/chosen": -157.03396606445312, "logps/rejected": -333.129150390625, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.871922969818115, "rewards/margins": 20.749706268310547, "rewards/rejected": -26.621631622314453, "step": 5110 }, { "epoch": 2.34, "learning_rate": 1.228233305156382e-07, "logits/chosen": -1.5429118871688843, "logits/rejected": -1.3001679182052612, "logps/chosen": -141.0686492919922, "logps/rejected": -349.8362731933594, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.485856056213379, "rewards/margins": 22.739933013916016, "rewards/rejected": -28.22579002380371, "step": 5120 }, { "epoch": 2.34, "learning_rate": 1.2197802197802197e-07, "logits/chosen": -1.5656944513320923, "logits/rejected": -1.3424060344696045, "logps/chosen": -164.17990112304688, "logps/rejected": -354.5301208496094, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.938088893890381, "rewards/margins": 21.791156768798828, "rewards/rejected": -28.7292423248291, "step": 5130 }, { "epoch": 2.35, "learning_rate": 1.2113271344040575e-07, "logits/chosen": -1.5167311429977417, "logits/rejected": -1.2887309789657593, "logps/chosen": -162.03640747070312, "logps/rejected": -361.96142578125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.25159215927124, "rewards/margins": 22.400646209716797, "rewards/rejected": -28.652240753173828, "step": 5140 }, { "epoch": 2.35, "learning_rate": 1.202874049027895e-07, "logits/chosen": -1.601619005203247, "logits/rejected": -1.3553307056427002, "logps/chosen": -149.4628448486328, "logps/rejected": -330.8864440917969, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/chosen": -5.38382625579834, "rewards/margins": 20.474346160888672, "rewards/rejected": -25.85817527770996, "step": 5150 }, { "epoch": 2.36, "learning_rate": 1.194420963651733e-07, "logits/chosen": -1.597741723060608, "logits/rejected": -1.4130442142486572, "logps/chosen": -158.59152221679688, "logps/rejected": -321.4584045410156, "loss": 0.0047, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -6.9201979637146, "rewards/margins": 18.02041244506836, "rewards/rejected": -24.940608978271484, "step": 5160 }, { "epoch": 2.36, "learning_rate": 1.1859678782755706e-07, "logits/chosen": -1.563659906387329, "logits/rejected": -1.3519216775894165, "logps/chosen": -145.5685272216797, "logps/rejected": -319.1047668457031, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.211328029632568, "rewards/margins": 19.444351196289062, "rewards/rejected": -24.655681610107422, "step": 5170 }, { "epoch": 2.36, "learning_rate": 1.1775147928994082e-07, "logits/chosen": -1.58176589012146, "logits/rejected": -1.3772716522216797, "logps/chosen": -152.3541259765625, "logps/rejected": -326.29095458984375, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.837064743041992, "rewards/margins": 19.613500595092773, "rewards/rejected": -25.450565338134766, "step": 5180 }, { "epoch": 2.37, "learning_rate": 1.1690617075232459e-07, "logits/chosen": -1.5791524648666382, "logits/rejected": -1.3527902364730835, "logps/chosen": -157.03770446777344, "logps/rejected": -346.9322204589844, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.26717472076416, "rewards/margins": 21.00448226928711, "rewards/rejected": -27.271657943725586, "step": 5190 }, { "epoch": 2.37, "learning_rate": 1.1606086221470836e-07, "logits/chosen": -1.599036455154419, "logits/rejected": -1.371401309967041, "logps/chosen": -143.78948974609375, "logps/rejected": -343.8502502441406, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -5.401170253753662, "rewards/margins": 22.104997634887695, "rewards/rejected": -27.506168365478516, "step": 5200 }, { "epoch": 2.37, "eval_logits/chosen": -1.5095221996307373, "eval_logits/rejected": -1.2791478633880615, "eval_logps/chosen": -155.79994201660156, "eval_logps/rejected": -324.2215270996094, "eval_loss": 0.023295849561691284, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.456273555755615, "eval_rewards/margins": 18.955419540405273, "eval_rewards/rejected": -25.411693572998047, "eval_runtime": 49.6179, "eval_samples_per_second": 57.681, "eval_steps_per_second": 1.814, "step": 5200 }, { "epoch": 2.38, "learning_rate": 1.1521555367709214e-07, "logits/chosen": -1.4285168647766113, "logits/rejected": -1.1557750701904297, "logps/chosen": -154.73887634277344, "logps/rejected": -342.5699157714844, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -6.326033592224121, "rewards/margins": 20.766834259033203, "rewards/rejected": -27.09286880493164, "step": 5210 }, { "epoch": 2.38, "learning_rate": 1.1437024513947591e-07, "logits/chosen": -1.3615461587905884, "logits/rejected": -1.1402201652526855, "logps/chosen": -144.0945587158203, "logps/rejected": -319.58355712890625, "loss": 0.0029, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.830936431884766, "rewards/margins": 18.93877601623535, "rewards/rejected": -24.769710540771484, "step": 5220 }, { "epoch": 2.39, "learning_rate": 1.1352493660185967e-07, "logits/chosen": -1.5037003755569458, "logits/rejected": -1.2450432777404785, "logps/chosen": -141.416015625, "logps/rejected": -322.6919860839844, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -4.933599948883057, "rewards/margins": 20.433279037475586, "rewards/rejected": -25.366878509521484, "step": 5230 }, { "epoch": 2.39, "learning_rate": 1.1267962806424344e-07, "logits/chosen": -1.6027101278305054, "logits/rejected": -1.3594125509262085, "logps/chosen": -143.9319610595703, "logps/rejected": -300.5264587402344, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -4.489424705505371, "rewards/margins": 18.510822296142578, "rewards/rejected": -23.000247955322266, "step": 5240 }, { "epoch": 2.4, "learning_rate": 1.1183431952662721e-07, "logits/chosen": -1.5492119789123535, "logits/rejected": -1.3233853578567505, "logps/chosen": -142.66136169433594, "logps/rejected": -319.67828369140625, "loss": 0.005, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.087625980377197, "rewards/margins": 19.76156234741211, "rewards/rejected": -24.849185943603516, "step": 5250 }, { "epoch": 2.4, "learning_rate": 1.1098901098901098e-07, "logits/chosen": -1.5399987697601318, "logits/rejected": -1.2904198169708252, "logps/chosen": -142.1669464111328, "logps/rejected": -317.7565612792969, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.012362003326416, "rewards/margins": 19.859041213989258, "rewards/rejected": -24.871402740478516, "step": 5260 }, { "epoch": 2.41, "learning_rate": 1.1014370245139476e-07, "logits/chosen": -1.5936027765274048, "logits/rejected": -1.3584023714065552, "logps/chosen": -147.6734619140625, "logps/rejected": -336.40167236328125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/chosen": -5.050702095031738, "rewards/margins": 20.767940521240234, "rewards/rejected": -25.81864356994629, "step": 5270 }, { "epoch": 2.41, "learning_rate": 1.0929839391377852e-07, "logits/chosen": -1.5691344738006592, "logits/rejected": -1.3617496490478516, "logps/chosen": -140.85934448242188, "logps/rejected": -320.1832580566406, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": -5.1365251541137695, "rewards/margins": 19.71951675415039, "rewards/rejected": -24.856042861938477, "step": 5280 }, { "epoch": 2.41, "learning_rate": 1.0845308537616229e-07, "logits/chosen": -1.5475467443466187, "logits/rejected": -1.3007447719573975, "logps/chosen": -139.46876525878906, "logps/rejected": -312.297607421875, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.21682071685791, "rewards/margins": 20.048919677734375, "rewards/rejected": -24.26573944091797, "step": 5290 }, { "epoch": 2.42, "learning_rate": 1.0760777683854606e-07, "logits/chosen": -1.5608899593353271, "logits/rejected": -1.3258769512176514, "logps/chosen": -141.82247924804688, "logps/rejected": -311.65533447265625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -4.637762546539307, "rewards/margins": 19.574703216552734, "rewards/rejected": -24.212467193603516, "step": 5300 }, { "epoch": 2.42, "eval_logits/chosen": -1.4908230304718018, "eval_logits/rejected": -1.2663732767105103, "eval_logps/chosen": -151.76470947265625, "eval_logps/rejected": -307.0641784667969, "eval_loss": 0.022586598992347717, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.0527520179748535, "eval_rewards/margins": 17.64320945739746, "eval_rewards/rejected": -23.69596290588379, "eval_runtime": 48.3485, "eval_samples_per_second": 59.195, "eval_steps_per_second": 1.861, "step": 5300 }, { "epoch": 2.42, "learning_rate": 1.0676246830092983e-07, "logits/chosen": -1.626143217086792, "logits/rejected": -1.4086579084396362, "logps/chosen": -145.34234619140625, "logps/rejected": -313.7509765625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -5.302502632141113, "rewards/margins": 18.770069122314453, "rewards/rejected": -24.07257080078125, "step": 5310 }, { "epoch": 2.43, "learning_rate": 1.059171597633136e-07, "logits/chosen": -1.6133781671524048, "logits/rejected": -1.3685221672058105, "logps/chosen": -137.14315795898438, "logps/rejected": -323.71917724609375, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -4.440985679626465, "rewards/margins": 20.653667449951172, "rewards/rejected": -25.094654083251953, "step": 5320 }, { "epoch": 2.43, "learning_rate": 1.0507185122569739e-07, "logits/chosen": -1.6073236465454102, "logits/rejected": -1.4107120037078857, "logps/chosen": -142.0266571044922, "logps/rejected": -305.3078308105469, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -4.942056655883789, "rewards/margins": 18.439678192138672, "rewards/rejected": -23.381732940673828, "step": 5330 }, { "epoch": 2.44, "learning_rate": 1.0422654268808114e-07, "logits/chosen": -1.6311960220336914, "logits/rejected": -1.3799140453338623, "logps/chosen": -144.93405151367188, "logps/rejected": -332.131103515625, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.900788307189941, "rewards/margins": 20.720251083374023, "rewards/rejected": -25.621042251586914, "step": 5340 }, { "epoch": 2.44, "learning_rate": 1.0338123415046491e-07, "logits/chosen": -1.599922776222229, "logits/rejected": -1.3647892475128174, "logps/chosen": -144.65731811523438, "logps/rejected": -324.754638671875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.055871486663818, "rewards/margins": 19.931934356689453, "rewards/rejected": -24.987808227539062, "step": 5350 }, { "epoch": 2.45, "learning_rate": 1.0253592561284868e-07, "logits/chosen": -1.5935919284820557, "logits/rejected": -1.3738867044448853, "logps/chosen": -143.09603881835938, "logps/rejected": -315.74591064453125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -4.438485145568848, "rewards/margins": 19.96566390991211, "rewards/rejected": -24.40414810180664, "step": 5360 }, { "epoch": 2.45, "learning_rate": 1.0169061707523245e-07, "logits/chosen": -1.609093427658081, "logits/rejected": -1.3772783279418945, "logps/chosen": -140.55917358398438, "logps/rejected": -324.67547607421875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -4.489682197570801, "rewards/margins": 20.565767288208008, "rewards/rejected": -25.055450439453125, "step": 5370 }, { "epoch": 2.46, "learning_rate": 1.0084530853761623e-07, "logits/chosen": -1.5920069217681885, "logits/rejected": -1.364201545715332, "logps/chosen": -141.46438598632812, "logps/rejected": -338.9762268066406, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -4.8665313720703125, "rewards/margins": 21.738967895507812, "rewards/rejected": -26.605499267578125, "step": 5380 }, { "epoch": 2.46, "learning_rate": 1e-07, "logits/chosen": -1.5002410411834717, "logits/rejected": -1.2806379795074463, "logps/chosen": -155.71710205078125, "logps/rejected": -321.7295837402344, "loss": 0.0058, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.903067588806152, "rewards/margins": 18.76999855041504, "rewards/rejected": -24.673063278198242, "step": 5390 }, { "epoch": 2.46, "learning_rate": 9.915469146238377e-08, "logits/chosen": -1.5670658349990845, "logits/rejected": -1.3476722240447998, "logps/chosen": -156.94216918945312, "logps/rejected": -331.74029541015625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.655655860900879, "rewards/margins": 19.565311431884766, "rewards/rejected": -26.220966339111328, "step": 5400 }, { "epoch": 2.46, "eval_logits/chosen": -1.478770136833191, "eval_logits/rejected": -1.2360926866531372, "eval_logps/chosen": -157.89659118652344, "eval_logps/rejected": -326.7684326171875, "eval_loss": 0.02334408089518547, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.665939807891846, "eval_rewards/margins": 19.000446319580078, "eval_rewards/rejected": -25.666383743286133, "eval_runtime": 48.9157, "eval_samples_per_second": 58.509, "eval_steps_per_second": 1.84, "step": 5400 }, { "epoch": 2.47, "learning_rate": 9.830938292476754e-08, "logits/chosen": -1.5275108814239502, "logits/rejected": -1.2839040756225586, "logps/chosen": -142.76083374023438, "logps/rejected": -345.90472412109375, "loss": 0.0049, "rewards/accuracies": 1.0, "rewards/chosen": -4.783443450927734, "rewards/margins": 22.5191650390625, "rewards/rejected": -27.302608489990234, "step": 5410 }, { "epoch": 2.47, "learning_rate": 9.74640743871513e-08, "logits/chosen": -1.548032522201538, "logits/rejected": -1.3077924251556396, "logps/chosen": -156.00958251953125, "logps/rejected": -349.4509582519531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.358696460723877, "rewards/margins": 21.26180648803711, "rewards/rejected": -27.62050437927246, "step": 5420 }, { "epoch": 2.48, "learning_rate": 9.661876584953508e-08, "logits/chosen": -1.5474439859390259, "logits/rejected": -1.3252089023590088, "logps/chosen": -151.78616333007812, "logps/rejected": -343.47808837890625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -6.073981285095215, "rewards/margins": 21.182497024536133, "rewards/rejected": -27.2564754486084, "step": 5430 }, { "epoch": 2.48, "learning_rate": 9.577345731191883e-08, "logits/chosen": -1.5336410999298096, "logits/rejected": -1.2730783224105835, "logps/chosen": -150.72671508789062, "logps/rejected": -336.71807861328125, "loss": 0.0012, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.051814556121826, "rewards/margins": 20.846153259277344, "rewards/rejected": -26.89797019958496, "step": 5440 }, { "epoch": 2.49, "learning_rate": 9.492814877430262e-08, "logits/chosen": -1.5212593078613281, "logits/rejected": -1.2231850624084473, "logps/chosen": -145.03448486328125, "logps/rejected": -345.9190368652344, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/chosen": -4.964097023010254, "rewards/margins": 22.741249084472656, "rewards/rejected": -27.705347061157227, "step": 5450 }, { "epoch": 2.49, "learning_rate": 9.408284023668639e-08, "logits/chosen": -1.5088036060333252, "logits/rejected": -1.2679582834243774, "logps/chosen": -146.006103515625, "logps/rejected": -361.91949462890625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.680733680725098, "rewards/margins": 23.57914161682129, "rewards/rejected": -29.259876251220703, "step": 5460 }, { "epoch": 2.5, "learning_rate": 9.323753169907016e-08, "logits/chosen": -1.5367735624313354, "logits/rejected": -1.2960542440414429, "logps/chosen": -145.0201873779297, "logps/rejected": -355.9728088378906, "loss": 0.0026, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -5.884902000427246, "rewards/margins": 22.25425148010254, "rewards/rejected": -28.1391544342041, "step": 5470 }, { "epoch": 2.5, "learning_rate": 9.239222316145393e-08, "logits/chosen": -1.5144281387329102, "logits/rejected": -1.254509449005127, "logps/chosen": -157.70468139648438, "logps/rejected": -352.5044860839844, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.387197971343994, "rewards/margins": 21.703859329223633, "rewards/rejected": -28.0910587310791, "step": 5480 }, { "epoch": 2.51, "learning_rate": 9.15469146238377e-08, "logits/chosen": -1.5131080150604248, "logits/rejected": -1.2870782613754272, "logps/chosen": -148.97203063964844, "logps/rejected": -355.46661376953125, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/chosen": -6.038233757019043, "rewards/margins": 22.466554641723633, "rewards/rejected": -28.50478744506836, "step": 5490 }, { "epoch": 2.51, "learning_rate": 9.070160608622146e-08, "logits/chosen": -1.5873641967773438, "logits/rejected": -1.3531397581100464, "logps/chosen": -159.92434692382812, "logps/rejected": -348.5457458496094, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/chosen": -6.174721717834473, "rewards/margins": 21.510509490966797, "rewards/rejected": -27.685232162475586, "step": 5500 }, { "epoch": 2.51, "eval_logits/chosen": -1.494807481765747, "eval_logits/rejected": -1.2609878778457642, "eval_logps/chosen": -158.99363708496094, "eval_logps/rejected": -331.5590515136719, "eval_loss": 0.024974165484309196, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.775642395019531, "eval_rewards/margins": 19.36980438232422, "eval_rewards/rejected": -26.145444869995117, "eval_runtime": 48.9031, "eval_samples_per_second": 58.524, "eval_steps_per_second": 1.84, "step": 5500 }, { "epoch": 2.51, "learning_rate": 8.985629754860524e-08, "logits/chosen": -1.6191699504852295, "logits/rejected": -1.376314640045166, "logps/chosen": -148.91360473632812, "logps/rejected": -344.6647644042969, "loss": 0.0037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.875352382659912, "rewards/margins": 21.492692947387695, "rewards/rejected": -27.3680419921875, "step": 5510 }, { "epoch": 2.52, "learning_rate": 8.901098901098901e-08, "logits/chosen": -1.5830731391906738, "logits/rejected": -1.3820655345916748, "logps/chosen": -151.00320434570312, "logps/rejected": -325.35076904296875, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.6005425453186035, "rewards/margins": 18.949426651000977, "rewards/rejected": -25.549968719482422, "step": 5520 }, { "epoch": 2.52, "learning_rate": 8.816568047337278e-08, "logits/chosen": -1.613149881362915, "logits/rejected": -1.3727449178695679, "logps/chosen": -159.81369018554688, "logps/rejected": -357.45867919921875, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.34648323059082, "rewards/margins": 22.07329559326172, "rewards/rejected": -28.41977882385254, "step": 5530 }, { "epoch": 2.53, "learning_rate": 8.732037193575655e-08, "logits/chosen": -1.6033811569213867, "logits/rejected": -1.3570531606674194, "logps/chosen": -151.11459350585938, "logps/rejected": -353.0989074707031, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.0476179122924805, "rewards/margins": 22.07670783996582, "rewards/rejected": -28.124324798583984, "step": 5540 }, { "epoch": 2.53, "learning_rate": 8.647506339814031e-08, "logits/chosen": -1.6857588291168213, "logits/rejected": -1.4397852420806885, "logps/chosen": -150.65194702148438, "logps/rejected": -328.0426025390625, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.40293025970459, "rewards/margins": 20.280824661254883, "rewards/rejected": -25.68375587463379, "step": 5550 }, { "epoch": 2.54, "learning_rate": 8.562975486052408e-08, "logits/chosen": -1.6101045608520508, "logits/rejected": -1.3583890199661255, "logps/chosen": -145.62823486328125, "logps/rejected": -346.24053955078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.573197364807129, "rewards/margins": 21.59356689453125, "rewards/rejected": -27.166759490966797, "step": 5560 }, { "epoch": 2.54, "learning_rate": 8.478444632290786e-08, "logits/chosen": -1.6645548343658447, "logits/rejected": -1.454132318496704, "logps/chosen": -150.60423278808594, "logps/rejected": -328.65740966796875, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/chosen": -4.984841346740723, "rewards/margins": 20.694120407104492, "rewards/rejected": -25.678964614868164, "step": 5570 }, { "epoch": 2.55, "learning_rate": 8.393913778529163e-08, "logits/chosen": -1.6236995458602905, "logits/rejected": -1.3915711641311646, "logps/chosen": -140.1810302734375, "logps/rejected": -341.6992492675781, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.804213523864746, "rewards/margins": 22.202869415283203, "rewards/rejected": -27.007083892822266, "step": 5580 }, { "epoch": 2.55, "learning_rate": 8.30938292476754e-08, "logits/chosen": -1.602725625038147, "logits/rejected": -1.38978111743927, "logps/chosen": -160.79714965820312, "logps/rejected": -334.1737365722656, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/chosen": -6.724407196044922, "rewards/margins": 19.819522857666016, "rewards/rejected": -26.543926239013672, "step": 5590 }, { "epoch": 2.56, "learning_rate": 8.224852071005916e-08, "logits/chosen": -1.6001827716827393, "logits/rejected": -1.327143907546997, "logps/chosen": -159.7926025390625, "logps/rejected": -371.45941162109375, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -6.600203514099121, "rewards/margins": 23.463272094726562, "rewards/rejected": -30.063480377197266, "step": 5600 }, { "epoch": 2.56, "eval_logits/chosen": -1.5472838878631592, "eval_logits/rejected": -1.3104890584945679, "eval_logps/chosen": -157.7054443359375, "eval_logps/rejected": -330.0379638671875, "eval_loss": 0.02463771402835846, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.646824359893799, "eval_rewards/margins": 19.346515655517578, "eval_rewards/rejected": -25.99333953857422, "eval_runtime": 48.2452, "eval_samples_per_second": 59.322, "eval_steps_per_second": 1.865, "step": 5600 }, { "epoch": 2.56, "learning_rate": 8.140321217244293e-08, "logits/chosen": -1.5861504077911377, "logits/rejected": -1.3507585525512695, "logps/chosen": -149.94241333007812, "logps/rejected": -344.6899719238281, "loss": 0.0029, "rewards/accuracies": 1.0, "rewards/chosen": -5.634446620941162, "rewards/margins": 21.981483459472656, "rewards/rejected": -27.615930557250977, "step": 5610 }, { "epoch": 2.57, "learning_rate": 8.05579036348267e-08, "logits/chosen": -1.5509120225906372, "logits/rejected": -1.3108220100402832, "logps/chosen": -155.71743774414062, "logps/rejected": -339.0517578125, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -6.951332092285156, "rewards/margins": 19.823192596435547, "rewards/rejected": -26.774524688720703, "step": 5620 }, { "epoch": 2.57, "learning_rate": 7.971259509721048e-08, "logits/chosen": -1.5993841886520386, "logits/rejected": -1.3499586582183838, "logps/chosen": -142.0042724609375, "logps/rejected": -348.3353576660156, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.063478946685791, "rewards/margins": 22.494701385498047, "rewards/rejected": -27.558177947998047, "step": 5630 }, { "epoch": 2.57, "learning_rate": 7.886728655959425e-08, "logits/chosen": -1.5573492050170898, "logits/rejected": -1.3277238607406616, "logps/chosen": -149.28855895996094, "logps/rejected": -334.1047668457031, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.636155128479004, "rewards/margins": 20.696395874023438, "rewards/rejected": -26.332550048828125, "step": 5640 }, { "epoch": 2.58, "learning_rate": 7.802197802197803e-08, "logits/chosen": -1.5743649005889893, "logits/rejected": -1.3265464305877686, "logps/chosen": -152.75967407226562, "logps/rejected": -342.3771057128906, "loss": 0.0025, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.603058815002441, "rewards/margins": 21.599857330322266, "rewards/rejected": -27.20291519165039, "step": 5650 }, { "epoch": 2.58, "learning_rate": 7.717666948436178e-08, "logits/chosen": -1.5480163097381592, "logits/rejected": -1.304038405418396, "logps/chosen": -148.1897430419922, "logps/rejected": -345.70404052734375, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -6.109408378601074, "rewards/margins": 21.218894958496094, "rewards/rejected": -27.328304290771484, "step": 5660 }, { "epoch": 2.59, "learning_rate": 7.633136094674555e-08, "logits/chosen": -1.564328908920288, "logits/rejected": -1.3425943851470947, "logps/chosen": -167.41494750976562, "logps/rejected": -334.5479431152344, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -7.401378631591797, "rewards/margins": 18.970346450805664, "rewards/rejected": -26.371723175048828, "step": 5670 }, { "epoch": 2.59, "learning_rate": 7.548605240912932e-08, "logits/chosen": -1.5382803678512573, "logits/rejected": -1.3093044757843018, "logps/chosen": -149.05081176757812, "logps/rejected": -340.0987854003906, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.222987174987793, "rewards/margins": 20.633708953857422, "rewards/rejected": -26.8566951751709, "step": 5680 }, { "epoch": 2.6, "learning_rate": 7.464074387151311e-08, "logits/chosen": -1.5770385265350342, "logits/rejected": -1.3411109447479248, "logps/chosen": -162.41824340820312, "logps/rejected": -363.8817443847656, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.601351737976074, "rewards/margins": 21.970367431640625, "rewards/rejected": -28.57172203063965, "step": 5690 }, { "epoch": 2.6, "learning_rate": 7.379543533389688e-08, "logits/chosen": -1.576453447341919, "logits/rejected": -1.3042480945587158, "logps/chosen": -161.98483276367188, "logps/rejected": -331.50091552734375, "loss": 0.0017, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.679347991943359, "rewards/margins": 19.423847198486328, "rewards/rejected": -26.103191375732422, "step": 5700 }, { "epoch": 2.6, "eval_logits/chosen": -1.4879993200302124, "eval_logits/rejected": -1.2445040941238403, "eval_logps/chosen": -160.33197021484375, "eval_logps/rejected": -330.4330749511719, "eval_loss": 0.02477310597896576, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.909476280212402, "eval_rewards/margins": 19.123369216918945, "eval_rewards/rejected": -26.03284454345703, "eval_runtime": 49.4109, "eval_samples_per_second": 57.922, "eval_steps_per_second": 1.821, "step": 5700 }, { "epoch": 2.61, "learning_rate": 7.295012679628063e-08, "logits/chosen": -1.5742652416229248, "logits/rejected": -1.3203189373016357, "logps/chosen": -144.94931030273438, "logps/rejected": -353.5227355957031, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -5.223757743835449, "rewards/margins": 23.126510620117188, "rewards/rejected": -28.350269317626953, "step": 5710 }, { "epoch": 2.61, "learning_rate": 7.21048182586644e-08, "logits/chosen": -1.5544811487197876, "logits/rejected": -1.3336037397384644, "logps/chosen": -150.41961669921875, "logps/rejected": -340.2244873046875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/chosen": -6.044550895690918, "rewards/margins": 20.950010299682617, "rewards/rejected": -26.99456214904785, "step": 5720 }, { "epoch": 2.62, "learning_rate": 7.125950972104817e-08, "logits/chosen": -1.5454206466674805, "logits/rejected": -1.2845289707183838, "logps/chosen": -153.46827697753906, "logps/rejected": -342.5339660644531, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.792252063751221, "rewards/margins": 21.301063537597656, "rewards/rejected": -27.09331703186035, "step": 5730 }, { "epoch": 2.62, "learning_rate": 7.041420118343195e-08, "logits/chosen": -1.501680612564087, "logits/rejected": -1.2636568546295166, "logps/chosen": -157.52508544921875, "logps/rejected": -365.23486328125, "loss": 0.0038, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -6.290675163269043, "rewards/margins": 22.773101806640625, "rewards/rejected": -29.06377601623535, "step": 5740 }, { "epoch": 2.62, "learning_rate": 6.956889264581573e-08, "logits/chosen": -1.498518943786621, "logits/rejected": -1.2680155038833618, "logps/chosen": -157.18557739257812, "logps/rejected": -360.58941650390625, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -6.826033115386963, "rewards/margins": 22.10856819152832, "rewards/rejected": -28.934600830078125, "step": 5750 }, { "epoch": 2.63, "learning_rate": 6.872358410819949e-08, "logits/chosen": -1.4816702604293823, "logits/rejected": -1.2337892055511475, "logps/chosen": -147.61468505859375, "logps/rejected": -333.8875732421875, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.283675670623779, "rewards/margins": 19.97292709350586, "rewards/rejected": -26.256603240966797, "step": 5760 }, { "epoch": 2.63, "learning_rate": 6.787827557058326e-08, "logits/chosen": -1.5251476764678955, "logits/rejected": -1.2596557140350342, "logps/chosen": -158.60626220703125, "logps/rejected": -352.7878723144531, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.330402374267578, "rewards/margins": 21.95157241821289, "rewards/rejected": -28.2819766998291, "step": 5770 }, { "epoch": 2.64, "learning_rate": 6.703296703296703e-08, "logits/chosen": -1.4867092370986938, "logits/rejected": -1.262742042541504, "logps/chosen": -171.3079071044922, "logps/rejected": -350.98760986328125, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -7.340981960296631, "rewards/margins": 20.05600929260254, "rewards/rejected": -27.396991729736328, "step": 5780 }, { "epoch": 2.64, "learning_rate": 6.61876584953508e-08, "logits/chosen": -1.4862596988677979, "logits/rejected": -1.2417380809783936, "logps/chosen": -163.3212432861328, "logps/rejected": -348.84796142578125, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/chosen": -7.111495018005371, "rewards/margins": 20.963470458984375, "rewards/rejected": -28.074966430664062, "step": 5790 }, { "epoch": 2.65, "learning_rate": 6.534234995773457e-08, "logits/chosen": -1.4852240085601807, "logits/rejected": -1.2178817987442017, "logps/chosen": -149.77035522460938, "logps/rejected": -368.5277099609375, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -5.752679347991943, "rewards/margins": 23.98304557800293, "rewards/rejected": -29.735727310180664, "step": 5800 }, { "epoch": 2.65, "eval_logits/chosen": -1.4105994701385498, "eval_logits/rejected": -1.1525661945343018, "eval_logps/chosen": -166.90325927734375, "eval_logps/rejected": -348.3179931640625, "eval_loss": 0.025658363476395607, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -7.566605567932129, "eval_rewards/margins": 20.25473403930664, "eval_rewards/rejected": -27.821340560913086, "eval_runtime": 48.6793, "eval_samples_per_second": 58.793, "eval_steps_per_second": 1.849, "step": 5800 }, { "epoch": 2.65, "learning_rate": 6.449704142011835e-08, "logits/chosen": -1.5362986326217651, "logits/rejected": -1.2705743312835693, "logps/chosen": -162.11782836914062, "logps/rejected": -355.174072265625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -6.987889289855957, "rewards/margins": 21.60100746154785, "rewards/rejected": -28.588897705078125, "step": 5810 }, { "epoch": 2.66, "learning_rate": 6.365173288250211e-08, "logits/chosen": -1.5174287557601929, "logits/rejected": -1.2580162286758423, "logps/chosen": -152.82260131835938, "logps/rejected": -358.1809997558594, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.536625862121582, "rewards/margins": 22.3090763092041, "rewards/rejected": -28.845699310302734, "step": 5820 }, { "epoch": 2.66, "learning_rate": 6.280642434488588e-08, "logits/chosen": -1.5420914888381958, "logits/rejected": -1.278857707977295, "logps/chosen": -142.987548828125, "logps/rejected": -353.4092712402344, "loss": 0.0037, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.59360408782959, "rewards/margins": 22.749876022338867, "rewards/rejected": -28.343481063842773, "step": 5830 }, { "epoch": 2.67, "learning_rate": 6.196111580726965e-08, "logits/chosen": -1.4608005285263062, "logits/rejected": -1.1506479978561401, "logps/chosen": -152.3258056640625, "logps/rejected": -392.17266845703125, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.077305793762207, "rewards/margins": 25.937328338623047, "rewards/rejected": -32.01463317871094, "step": 5840 }, { "epoch": 2.67, "learning_rate": 6.111580726965342e-08, "logits/chosen": -1.4075108766555786, "logits/rejected": -1.1176444292068481, "logps/chosen": -152.30905151367188, "logps/rejected": -353.33258056640625, "loss": 0.0058, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -6.439084053039551, "rewards/margins": 22.003307342529297, "rewards/rejected": -28.4423885345459, "step": 5850 }, { "epoch": 2.67, "learning_rate": 6.027049873203719e-08, "logits/chosen": -1.5007381439208984, "logits/rejected": -1.2174341678619385, "logps/chosen": -160.6419677734375, "logps/rejected": -369.1076354980469, "loss": 0.0065, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.2555928230285645, "rewards/margins": 23.638750076293945, "rewards/rejected": -29.894338607788086, "step": 5860 }, { "epoch": 2.68, "learning_rate": 5.942519019442096e-08, "logits/chosen": -1.4120880365371704, "logits/rejected": -1.1486588716506958, "logps/chosen": -165.69900512695312, "logps/rejected": -370.86114501953125, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -7.068509101867676, "rewards/margins": 22.668869018554688, "rewards/rejected": -29.737377166748047, "step": 5870 }, { "epoch": 2.68, "learning_rate": 5.857988165680473e-08, "logits/chosen": -1.4748567342758179, "logits/rejected": -1.2470533847808838, "logps/chosen": -152.8289794921875, "logps/rejected": -346.9162292480469, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -6.39940881729126, "rewards/margins": 21.182292938232422, "rewards/rejected": -27.581701278686523, "step": 5880 }, { "epoch": 2.69, "learning_rate": 5.77345731191885e-08, "logits/chosen": -1.3950707912445068, "logits/rejected": -1.1131783723831177, "logps/chosen": -155.28848266601562, "logps/rejected": -378.28997802734375, "loss": 0.0091, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -6.076868534088135, "rewards/margins": 24.460920333862305, "rewards/rejected": -30.537792205810547, "step": 5890 }, { "epoch": 2.69, "learning_rate": 5.688926458157227e-08, "logits/chosen": -1.5006979703903198, "logits/rejected": -1.1969038248062134, "logps/chosen": -157.77194213867188, "logps/rejected": -364.2388000488281, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -6.46331787109375, "rewards/margins": 22.99692726135254, "rewards/rejected": -29.46024513244629, "step": 5900 }, { "epoch": 2.69, "eval_logits/chosen": -1.3563249111175537, "eval_logits/rejected": -1.0876926183700562, "eval_logps/chosen": -167.66436767578125, "eval_logps/rejected": -355.3266296386719, "eval_loss": 0.026315541937947273, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -7.6427178382873535, "eval_rewards/margins": 20.879486083984375, "eval_rewards/rejected": -28.522199630737305, "eval_runtime": 49.1795, "eval_samples_per_second": 58.195, "eval_steps_per_second": 1.83, "step": 5900 }, { "epoch": 2.7, "learning_rate": 5.604395604395604e-08, "logits/chosen": -1.4665958881378174, "logits/rejected": -1.1959645748138428, "logps/chosen": -149.21376037597656, "logps/rejected": -355.60443115234375, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.6861572265625, "rewards/margins": 22.935123443603516, "rewards/rejected": -28.621280670166016, "step": 5910 }, { "epoch": 2.7, "learning_rate": 5.519864750633981e-08, "logits/chosen": -1.4556392431259155, "logits/rejected": -1.1394058465957642, "logps/chosen": -160.46078491210938, "logps/rejected": -371.42120361328125, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -6.608606815338135, "rewards/margins": 23.34994125366211, "rewards/rejected": -29.958547592163086, "step": 5920 }, { "epoch": 2.71, "learning_rate": 5.435333896872358e-08, "logits/chosen": -1.4221923351287842, "logits/rejected": -1.1590121984481812, "logps/chosen": -154.56790161132812, "logps/rejected": -368.48944091796875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -5.784639835357666, "rewards/margins": 23.924802780151367, "rewards/rejected": -29.709442138671875, "step": 5930 }, { "epoch": 2.71, "learning_rate": 5.350803043110735e-08, "logits/chosen": -1.4274301528930664, "logits/rejected": -1.168290376663208, "logps/chosen": -167.0966796875, "logps/rejected": -396.8948669433594, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -7.695293426513672, "rewards/margins": 24.249807357788086, "rewards/rejected": -31.945098876953125, "step": 5940 }, { "epoch": 2.72, "learning_rate": 5.266272189349112e-08, "logits/chosen": -1.474579095840454, "logits/rejected": -1.196025013923645, "logps/chosen": -152.70132446289062, "logps/rejected": -369.7622985839844, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.682868003845215, "rewards/margins": 23.80048370361328, "rewards/rejected": -29.483348846435547, "step": 5950 }, { "epoch": 2.72, "learning_rate": 5.181741335587489e-08, "logits/chosen": -1.4360884428024292, "logits/rejected": -1.2026808261871338, "logps/chosen": -161.60153198242188, "logps/rejected": -364.6895751953125, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.776439666748047, "rewards/margins": 21.810924530029297, "rewards/rejected": -28.58736228942871, "step": 5960 }, { "epoch": 2.72, "learning_rate": 5.0972104818258664e-08, "logits/chosen": -1.4061036109924316, "logits/rejected": -1.1320244073867798, "logps/chosen": -158.87969970703125, "logps/rejected": -367.72894287109375, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.624148368835449, "rewards/margins": 23.071802139282227, "rewards/rejected": -29.695947647094727, "step": 5970 }, { "epoch": 2.73, "learning_rate": 5.0126796280642434e-08, "logits/chosen": -1.4696153402328491, "logits/rejected": -1.1484925746917725, "logps/chosen": -165.90017700195312, "logps/rejected": -379.2989501953125, "loss": 0.0045, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -7.214005947113037, "rewards/margins": 23.535503387451172, "rewards/rejected": -30.74951171875, "step": 5980 }, { "epoch": 2.73, "learning_rate": 4.92814877430262e-08, "logits/chosen": -1.5003819465637207, "logits/rejected": -1.194960355758667, "logps/chosen": -153.00140380859375, "logps/rejected": -358.32098388671875, "loss": 0.0046, "rewards/accuracies": 1.0, "rewards/chosen": -5.173357963562012, "rewards/margins": 23.310537338256836, "rewards/rejected": -28.483896255493164, "step": 5990 }, { "epoch": 2.74, "learning_rate": 4.8436179205409975e-08, "logits/chosen": -1.4279770851135254, "logits/rejected": -1.1825916767120361, "logps/chosen": -152.53591918945312, "logps/rejected": -368.3629455566406, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/chosen": -6.5308427810668945, "rewards/margins": 23.156478881835938, "rewards/rejected": -29.687320709228516, "step": 6000 }, { "epoch": 2.74, "eval_logits/chosen": -1.4632593393325806, "eval_logits/rejected": -1.2176082134246826, "eval_logps/chosen": -159.17491149902344, "eval_logps/rejected": -333.15716552734375, "eval_loss": 0.024197373539209366, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.793771743774414, "eval_rewards/margins": 19.51148796081543, "eval_rewards/rejected": -26.30525779724121, "eval_runtime": 48.3964, "eval_samples_per_second": 59.137, "eval_steps_per_second": 1.86, "step": 6000 }, { "epoch": 2.74, "learning_rate": 4.7590870667793745e-08, "logits/chosen": -1.5039942264556885, "logits/rejected": -1.2951033115386963, "logps/chosen": -154.0543975830078, "logps/rejected": -350.24090576171875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.628406524658203, "rewards/margins": 21.300485610961914, "rewards/rejected": -27.928890228271484, "step": 6010 }, { "epoch": 2.75, "learning_rate": 4.674556213017751e-08, "logits/chosen": -1.557823896408081, "logits/rejected": -1.3056771755218506, "logps/chosen": -140.63388061523438, "logps/rejected": -359.533447265625, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -4.889941215515137, "rewards/margins": 23.574077606201172, "rewards/rejected": -28.46401596069336, "step": 6020 }, { "epoch": 2.75, "learning_rate": 4.5900253592561286e-08, "logits/chosen": -1.5295337438583374, "logits/rejected": -1.261516809463501, "logps/chosen": -152.05203247070312, "logps/rejected": -349.45220947265625, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/chosen": -5.756598472595215, "rewards/margins": 22.195484161376953, "rewards/rejected": -27.95208168029785, "step": 6030 }, { "epoch": 2.76, "learning_rate": 4.505494505494505e-08, "logits/chosen": -1.489527940750122, "logits/rejected": -1.2501986026763916, "logps/chosen": -149.98753356933594, "logps/rejected": -355.73828125, "loss": 0.0063, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.916062355041504, "rewards/margins": 22.23210906982422, "rewards/rejected": -28.14817237854004, "step": 6040 }, { "epoch": 2.76, "learning_rate": 4.420963651732882e-08, "logits/chosen": -1.5801336765289307, "logits/rejected": -1.3497835397720337, "logps/chosen": -158.12344360351562, "logps/rejected": -346.0705871582031, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -6.168856620788574, "rewards/margins": 20.862895965576172, "rewards/rejected": -27.031749725341797, "step": 6050 }, { "epoch": 2.77, "learning_rate": 4.33643279797126e-08, "logits/chosen": -1.532447338104248, "logits/rejected": -1.340090274810791, "logps/chosen": -151.39772033691406, "logps/rejected": -325.19647216796875, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.766188621520996, "rewards/margins": 18.759479522705078, "rewards/rejected": -25.525672912597656, "step": 6060 }, { "epoch": 2.77, "learning_rate": 4.251901944209636e-08, "logits/chosen": -1.5740821361541748, "logits/rejected": -1.3083857297897339, "logps/chosen": -155.3162841796875, "logps/rejected": -347.370361328125, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -5.801811218261719, "rewards/margins": 21.371374130249023, "rewards/rejected": -27.173187255859375, "step": 6070 }, { "epoch": 2.77, "learning_rate": 4.167371090448013e-08, "logits/chosen": -1.5464942455291748, "logits/rejected": -1.306731104850769, "logps/chosen": -142.73570251464844, "logps/rejected": -342.77777099609375, "loss": 0.0036, "rewards/accuracies": 1.0, "rewards/chosen": -5.27818489074707, "rewards/margins": 21.76172637939453, "rewards/rejected": -27.0399112701416, "step": 6080 }, { "epoch": 2.78, "learning_rate": 4.082840236686391e-08, "logits/chosen": -1.4867476224899292, "logits/rejected": -1.2121250629425049, "logps/chosen": -142.33984375, "logps/rejected": -352.05255126953125, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -5.472867012023926, "rewards/margins": 22.67243003845215, "rewards/rejected": -28.145299911499023, "step": 6090 }, { "epoch": 2.78, "learning_rate": 3.998309382924767e-08, "logits/chosen": -1.5143063068389893, "logits/rejected": -1.2820769548416138, "logps/chosen": -154.45347595214844, "logps/rejected": -353.0608215332031, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -6.095731258392334, "rewards/margins": 22.116703033447266, "rewards/rejected": -28.21243667602539, "step": 6100 }, { "epoch": 2.78, "eval_logits/chosen": -1.4429353475570679, "eval_logits/rejected": -1.1958377361297607, "eval_logps/chosen": -159.91256713867188, "eval_logps/rejected": -335.501953125, "eval_loss": 0.02424330823123455, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.8675360679626465, "eval_rewards/margins": 19.67220115661621, "eval_rewards/rejected": -26.539735794067383, "eval_runtime": 48.4944, "eval_samples_per_second": 59.017, "eval_steps_per_second": 1.856, "step": 6100 }, { "epoch": 2.79, "learning_rate": 3.913778529163144e-08, "logits/chosen": -1.4917789697647095, "logits/rejected": -1.2775121927261353, "logps/chosen": -166.1258544921875, "logps/rejected": -356.85479736328125, "loss": 0.0034, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -7.622159004211426, "rewards/margins": 20.657934188842773, "rewards/rejected": -28.280094146728516, "step": 6110 }, { "epoch": 2.79, "learning_rate": 3.829247675401521e-08, "logits/chosen": -1.5115435123443604, "logits/rejected": -1.2851530313491821, "logps/chosen": -157.784912109375, "logps/rejected": -343.08648681640625, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.815495491027832, "rewards/margins": 21.354597091674805, "rewards/rejected": -27.170089721679688, "step": 6120 }, { "epoch": 2.8, "learning_rate": 3.744716821639898e-08, "logits/chosen": -1.4754083156585693, "logits/rejected": -1.2475736141204834, "logps/chosen": -153.1291046142578, "logps/rejected": -343.4634704589844, "loss": 0.0045, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.1217546463012695, "rewards/margins": 21.024320602416992, "rewards/rejected": -27.146076202392578, "step": 6130 }, { "epoch": 2.8, "learning_rate": 3.6601859678782753e-08, "logits/chosen": -1.540523648262024, "logits/rejected": -1.3162726163864136, "logps/chosen": -160.690185546875, "logps/rejected": -339.6878967285156, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.333074569702148, "rewards/margins": 20.471248626708984, "rewards/rejected": -26.804325103759766, "step": 6140 }, { "epoch": 2.81, "learning_rate": 3.5756551141166524e-08, "logits/chosen": -1.5146772861480713, "logits/rejected": -1.2612508535385132, "logps/chosen": -160.58139038085938, "logps/rejected": -364.7797546386719, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.714122772216797, "rewards/margins": 22.714826583862305, "rewards/rejected": -29.428951263427734, "step": 6150 }, { "epoch": 2.81, "learning_rate": 3.4911242603550294e-08, "logits/chosen": -1.504428744316101, "logits/rejected": -1.236289620399475, "logps/chosen": -149.89620971679688, "logps/rejected": -360.3150939941406, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -5.815154075622559, "rewards/margins": 23.259376525878906, "rewards/rejected": -29.07452964782715, "step": 6160 }, { "epoch": 2.82, "learning_rate": 3.4065934065934065e-08, "logits/chosen": -1.5386370420455933, "logits/rejected": -1.2299727201461792, "logps/chosen": -150.00607299804688, "logps/rejected": -350.6068115234375, "loss": 0.0019, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.3740010261535645, "rewards/margins": 22.33721923828125, "rewards/rejected": -27.71121597290039, "step": 6170 }, { "epoch": 2.82, "learning_rate": 3.3220625528317835e-08, "logits/chosen": -1.4936457872390747, "logits/rejected": -1.2583879232406616, "logps/chosen": -163.72991943359375, "logps/rejected": -359.2135925292969, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.795522212982178, "rewards/margins": 21.916730880737305, "rewards/rejected": -28.712255477905273, "step": 6180 }, { "epoch": 2.83, "learning_rate": 3.2375316990701605e-08, "logits/chosen": -1.4773077964782715, "logits/rejected": -1.2353664636611938, "logps/chosen": -158.53857421875, "logps/rejected": -351.96295166015625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.630317687988281, "rewards/margins": 21.631898880004883, "rewards/rejected": -28.262216567993164, "step": 6190 }, { "epoch": 2.83, "learning_rate": 3.153000845308537e-08, "logits/chosen": -1.4404528141021729, "logits/rejected": -1.2168524265289307, "logps/chosen": -151.5290069580078, "logps/rejected": -339.2045593261719, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/chosen": -6.218883991241455, "rewards/margins": 20.83730125427246, "rewards/rejected": -27.05618667602539, "step": 6200 }, { "epoch": 2.83, "eval_logits/chosen": -1.400346279144287, "eval_logits/rejected": -1.1466337442398071, "eval_logps/chosen": -162.3729248046875, "eval_logps/rejected": -342.3287048339844, "eval_loss": 0.024862240999937057, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -7.11357307434082, "eval_rewards/margins": 20.108837127685547, "eval_rewards/rejected": -27.222412109375, "eval_runtime": 49.356, "eval_samples_per_second": 57.987, "eval_steps_per_second": 1.823, "step": 6200 }, { "epoch": 2.83, "learning_rate": 3.0684699915469146e-08, "logits/chosen": -1.5231372117996216, "logits/rejected": -1.2724891901016235, "logps/chosen": -153.64529418945312, "logps/rejected": -357.3453369140625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -5.938612937927246, "rewards/margins": 22.537324905395508, "rewards/rejected": -28.475936889648438, "step": 6210 }, { "epoch": 2.84, "learning_rate": 2.9839391377852916e-08, "logits/chosen": -1.4560668468475342, "logits/rejected": -1.2280206680297852, "logps/chosen": -169.55458068847656, "logps/rejected": -367.2767639160156, "loss": 0.0024, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -7.545860290527344, "rewards/margins": 21.704364776611328, "rewards/rejected": -29.250225067138672, "step": 6220 }, { "epoch": 2.84, "learning_rate": 2.8994082840236687e-08, "logits/chosen": -1.512521743774414, "logits/rejected": -1.2356914281845093, "logps/chosen": -156.36895751953125, "logps/rejected": -352.8377380371094, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.077000617980957, "rewards/margins": 22.118440628051758, "rewards/rejected": -28.1954402923584, "step": 6230 }, { "epoch": 2.85, "learning_rate": 2.8148774302620457e-08, "logits/chosen": -1.5163729190826416, "logits/rejected": -1.242244839668274, "logps/chosen": -152.9321746826172, "logps/rejected": -368.83221435546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -6.0060224533081055, "rewards/margins": 23.76858139038086, "rewards/rejected": -29.774608612060547, "step": 6240 }, { "epoch": 2.85, "learning_rate": 2.7303465765004224e-08, "logits/chosen": -1.4778908491134644, "logits/rejected": -1.2573697566986084, "logps/chosen": -162.7844696044922, "logps/rejected": -369.68560791015625, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -7.185385227203369, "rewards/margins": 22.159021377563477, "rewards/rejected": -29.344406127929688, "step": 6250 }, { "epoch": 2.86, "learning_rate": 2.6458157227387995e-08, "logits/chosen": -1.4954620599746704, "logits/rejected": -1.2360405921936035, "logps/chosen": -147.39080810546875, "logps/rejected": -339.0125732421875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -5.425978660583496, "rewards/margins": 21.780513763427734, "rewards/rejected": -27.206493377685547, "step": 6260 }, { "epoch": 2.86, "learning_rate": 2.5612848689771768e-08, "logits/chosen": -1.482560634613037, "logits/rejected": -1.218420386314392, "logps/chosen": -162.4448699951172, "logps/rejected": -368.6003723144531, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.563105583190918, "rewards/margins": 23.120697021484375, "rewards/rejected": -29.68380355834961, "step": 6270 }, { "epoch": 2.87, "learning_rate": 2.4767540152155535e-08, "logits/chosen": -1.4622437953948975, "logits/rejected": -1.205731749534607, "logps/chosen": -158.74293518066406, "logps/rejected": -361.62799072265625, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.524346351623535, "rewards/margins": 22.388973236083984, "rewards/rejected": -28.913320541381836, "step": 6280 }, { "epoch": 2.87, "learning_rate": 2.3922231614539306e-08, "logits/chosen": -1.429924488067627, "logits/rejected": -1.2010109424591064, "logps/chosen": -162.57614135742188, "logps/rejected": -357.17596435546875, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -7.556378364562988, "rewards/margins": 21.281539916992188, "rewards/rejected": -28.83791732788086, "step": 6290 }, { "epoch": 2.88, "learning_rate": 2.3076923076923076e-08, "logits/chosen": -1.4317352771759033, "logits/rejected": -1.1608693599700928, "logps/chosen": -144.91014099121094, "logps/rejected": -360.8427734375, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/chosen": -4.919538974761963, "rewards/margins": 24.008647918701172, "rewards/rejected": -28.92818832397461, "step": 6300 }, { "epoch": 2.88, "eval_logits/chosen": -1.4024568796157837, "eval_logits/rejected": -1.1501376628875732, "eval_logps/chosen": -160.83221435546875, "eval_logps/rejected": -341.0271911621094, "eval_loss": 0.02510543167591095, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -6.95950174331665, "eval_rewards/margins": 20.132761001586914, "eval_rewards/rejected": -27.092260360717773, "eval_runtime": 49.4653, "eval_samples_per_second": 57.859, "eval_steps_per_second": 1.819, "step": 6300 }, { "epoch": 2.88, "learning_rate": 2.2231614539306847e-08, "logits/chosen": -1.5092194080352783, "logits/rejected": -1.2494463920593262, "logps/chosen": -160.4544677734375, "logps/rejected": -357.55181884765625, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.256556034088135, "rewards/margins": 22.368412017822266, "rewards/rejected": -28.624969482421875, "step": 6310 }, { "epoch": 2.88, "learning_rate": 2.1386306001690617e-08, "logits/chosen": -1.4365084171295166, "logits/rejected": -1.1854063272476196, "logps/chosen": -153.9835662841797, "logps/rejected": -371.0440979003906, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.483023166656494, "rewards/margins": 23.244380950927734, "rewards/rejected": -29.727405548095703, "step": 6320 }, { "epoch": 2.89, "learning_rate": 2.0540997464074387e-08, "logits/chosen": -1.4596434831619263, "logits/rejected": -1.1587064266204834, "logps/chosen": -154.91305541992188, "logps/rejected": -355.8430480957031, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.246144771575928, "rewards/margins": 22.51506996154785, "rewards/rejected": -28.761215209960938, "step": 6330 }, { "epoch": 2.89, "learning_rate": 1.9695688926458154e-08, "logits/chosen": -1.4274392127990723, "logits/rejected": -1.1878191232681274, "logps/chosen": -157.2246551513672, "logps/rejected": -373.7826232910156, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/chosen": -6.614645481109619, "rewards/margins": 23.510032653808594, "rewards/rejected": -30.124679565429688, "step": 6340 }, { "epoch": 2.9, "learning_rate": 1.8850380388841928e-08, "logits/chosen": -1.468880534172058, "logits/rejected": -1.1757152080535889, "logps/chosen": -150.7034912109375, "logps/rejected": -348.62127685546875, "loss": 0.0023, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -5.9861860275268555, "rewards/margins": 21.781715393066406, "rewards/rejected": -27.767902374267578, "step": 6350 }, { "epoch": 2.9, "learning_rate": 1.80050718512257e-08, "logits/chosen": -1.419793725013733, "logits/rejected": -1.1910134553909302, "logps/chosen": -155.99221801757812, "logps/rejected": -349.697265625, "loss": 0.0035, "rewards/accuracies": 1.0, "rewards/chosen": -6.731202602386475, "rewards/margins": 21.212007522583008, "rewards/rejected": -27.94321060180664, "step": 6360 }, { "epoch": 2.91, "learning_rate": 1.7159763313609465e-08, "logits/chosen": -1.4699496030807495, "logits/rejected": -1.1939094066619873, "logps/chosen": -159.7974090576172, "logps/rejected": -363.49420166015625, "loss": 0.0088, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.286614418029785, "rewards/margins": 22.9273681640625, "rewards/rejected": -29.213979721069336, "step": 6370 }, { "epoch": 2.91, "learning_rate": 1.6314454775993236e-08, "logits/chosen": -1.443658709526062, "logits/rejected": -1.1851694583892822, "logps/chosen": -140.8810272216797, "logps/rejected": -347.7745056152344, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": -5.21396541595459, "rewards/margins": 22.442602157592773, "rewards/rejected": -27.656566619873047, "step": 6380 }, { "epoch": 2.92, "learning_rate": 1.5469146238377006e-08, "logits/chosen": -1.4029505252838135, "logits/rejected": -1.1696784496307373, "logps/chosen": -152.52932739257812, "logps/rejected": -353.049560546875, "loss": 0.0013, "rewards/accuracies": 1.0, "rewards/chosen": -5.933673858642578, "rewards/margins": 22.368228912353516, "rewards/rejected": -28.301904678344727, "step": 6390 }, { "epoch": 2.92, "learning_rate": 1.4623837700760778e-08, "logits/chosen": -1.5081956386566162, "logits/rejected": -1.2300573587417603, "logps/chosen": -142.51382446289062, "logps/rejected": -364.7895202636719, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -4.941340446472168, "rewards/margins": 24.369924545288086, "rewards/rejected": -29.311267852783203, "step": 6400 }, { "epoch": 2.92, "eval_logits/chosen": -1.386672854423523, "eval_logits/rejected": -1.128792405128479, "eval_logps/chosen": -162.50047302246094, "eval_logps/rejected": -347.1543884277344, "eval_loss": 0.025260092690587044, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -7.126327037811279, "eval_rewards/margins": 20.57865333557129, "eval_rewards/rejected": -27.70498275756836, "eval_runtime": 48.9439, "eval_samples_per_second": 58.475, "eval_steps_per_second": 1.839, "step": 6400 }, { "epoch": 2.93, "learning_rate": 1.3778529163144547e-08, "logits/chosen": -1.3817229270935059, "logits/rejected": -1.1325594186782837, "logps/chosen": -154.70213317871094, "logps/rejected": -367.01953125, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/chosen": -6.565983772277832, "rewards/margins": 22.861417770385742, "rewards/rejected": -29.42740249633789, "step": 6410 }, { "epoch": 2.93, "learning_rate": 1.2933220625528317e-08, "logits/chosen": -1.4914562702178955, "logits/rejected": -1.2700700759887695, "logps/chosen": -153.0315399169922, "logps/rejected": -350.91888427734375, "loss": 0.0047, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.609368801116943, "rewards/margins": 21.178762435913086, "rewards/rejected": -27.788131713867188, "step": 6420 }, { "epoch": 2.93, "learning_rate": 1.2087912087912088e-08, "logits/chosen": -1.445056676864624, "logits/rejected": -1.1885985136032104, "logps/chosen": -154.37966918945312, "logps/rejected": -371.3355407714844, "loss": 0.0036, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.280908107757568, "rewards/margins": 23.554365158081055, "rewards/rejected": -29.835275650024414, "step": 6430 }, { "epoch": 2.94, "learning_rate": 1.1242603550295858e-08, "logits/chosen": -1.4893832206726074, "logits/rejected": -1.2540134191513062, "logps/chosen": -157.16061401367188, "logps/rejected": -351.6845703125, "loss": 0.0034, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.9279327392578125, "rewards/margins": 20.94320297241211, "rewards/rejected": -27.871135711669922, "step": 6440 }, { "epoch": 2.94, "learning_rate": 1.0397295012679627e-08, "logits/chosen": -1.4614284038543701, "logits/rejected": -1.2071059942245483, "logps/chosen": -154.94326782226562, "logps/rejected": -356.627197265625, "loss": 0.0015, "rewards/accuracies": 1.0, "rewards/chosen": -6.525387763977051, "rewards/margins": 21.915996551513672, "rewards/rejected": -28.441381454467773, "step": 6450 }, { "epoch": 2.95, "learning_rate": 9.551986475063399e-09, "logits/chosen": -1.4893442392349243, "logits/rejected": -1.2231934070587158, "logps/chosen": -154.13449096679688, "logps/rejected": -371.9717712402344, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/chosen": -6.246697425842285, "rewards/margins": 23.48638153076172, "rewards/rejected": -29.733081817626953, "step": 6460 }, { "epoch": 2.95, "learning_rate": 8.706677937447167e-09, "logits/chosen": -1.5489857196807861, "logits/rejected": -1.2381068468093872, "logps/chosen": -157.33367919921875, "logps/rejected": -368.0112609863281, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -6.018952369689941, "rewards/margins": 23.636343002319336, "rewards/rejected": -29.655292510986328, "step": 6470 }, { "epoch": 2.96, "learning_rate": 7.861369399830938e-09, "logits/chosen": -1.4903433322906494, "logits/rejected": -1.2489861249923706, "logps/chosen": -157.29364013671875, "logps/rejected": -350.2141418457031, "loss": 0.0035, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -6.610640525817871, "rewards/margins": 21.394311904907227, "rewards/rejected": -28.004953384399414, "step": 6480 }, { "epoch": 2.96, "learning_rate": 7.016060862214708e-09, "logits/chosen": -1.4743247032165527, "logits/rejected": -1.2524207830429077, "logps/chosen": -162.22581481933594, "logps/rejected": -361.66302490234375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/chosen": -7.312684535980225, "rewards/margins": 21.844139099121094, "rewards/rejected": -29.15682601928711, "step": 6490 }, { "epoch": 2.97, "learning_rate": 6.170752324598478e-09, "logits/chosen": -1.4401403665542603, "logits/rejected": -1.2095723152160645, "logps/chosen": -158.8749237060547, "logps/rejected": -342.3064880371094, "loss": 0.0023, "rewards/accuracies": 1.0, "rewards/chosen": -6.759598731994629, "rewards/margins": 20.1064510345459, "rewards/rejected": -26.866052627563477, "step": 6500 }, { "epoch": 2.97, "eval_logits/chosen": -1.3866673707962036, "eval_logits/rejected": -1.12844979763031, "eval_logps/chosen": -162.0652313232422, "eval_logps/rejected": -347.0838317871094, "eval_loss": 0.02533269301056862, "eval_rewards/accuracies": 0.980555534362793, "eval_rewards/chosen": -7.0828022956848145, "eval_rewards/margins": 20.615121841430664, "eval_rewards/rejected": -27.697925567626953, "eval_runtime": 48.9445, "eval_samples_per_second": 58.474, "eval_steps_per_second": 1.839, "step": 6500 }, { "epoch": 2.97, "learning_rate": 5.325443786982248e-09, "logits/chosen": -1.5094420909881592, "logits/rejected": -1.2922451496124268, "logps/chosen": -153.08706665039062, "logps/rejected": -348.4012756347656, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/chosen": -5.878302097320557, "rewards/margins": 22.082895278930664, "rewards/rejected": -27.961196899414062, "step": 6510 }, { "epoch": 2.98, "learning_rate": 4.4801352493660185e-09, "logits/chosen": -1.4939616918563843, "logits/rejected": -1.2416346073150635, "logps/chosen": -144.56320190429688, "logps/rejected": -368.6065673828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.104874610900879, "rewards/margins": 24.626834869384766, "rewards/rejected": -29.73171043395996, "step": 6520 }, { "epoch": 2.98, "learning_rate": 3.6348267117497885e-09, "logits/chosen": -1.456238031387329, "logits/rejected": -1.2182656526565552, "logps/chosen": -169.69577026367188, "logps/rejected": -350.4523620605469, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/chosen": -7.985285758972168, "rewards/margins": 20.02364158630371, "rewards/rejected": -28.008926391601562, "step": 6530 }, { "epoch": 2.98, "learning_rate": 2.7895181741335584e-09, "logits/chosen": -1.459014654159546, "logits/rejected": -1.2330175638198853, "logps/chosen": -160.15809631347656, "logps/rejected": -352.0977478027344, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/chosen": -6.381222724914551, "rewards/margins": 21.789058685302734, "rewards/rejected": -28.170278549194336, "step": 6540 }, { "epoch": 2.99, "learning_rate": 1.944209636517329e-09, "logits/chosen": -1.5096580982208252, "logits/rejected": -1.2079827785491943, "logps/chosen": -148.11973571777344, "logps/rejected": -359.211181640625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/chosen": -5.379202365875244, "rewards/margins": 23.468761444091797, "rewards/rejected": -28.847965240478516, "step": 6550 }, { "epoch": 2.99, "learning_rate": 1.0989010989010988e-09, "logits/chosen": -1.4669349193572998, "logits/rejected": -1.241938591003418, "logps/chosen": -158.7514190673828, "logps/rejected": -364.5976257324219, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/chosen": -6.362316131591797, "rewards/margins": 22.672849655151367, "rewards/rejected": -29.035167694091797, "step": 6560 }, { "epoch": 3.0, "learning_rate": 2.53592561284869e-10, "logits/chosen": -1.4910657405853271, "logits/rejected": -1.1942976713180542, "logps/chosen": -152.2435760498047, "logps/rejected": -353.9536437988281, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/chosen": -5.943681716918945, "rewards/margins": 22.530424118041992, "rewards/rejected": -28.474105834960938, "step": 6570 }, { "epoch": 3.0, "step": 6573, "total_flos": 0.0, "train_loss": 0.026962732661432107, "train_runtime": 14195.2291, "train_samples_per_second": 29.63, "train_steps_per_second": 0.463 } ], "logging_steps": 10, "max_steps": 6573, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }