{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1185, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.7777777777777776e-09, "logits/chosen": -2.251155138015747, "logits/rejected": -2.290525197982788, "logps/chosen": -10.782907485961914, "logps/rejected": -20.26141357421875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 5.555555555555555e-09, "logits/chosen": -2.3156614303588867, "logits/rejected": -2.3112926483154297, "logps/chosen": -8.906049728393555, "logps/rejected": -13.079424858093262, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.01, "learning_rate": 8.333333333333332e-09, "logits/chosen": -2.2933030128479004, "logits/rejected": -2.3070309162139893, "logps/chosen": -20.602537155151367, "logps/rejected": -8.019554138183594, "loss": 0.6941, "rewards/accuracies": 0.0, "rewards/chosen": -0.006184768863022327, "rewards/margins": -0.00855875015258789, "rewards/rejected": 0.002373981522396207, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.111111111111111e-08, "logits/chosen": -2.2357864379882812, "logits/rejected": -2.2234561443328857, "logps/chosen": -14.492926597595215, "logps/rejected": -11.480640411376953, "loss": 0.6903, "rewards/accuracies": 1.0, "rewards/chosen": 0.0146026611328125, "rewards/margins": 0.019470691680908203, "rewards/rejected": -0.004868030548095703, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.3888888888888889e-08, "logits/chosen": -2.4275574684143066, "logits/rejected": -2.4360222816467285, "logps/chosen": -10.13316535949707, "logps/rejected": -8.722558975219727, "loss": 0.6944, "rewards/accuracies": 0.0, "rewards/chosen": 0.009807586669921875, "rewards/margins": -0.00670023076236248, "rewards/rejected": 0.016507817432284355, "step": 5 }, { "epoch": 0.02, "learning_rate": 1.6666666666666664e-08, "logits/chosen": -2.399014472961426, "logits/rejected": -2.4101688861846924, "logps/chosen": -11.98112964630127, "logps/rejected": -8.969330787658691, "loss": 0.6925, "rewards/accuracies": 1.0, "rewards/chosen": 0.0010432243579998612, "rewards/margins": 0.0028240205720067024, "rewards/rejected": -0.0017807960975915194, "step": 6 }, { "epoch": 0.02, "learning_rate": 1.9444444444444445e-08, "logits/chosen": -2.3042173385620117, "logits/rejected": -2.297013521194458, "logps/chosen": -9.274126052856445, "logps/rejected": -12.779285430908203, "loss": 0.6973, "rewards/accuracies": 0.0, "rewards/chosen": -0.0021114349365234375, "rewards/margins": -0.010490513406693935, "rewards/rejected": 0.008379078470170498, "step": 7 }, { "epoch": 0.02, "learning_rate": 2.222222222222222e-08, "logits/chosen": -2.404820680618286, "logits/rejected": -2.4090590476989746, "logps/chosen": -9.351872444152832, "logps/rejected": -15.882179260253906, "loss": 0.6928, "rewards/accuracies": 0.0, "rewards/chosen": -0.012915420345962048, "rewards/margins": -0.019392967224121094, "rewards/rejected": 0.006477546878159046, "step": 8 }, { "epoch": 0.02, "learning_rate": 2.5e-08, "logits/chosen": -2.316859483718872, "logits/rejected": -2.4578850269317627, "logps/chosen": -11.231912612915039, "logps/rejected": -28.373615264892578, "loss": 0.6911, "rewards/accuracies": 0.0, "rewards/chosen": -0.0007831573602743447, "rewards/margins": -0.004547882359474897, "rewards/rejected": 0.00376472482457757, "step": 9 }, { "epoch": 0.03, "learning_rate": 2.7777777777777777e-08, "logits/chosen": -2.3539140224456787, "logits/rejected": -2.3642921447753906, "logps/chosen": -11.053009033203125, "logps/rejected": -9.935391426086426, "loss": 0.6974, "rewards/accuracies": 0.0, "rewards/chosen": -0.0036386491265147924, "rewards/margins": -0.009048747830092907, "rewards/rejected": 0.005410098936408758, "step": 10 }, { "epoch": 0.03, "learning_rate": 3.0555555555555556e-08, "logits/chosen": -2.297903299331665, "logits/rejected": -2.333979606628418, "logps/chosen": -20.888988494873047, "logps/rejected": -10.221244812011719, "loss": 0.6958, "rewards/accuracies": 1.0, "rewards/chosen": 0.0008787155384197831, "rewards/margins": 0.0005325317615643144, "rewards/rejected": 0.00034618377685546875, "step": 11 }, { "epoch": 0.03, "learning_rate": 3.333333333333333e-08, "logits/chosen": -2.284327983856201, "logits/rejected": -2.2842278480529785, "logps/chosen": -16.384443283081055, "logps/rejected": -10.148228645324707, "loss": 0.6909, "rewards/accuracies": 0.0, "rewards/chosen": 0.004810142796486616, "rewards/margins": -0.005770205985754728, "rewards/rejected": 0.010580348782241344, "step": 12 }, { "epoch": 0.03, "learning_rate": 3.6111111111111106e-08, "logits/chosen": -2.245131015777588, "logits/rejected": -2.2456161975860596, "logps/chosen": -9.487794876098633, "logps/rejected": -11.964010238647461, "loss": 0.6968, "rewards/accuracies": 0.0, "rewards/chosen": -0.007477378938347101, "rewards/margins": -0.009975242428481579, "rewards/rejected": 0.0024978637229651213, "step": 13 }, { "epoch": 0.04, "learning_rate": 3.888888888888889e-08, "logits/chosen": -2.32973051071167, "logits/rejected": -2.367626190185547, "logps/chosen": -8.745000839233398, "logps/rejected": -14.180999755859375, "loss": 0.6896, "rewards/accuracies": 0.0, "rewards/chosen": 9.403228614246473e-05, "rewards/margins": -0.008861160837113857, "rewards/rejected": 0.008955192752182484, "step": 14 }, { "epoch": 0.04, "learning_rate": 4.166666666666667e-08, "logits/chosen": -2.323753595352173, "logits/rejected": -2.3319687843322754, "logps/chosen": -16.87789535522461, "logps/rejected": -9.563054084777832, "loss": 0.6935, "rewards/accuracies": 0.0, "rewards/chosen": 0.0025482177734375, "rewards/margins": -0.005190849304199219, "rewards/rejected": 0.007739067077636719, "step": 15 }, { "epoch": 0.04, "learning_rate": 4.444444444444444e-08, "logits/chosen": -2.2659263610839844, "logits/rejected": -2.258920907974243, "logps/chosen": -10.178679466247559, "logps/rejected": -12.64559268951416, "loss": 0.6939, "rewards/accuracies": 0.0, "rewards/chosen": -0.011472797952592373, "rewards/margins": -0.012462520971894264, "rewards/rejected": 0.000989723252132535, "step": 16 }, { "epoch": 0.04, "learning_rate": 4.722222222222222e-08, "logits/chosen": -2.3194334506988525, "logits/rejected": -2.3239552974700928, "logps/chosen": -10.912995338439941, "logps/rejected": -7.9347147941589355, "loss": 0.6912, "rewards/accuracies": 0.0, "rewards/chosen": -0.0033225060906261206, "rewards/margins": -0.002136373659595847, "rewards/rejected": -0.0011861324310302734, "step": 17 }, { "epoch": 0.05, "learning_rate": 5e-08, "logits/chosen": -2.222842216491699, "logits/rejected": -2.2079272270202637, "logps/chosen": -13.186687469482422, "logps/rejected": -9.008330345153809, "loss": 0.6934, "rewards/accuracies": 0.0, "rewards/chosen": 0.0040111541748046875, "rewards/margins": -0.004067706875503063, "rewards/rejected": 0.00807886105030775, "step": 18 }, { "epoch": 0.05, "learning_rate": 5.2777777777777776e-08, "logits/chosen": -2.313772439956665, "logits/rejected": -2.3616647720336914, "logps/chosen": -9.962752342224121, "logps/rejected": -17.478290557861328, "loss": 0.6948, "rewards/accuracies": 0.0, "rewards/chosen": 0.0003853797970805317, "rewards/margins": -0.013519954867661, "rewards/rejected": 0.013905334286391735, "step": 19 }, { "epoch": 0.05, "learning_rate": 5.5555555555555555e-08, "logits/chosen": -2.340240478515625, "logits/rejected": -2.3382697105407715, "logps/chosen": -7.785999774932861, "logps/rejected": -14.24544906616211, "loss": 0.6918, "rewards/accuracies": 1.0, "rewards/chosen": 0.012238931842148304, "rewards/margins": 0.014431810937821865, "rewards/rejected": -0.0021928788628429174, "step": 20 }, { "epoch": 0.05, "learning_rate": 5.833333333333333e-08, "logits/chosen": -2.4015228748321533, "logits/rejected": -2.4659464359283447, "logps/chosen": -11.977352142333984, "logps/rejected": -60.278194427490234, "loss": 0.6895, "rewards/accuracies": 1.0, "rewards/chosen": 0.01577158086001873, "rewards/margins": 0.009728718549013138, "rewards/rejected": 0.006042861845344305, "step": 21 }, { "epoch": 0.06, "learning_rate": 6.111111111111111e-08, "logits/chosen": -2.2287230491638184, "logits/rejected": -2.2383711338043213, "logps/chosen": -16.117895126342773, "logps/rejected": -10.509078979492188, "loss": 0.6889, "rewards/accuracies": 1.0, "rewards/chosen": 0.01761798933148384, "rewards/margins": 0.01485433615744114, "rewards/rejected": 0.002763652941212058, "step": 22 }, { "epoch": 0.06, "learning_rate": 6.388888888888888e-08, "logits/chosen": -2.2411246299743652, "logits/rejected": -2.2347028255462646, "logps/chosen": -10.085240364074707, "logps/rejected": -14.151752471923828, "loss": 0.6934, "rewards/accuracies": 0.0, "rewards/chosen": 0.0019618987571448088, "rewards/margins": -0.010255050845444202, "rewards/rejected": 0.012216949835419655, "step": 23 }, { "epoch": 0.06, "learning_rate": 6.666666666666665e-08, "logits/chosen": -2.2314107418060303, "logits/rejected": -2.245650053024292, "logps/chosen": -12.037229537963867, "logps/rejected": -20.54799461364746, "loss": 0.6902, "rewards/accuracies": 0.0, "rewards/chosen": -0.007698822300881147, "rewards/margins": -0.004960251040756702, "rewards/rejected": -0.002738571260124445, "step": 24 }, { "epoch": 0.06, "learning_rate": 6.944444444444444e-08, "logits/chosen": -2.3739867210388184, "logits/rejected": -2.4022092819213867, "logps/chosen": -13.848346710205078, "logps/rejected": -18.15325927734375, "loss": 0.6878, "rewards/accuracies": 1.0, "rewards/chosen": 0.014647865667939186, "rewards/margins": 0.023394012823700905, "rewards/rejected": -0.008746147155761719, "step": 25 }, { "epoch": 0.07, "learning_rate": 7.222222222222221e-08, "logits/chosen": -2.3042447566986084, "logits/rejected": -2.3081510066986084, "logps/chosen": -11.33069896697998, "logps/rejected": -9.452447891235352, "loss": 0.6923, "rewards/accuracies": 1.0, "rewards/chosen": 0.013450145721435547, "rewards/margins": 0.004664135165512562, "rewards/rejected": 0.008786010555922985, "step": 26 }, { "epoch": 0.07, "learning_rate": 7.5e-08, "logits/chosen": -2.1975603103637695, "logits/rejected": -2.1902077198028564, "logps/chosen": -7.389289379119873, "logps/rejected": -14.604959487915039, "loss": 0.6974, "rewards/accuracies": 0.0, "rewards/chosen": -0.0017209053039550781, "rewards/margins": -0.020051384344697, "rewards/rejected": 0.01833047904074192, "step": 27 }, { "epoch": 0.07, "learning_rate": 7.777777777777778e-08, "logits/chosen": -2.228060245513916, "logits/rejected": -2.223344326019287, "logps/chosen": -10.184532165527344, "logps/rejected": -12.421089172363281, "loss": 0.6918, "rewards/accuracies": 1.0, "rewards/chosen": 0.021340180188417435, "rewards/margins": 0.02054758183658123, "rewards/rejected": 0.0007925987592898309, "step": 28 }, { "epoch": 0.07, "learning_rate": 8.055555555555555e-08, "logits/chosen": -2.310373306274414, "logits/rejected": -2.457501173019409, "logps/chosen": -10.028726577758789, "logps/rejected": -38.69629669189453, "loss": 0.6985, "rewards/accuracies": 0.0, "rewards/chosen": 0.0020967484451830387, "rewards/margins": -0.012630272656679153, "rewards/rejected": 0.014727020636200905, "step": 29 }, { "epoch": 0.08, "learning_rate": 8.333333333333334e-08, "logits/chosen": -2.2626585960388184, "logits/rejected": -2.280324697494507, "logps/chosen": -9.523645401000977, "logps/rejected": -12.855677604675293, "loss": 0.696, "rewards/accuracies": 0.0, "rewards/chosen": 0.011544418521225452, "rewards/margins": -9.317416697740555e-05, "rewards/rejected": 0.011637592688202858, "step": 30 }, { "epoch": 0.08, "learning_rate": 8.611111111111111e-08, "logits/chosen": -2.2047688961029053, "logits/rejected": -2.195566177368164, "logps/chosen": -8.019308090209961, "logps/rejected": -15.95208740234375, "loss": 0.6895, "rewards/accuracies": 1.0, "rewards/chosen": 0.018977642059326172, "rewards/margins": 0.008568095974624157, "rewards/rejected": 0.010409546084702015, "step": 31 }, { "epoch": 0.08, "learning_rate": 8.888888888888888e-08, "logits/chosen": -2.1517953872680664, "logits/rejected": -2.16373348236084, "logps/chosen": -25.792665481567383, "logps/rejected": -19.515846252441406, "loss": 0.6922, "rewards/accuracies": 1.0, "rewards/chosen": 0.01716175116598606, "rewards/margins": 0.01131897047162056, "rewards/rejected": 0.005842781160026789, "step": 32 }, { "epoch": 0.08, "learning_rate": 9.166666666666665e-08, "logits/chosen": -2.328913450241089, "logits/rejected": -2.3349523544311523, "logps/chosen": -10.255416870117188, "logps/rejected": -11.075268745422363, "loss": 0.6907, "rewards/accuracies": 1.0, "rewards/chosen": 0.020607758313417435, "rewards/margins": 0.0037012100219726562, "rewards/rejected": 0.01690654829144478, "step": 33 }, { "epoch": 0.09, "learning_rate": 9.444444444444444e-08, "logits/chosen": -2.3008005619049072, "logits/rejected": -2.348806858062744, "logps/chosen": -9.137683868408203, "logps/rejected": -23.968210220336914, "loss": 0.689, "rewards/accuracies": 1.0, "rewards/chosen": 0.011859036050736904, "rewards/margins": 0.03205766901373863, "rewards/rejected": -0.020198632031679153, "step": 34 }, { "epoch": 0.09, "learning_rate": 9.722222222222221e-08, "logits/chosen": -2.384718894958496, "logits/rejected": -2.3831288814544678, "logps/chosen": -7.260378360748291, "logps/rejected": -8.924722671508789, "loss": 0.6959, "rewards/accuracies": 0.0, "rewards/chosen": 0.005785846617072821, "rewards/margins": -0.0025912285782396793, "rewards/rejected": 0.0083770751953125, "step": 35 }, { "epoch": 0.09, "learning_rate": 1e-07, "logits/chosen": -2.2777934074401855, "logits/rejected": -2.2848429679870605, "logps/chosen": -15.27165699005127, "logps/rejected": -10.042055130004883, "loss": 0.6954, "rewards/accuracies": 1.0, "rewards/chosen": 0.021575450897216797, "rewards/margins": 0.011199283413589, "rewards/rejected": 0.010376167483627796, "step": 36 }, { "epoch": 0.09, "learning_rate": 9.999981310424979e-08, "logits/chosen": -2.258793830871582, "logits/rejected": -2.267273187637329, "logps/chosen": -9.347782135009766, "logps/rejected": -8.00654125213623, "loss": 0.6969, "rewards/accuracies": 0.0, "rewards/chosen": 0.009143448434770107, "rewards/margins": -0.012005710043013096, "rewards/rejected": 0.021149158477783203, "step": 37 }, { "epoch": 0.1, "learning_rate": 9.999925241839639e-08, "logits/chosen": -2.4075210094451904, "logits/rejected": -2.407179117202759, "logps/chosen": -8.593279838562012, "logps/rejected": -12.576166152954102, "loss": 0.6953, "rewards/accuracies": 0.0, "rewards/chosen": 0.0042213439010083675, "rewards/margins": -0.032406046986579895, "rewards/rejected": 0.0366273894906044, "step": 38 }, { "epoch": 0.1, "learning_rate": 9.999831794663139e-08, "logits/chosen": -2.2663564682006836, "logits/rejected": -2.2679898738861084, "logps/chosen": -6.470956802368164, "logps/rejected": -14.59794807434082, "loss": 0.6855, "rewards/accuracies": 0.0, "rewards/chosen": 0.017479801550507545, "rewards/margins": -0.002501200884580612, "rewards/rejected": 0.019981002435088158, "step": 39 }, { "epoch": 0.1, "learning_rate": 9.999700969594072e-08, "logits/chosen": -2.373479127883911, "logits/rejected": -2.373305559158325, "logps/chosen": -8.469653129577637, "logps/rejected": -11.079957962036133, "loss": 0.6946, "rewards/accuracies": 1.0, "rewards/chosen": 0.019240474328398705, "rewards/margins": 0.013956736773252487, "rewards/rejected": 0.00528373708948493, "step": 40 }, { "epoch": 0.1, "learning_rate": 9.999532767610463e-08, "logits/chosen": -2.2897374629974365, "logits/rejected": -2.2778537273406982, "logps/chosen": -7.845694065093994, "logps/rejected": -15.224809646606445, "loss": 0.6889, "rewards/accuracies": 1.0, "rewards/chosen": 0.030341004952788353, "rewards/margins": 0.024761628359556198, "rewards/rejected": 0.0055793761275708675, "step": 41 }, { "epoch": 0.11, "learning_rate": 9.999327189969766e-08, "logits/chosen": -2.3190882205963135, "logits/rejected": -2.326948881149292, "logps/chosen": -9.577224731445312, "logps/rejected": -11.122310638427734, "loss": 0.6938, "rewards/accuracies": 0.0, "rewards/chosen": 0.017658425495028496, "rewards/margins": -0.012121200561523438, "rewards/rejected": 0.029779626056551933, "step": 42 }, { "epoch": 0.11, "learning_rate": 9.999084238208843e-08, "logits/chosen": -2.3033673763275146, "logits/rejected": -2.3173880577087402, "logps/chosen": -10.61982536315918, "logps/rejected": -12.593499183654785, "loss": 0.6997, "rewards/accuracies": 0.0, "rewards/chosen": 0.017891883850097656, "rewards/margins": -0.019423771649599075, "rewards/rejected": 0.03731565549969673, "step": 43 }, { "epoch": 0.11, "learning_rate": 9.998803914143958e-08, "logits/chosen": -2.445028066635132, "logits/rejected": -2.441214084625244, "logps/chosen": -10.911981582641602, "logps/rejected": -8.203333854675293, "loss": 0.6919, "rewards/accuracies": 0.0, "rewards/chosen": 0.02522888220846653, "rewards/margins": -0.005054568871855736, "rewards/rejected": 0.030283451080322266, "step": 44 }, { "epoch": 0.11, "learning_rate": 9.998486219870768e-08, "logits/chosen": -2.405484914779663, "logits/rejected": -2.401054859161377, "logps/chosen": -9.220285415649414, "logps/rejected": -11.2347993850708, "loss": 0.6935, "rewards/accuracies": 0.0, "rewards/chosen": 0.025310611352324486, "rewards/margins": -0.002899361774325371, "rewards/rejected": 0.028209973126649857, "step": 45 }, { "epoch": 0.12, "learning_rate": 9.998131157764299e-08, "logits/chosen": -2.2933576107025146, "logits/rejected": -2.2848641872406006, "logps/chosen": -8.330476760864258, "logps/rejected": -12.087732315063477, "loss": 0.692, "rewards/accuracies": 1.0, "rewards/chosen": 0.037008192390203476, "rewards/margins": 0.008808804675936699, "rewards/rejected": 0.028199387714266777, "step": 46 }, { "epoch": 0.12, "learning_rate": 9.997738730478937e-08, "logits/chosen": -2.3420042991638184, "logits/rejected": -2.3499135971069336, "logps/chosen": -9.528127670288086, "logps/rejected": -9.760770797729492, "loss": 0.6923, "rewards/accuracies": 1.0, "rewards/chosen": 0.04446430131793022, "rewards/margins": 0.01138439029455185, "rewards/rejected": 0.03307991102337837, "step": 47 }, { "epoch": 0.12, "learning_rate": 9.997308940948403e-08, "logits/chosen": -2.2878506183624268, "logits/rejected": -2.3432788848876953, "logps/chosen": -12.468029022216797, "logps/rejected": -16.389095306396484, "loss": 0.6894, "rewards/accuracies": 0.0, "rewards/chosen": 0.02616109885275364, "rewards/margins": -0.0019085891544818878, "rewards/rejected": 0.028069688007235527, "step": 48 }, { "epoch": 0.12, "learning_rate": 9.996841792385728e-08, "logits/chosen": -2.209237575531006, "logits/rejected": -2.2137858867645264, "logps/chosen": -12.089640617370605, "logps/rejected": -8.6370267868042, "loss": 0.6946, "rewards/accuracies": 1.0, "rewards/chosen": 0.039658453315496445, "rewards/margins": 0.00021324306726455688, "rewards/rejected": 0.03944521024823189, "step": 49 }, { "epoch": 0.13, "learning_rate": 9.996337288283235e-08, "logits/chosen": -2.365189790725708, "logits/rejected": -2.368232250213623, "logps/chosen": -10.239635467529297, "logps/rejected": -8.128108978271484, "loss": 0.6923, "rewards/accuracies": 0.0, "rewards/chosen": 0.040624428540468216, "rewards/margins": -0.0037420280277729034, "rewards/rejected": 0.04436645656824112, "step": 50 }, { "epoch": 0.13, "learning_rate": 9.995795432412513e-08, "logits/chosen": -2.2657971382141113, "logits/rejected": -2.2674458026885986, "logps/chosen": -14.102256774902344, "logps/rejected": -10.162593841552734, "loss": 0.6921, "rewards/accuracies": 0.0, "rewards/chosen": 0.03197517618536949, "rewards/margins": -0.027306556701660156, "rewards/rejected": 0.05928173288702965, "step": 51 }, { "epoch": 0.13, "learning_rate": 9.995216228824383e-08, "logits/chosen": -2.2221310138702393, "logits/rejected": -2.2223565578460693, "logps/chosen": -11.075093269348145, "logps/rejected": -9.620279312133789, "loss": 0.6849, "rewards/accuracies": 1.0, "rewards/chosen": 0.04625396803021431, "rewards/margins": 0.018206214532256126, "rewards/rejected": 0.028047753497958183, "step": 52 }, { "epoch": 0.13, "learning_rate": 9.994599681848872e-08, "logits/chosen": -2.2892096042633057, "logits/rejected": -2.2831053733825684, "logps/chosen": -8.493809700012207, "logps/rejected": -12.282792091369629, "loss": 0.6882, "rewards/accuracies": 1.0, "rewards/chosen": 0.05215158686041832, "rewards/margins": 0.02818289026618004, "rewards/rejected": 0.02396869659423828, "step": 53 }, { "epoch": 0.14, "learning_rate": 9.993945796095182e-08, "logits/chosen": -2.3301384449005127, "logits/rejected": -2.3254809379577637, "logps/chosen": -8.311911582946777, "logps/rejected": -7.76749849319458, "loss": 0.6892, "rewards/accuracies": 0.0, "rewards/chosen": 0.043358899652957916, "rewards/margins": -0.008824203163385391, "rewards/rejected": 0.05218310281634331, "step": 54 }, { "epoch": 0.14, "learning_rate": 9.993254576451651e-08, "logits/chosen": -2.3386824131011963, "logits/rejected": -2.3437559604644775, "logps/chosen": -10.771146774291992, "logps/rejected": -9.599311828613281, "loss": 0.6983, "rewards/accuracies": 1.0, "rewards/chosen": 0.08056249469518661, "rewards/margins": 0.018746469169855118, "rewards/rejected": 0.0618160255253315, "step": 55 }, { "epoch": 0.14, "learning_rate": 9.992526028085719e-08, "logits/chosen": -2.3995988368988037, "logits/rejected": -2.405921697616577, "logps/chosen": -7.2163896560668945, "logps/rejected": -18.467792510986328, "loss": 0.7005, "rewards/accuracies": 0.0, "rewards/chosen": 0.056181956082582474, "rewards/margins": -0.013443712145090103, "rewards/rejected": 0.06962566822767258, "step": 56 }, { "epoch": 0.14, "learning_rate": 9.991760156443891e-08, "logits/chosen": -2.3563780784606934, "logits/rejected": -2.3700294494628906, "logps/chosen": -12.483226776123047, "logps/rejected": -17.985464096069336, "loss": 0.6949, "rewards/accuracies": 0.0, "rewards/chosen": 0.02932615391910076, "rewards/margins": -0.018426990136504173, "rewards/rejected": 0.047753144055604935, "step": 57 }, { "epoch": 0.15, "learning_rate": 9.990956967251691e-08, "logits/chosen": -2.3553686141967773, "logits/rejected": -2.4264047145843506, "logps/chosen": -8.145747184753418, "logps/rejected": -31.78484535217285, "loss": 0.6924, "rewards/accuracies": 1.0, "rewards/chosen": 0.0669245719909668, "rewards/margins": 0.026976488530635834, "rewards/rejected": 0.03994808346033096, "step": 58 }, { "epoch": 0.15, "learning_rate": 9.990116466513628e-08, "logits/chosen": -2.2811779975891113, "logits/rejected": -2.2770471572875977, "logps/chosen": -13.650777816772461, "logps/rejected": -8.854363441467285, "loss": 0.6886, "rewards/accuracies": 1.0, "rewards/chosen": 0.04131946712732315, "rewards/margins": 0.008516024798154831, "rewards/rejected": 0.03280344232916832, "step": 59 }, { "epoch": 0.15, "learning_rate": 9.989238660513139e-08, "logits/chosen": -2.3481178283691406, "logits/rejected": -2.3402676582336426, "logps/chosen": -6.687458515167236, "logps/rejected": -12.664064407348633, "loss": 0.6829, "rewards/accuracies": 1.0, "rewards/chosen": 0.07337222248315811, "rewards/margins": 0.007370710372924805, "rewards/rejected": 0.0660015121102333, "step": 60 }, { "epoch": 0.15, "learning_rate": 9.988323555812556e-08, "logits/chosen": -2.3630034923553467, "logits/rejected": -2.3759443759918213, "logps/chosen": -8.776424407958984, "logps/rejected": -7.585143566131592, "loss": 0.6786, "rewards/accuracies": 1.0, "rewards/chosen": 0.09770336002111435, "rewards/margins": 0.025303125381469727, "rewards/rejected": 0.07240023463964462, "step": 61 }, { "epoch": 0.16, "learning_rate": 9.987371159253045e-08, "logits/chosen": -2.290987730026245, "logits/rejected": -2.2802364826202393, "logps/chosen": -8.246307373046875, "logps/rejected": -14.845922470092773, "loss": 0.6899, "rewards/accuracies": 1.0, "rewards/chosen": 0.07657299190759659, "rewards/margins": 0.02967701107263565, "rewards/rejected": 0.04689598083496094, "step": 62 }, { "epoch": 0.16, "learning_rate": 9.98638147795456e-08, "logits/chosen": -2.3162119388580322, "logits/rejected": -2.3328897953033447, "logps/chosen": -11.033806800842285, "logps/rejected": -12.615653991699219, "loss": 0.6927, "rewards/accuracies": 1.0, "rewards/chosen": 0.055504512041807175, "rewards/margins": 0.020941734313964844, "rewards/rejected": 0.03456277772784233, "step": 63 }, { "epoch": 0.16, "learning_rate": 9.985354519315789e-08, "logits/chosen": -2.3995749950408936, "logits/rejected": -2.3815410137176514, "logps/chosen": -15.25584602355957, "logps/rejected": -13.785385131835938, "loss": 0.6859, "rewards/accuracies": 1.0, "rewards/chosen": 0.05100612714886665, "rewards/margins": 0.014096450060606003, "rewards/rejected": 0.03690967708826065, "step": 64 }, { "epoch": 0.16, "learning_rate": 9.984290291014104e-08, "logits/chosen": -2.4213905334472656, "logits/rejected": -2.418506622314453, "logps/chosen": -9.262581825256348, "logps/rejected": -8.607918739318848, "loss": 0.6811, "rewards/accuracies": 1.0, "rewards/chosen": 0.07333364337682724, "rewards/margins": 0.01247396320104599, "rewards/rejected": 0.06085968017578125, "step": 65 }, { "epoch": 0.17, "learning_rate": 9.983188801005491e-08, "logits/chosen": -2.281540632247925, "logits/rejected": -2.2869515419006348, "logps/chosen": -11.493328094482422, "logps/rejected": -14.484272003173828, "loss": 0.6907, "rewards/accuracies": 1.0, "rewards/chosen": 0.08212051540613174, "rewards/margins": 0.03359174728393555, "rewards/rejected": 0.0485287681221962, "step": 66 }, { "epoch": 0.17, "learning_rate": 9.982050057524504e-08, "logits/chosen": -2.321901559829712, "logits/rejected": -2.3079512119293213, "logps/chosen": -17.171924591064453, "logps/rejected": -8.322056770324707, "loss": 0.6867, "rewards/accuracies": 0.0, "rewards/chosen": 0.056252289563417435, "rewards/margins": -0.02923755720257759, "rewards/rejected": 0.08548984676599503, "step": 67 }, { "epoch": 0.17, "learning_rate": 9.980874069084195e-08, "logits/chosen": -2.4112558364868164, "logits/rejected": -2.4044647216796875, "logps/chosen": -7.031162261962891, "logps/rejected": -13.763116836547852, "loss": 0.6892, "rewards/accuracies": 0.0, "rewards/chosen": 0.07550058513879776, "rewards/margins": -0.025150679051876068, "rewards/rejected": 0.10065126419067383, "step": 68 }, { "epoch": 0.17, "learning_rate": 9.979660844476055e-08, "logits/chosen": -2.320528745651245, "logits/rejected": -2.313854694366455, "logps/chosen": -8.5520658493042, "logps/rejected": -12.9886474609375, "loss": 0.6966, "rewards/accuracies": 0.0, "rewards/chosen": 0.07935638725757599, "rewards/margins": -0.009007073938846588, "rewards/rejected": 0.08836346119642258, "step": 69 }, { "epoch": 0.18, "learning_rate": 9.978410392769942e-08, "logits/chosen": -2.273944854736328, "logits/rejected": -2.2621872425079346, "logps/chosen": -7.219916343688965, "logps/rejected": -9.739103317260742, "loss": 0.6835, "rewards/accuracies": 0.0, "rewards/chosen": 0.09610023349523544, "rewards/margins": -0.0017921477556228638, "rewards/rejected": 0.0978923812508583, "step": 70 }, { "epoch": 0.18, "learning_rate": 9.977122723314024e-08, "logits/chosen": -2.4012231826782227, "logits/rejected": -2.4005186557769775, "logps/chosen": -7.496364593505859, "logps/rejected": -11.709842681884766, "loss": 0.6968, "rewards/accuracies": 0.0, "rewards/chosen": 0.07882747799158096, "rewards/margins": -0.0297725647687912, "rewards/rejected": 0.10860004276037216, "step": 71 }, { "epoch": 0.18, "learning_rate": 9.975797845734697e-08, "logits/chosen": -2.3897013664245605, "logits/rejected": -2.397162914276123, "logps/chosen": -10.268528938293457, "logps/rejected": -6.465458393096924, "loss": 0.6799, "rewards/accuracies": 0.0, "rewards/chosen": 0.10178051143884659, "rewards/margins": -6.720423698425293e-06, "rewards/rejected": 0.10178723186254501, "step": 72 }, { "epoch": 0.18, "learning_rate": 9.974435769936521e-08, "logits/chosen": -2.385056495666504, "logits/rejected": -2.3882510662078857, "logps/chosen": -8.509510040283203, "logps/rejected": -6.572999000549316, "loss": 0.6836, "rewards/accuracies": 1.0, "rewards/chosen": 0.13900451362133026, "rewards/margins": 0.051112644374370575, "rewards/rejected": 0.08789186924695969, "step": 73 }, { "epoch": 0.19, "learning_rate": 9.973036506102143e-08, "logits/chosen": -2.2648138999938965, "logits/rejected": -2.257382869720459, "logps/chosen": -7.879438877105713, "logps/rejected": -14.408223152160645, "loss": 0.6958, "rewards/accuracies": 0.0, "rewards/chosen": 0.1146996021270752, "rewards/margins": -0.02904801070690155, "rewards/rejected": 0.14374761283397675, "step": 74 }, { "epoch": 0.19, "learning_rate": 9.97160006469222e-08, "logits/chosen": -2.274707317352295, "logits/rejected": -2.2733700275421143, "logps/chosen": -14.691344261169434, "logps/rejected": -11.639968872070312, "loss": 0.7028, "rewards/accuracies": 0.0, "rewards/chosen": 0.026598453521728516, "rewards/margins": -0.05607175827026367, "rewards/rejected": 0.08267021179199219, "step": 75 }, { "epoch": 0.19, "learning_rate": 9.970126456445347e-08, "logits/chosen": -2.4446263313293457, "logits/rejected": -2.445239543914795, "logps/chosen": -9.30801773071289, "logps/rejected": -11.824153900146484, "loss": 0.6883, "rewards/accuracies": 0.0, "rewards/chosen": 0.0892181396484375, "rewards/margins": -0.004361249506473541, "rewards/rejected": 0.09357938915491104, "step": 76 }, { "epoch": 0.19, "learning_rate": 9.968615692377967e-08, "logits/chosen": -2.3749377727508545, "logits/rejected": -2.385051727294922, "logps/chosen": -9.142724990844727, "logps/rejected": -7.792050838470459, "loss": 0.6775, "rewards/accuracies": 1.0, "rewards/chosen": 0.12743473052978516, "rewards/margins": 0.033888764679431915, "rewards/rejected": 0.09354596585035324, "step": 77 }, { "epoch": 0.2, "learning_rate": 9.967067783784296e-08, "logits/chosen": -2.29624080657959, "logits/rejected": -2.2811450958251953, "logps/chosen": -8.178417205810547, "logps/rejected": -13.68399429321289, "loss": 0.7008, "rewards/accuracies": 0.0, "rewards/chosen": 0.055104732513427734, "rewards/margins": -0.06831426918506622, "rewards/rejected": 0.12341900169849396, "step": 78 }, { "epoch": 0.2, "learning_rate": 9.965482742236233e-08, "logits/chosen": -2.321242332458496, "logits/rejected": -2.330425500869751, "logps/chosen": -12.599666595458984, "logps/rejected": -7.071631908416748, "loss": 0.6726, "rewards/accuracies": 1.0, "rewards/chosen": 0.131103515625, "rewards/margins": 0.01788334548473358, "rewards/rejected": 0.11322017014026642, "step": 79 }, { "epoch": 0.2, "learning_rate": 9.963860579583283e-08, "logits/chosen": -2.3321149349212646, "logits/rejected": -2.3273234367370605, "logps/chosen": -13.02752685546875, "logps/rejected": -6.677356243133545, "loss": 0.6771, "rewards/accuracies": 1.0, "rewards/chosen": 0.11950540542602539, "rewards/margins": 0.012194298207759857, "rewards/rejected": 0.10731110721826553, "step": 80 }, { "epoch": 0.21, "learning_rate": 9.962201307952455e-08, "logits/chosen": -2.349937677383423, "logits/rejected": -2.3546178340911865, "logps/chosen": -6.5398783683776855, "logps/rejected": -15.333742141723633, "loss": 0.6789, "rewards/accuracies": 1.0, "rewards/chosen": 0.13412271440029144, "rewards/margins": 0.044866614043712616, "rewards/rejected": 0.08925610035657883, "step": 81 }, { "epoch": 0.21, "learning_rate": 9.960504939748183e-08, "logits/chosen": -2.297102451324463, "logits/rejected": -2.299778938293457, "logps/chosen": -14.065062522888184, "logps/rejected": -11.87783432006836, "loss": 0.6822, "rewards/accuracies": 1.0, "rewards/chosen": 0.13335256278514862, "rewards/margins": 0.027179233729839325, "rewards/rejected": 0.1061733290553093, "step": 82 }, { "epoch": 0.21, "learning_rate": 9.958771487652229e-08, "logits/chosen": -2.3084757328033447, "logits/rejected": -2.369929313659668, "logps/chosen": -13.505768775939941, "logps/rejected": -16.122526168823242, "loss": 0.679, "rewards/accuracies": 1.0, "rewards/chosen": 0.09060239791870117, "rewards/margins": 0.03444013372063637, "rewards/rejected": 0.056162264198064804, "step": 83 }, { "epoch": 0.21, "learning_rate": 9.957000964623584e-08, "logits/chosen": -2.2419540882110596, "logits/rejected": -2.245893955230713, "logps/chosen": -11.629585266113281, "logps/rejected": -6.926548480987549, "loss": 0.697, "rewards/accuracies": 0.0, "rewards/chosen": 0.12380485981702805, "rewards/margins": -0.013680122792720795, "rewards/rejected": 0.13748498260974884, "step": 84 }, { "epoch": 0.22, "learning_rate": 9.955193383898375e-08, "logits/chosen": -2.275421380996704, "logits/rejected": -2.2869162559509277, "logps/chosen": -6.962718486785889, "logps/rejected": -24.157032012939453, "loss": 0.6727, "rewards/accuracies": 1.0, "rewards/chosen": 0.16896148025989532, "rewards/margins": 0.03089919686317444, "rewards/rejected": 0.1380622833967209, "step": 85 }, { "epoch": 0.22, "learning_rate": 9.953348758989774e-08, "logits/chosen": -2.364327907562256, "logits/rejected": -2.3961331844329834, "logps/chosen": -6.074333190917969, "logps/rejected": -22.41566276550293, "loss": 0.6955, "rewards/accuracies": 1.0, "rewards/chosen": 0.12964411079883575, "rewards/margins": 0.054119013249874115, "rewards/rejected": 0.07552509754896164, "step": 86 }, { "epoch": 0.22, "learning_rate": 9.951467103687878e-08, "logits/chosen": -2.358398675918579, "logits/rejected": -2.366637706756592, "logps/chosen": -8.930864334106445, "logps/rejected": -6.957666397094727, "loss": 0.6774, "rewards/accuracies": 1.0, "rewards/chosen": 0.2121165245771408, "rewards/margins": 0.068640798330307, "rewards/rejected": 0.1434757262468338, "step": 87 }, { "epoch": 0.22, "learning_rate": 9.949548432059627e-08, "logits/chosen": -2.234553337097168, "logits/rejected": -2.2414472103118896, "logps/chosen": -8.131969451904297, "logps/rejected": -10.069925308227539, "loss": 0.6597, "rewards/accuracies": 1.0, "rewards/chosen": 0.1568155288696289, "rewards/margins": 0.05143146216869354, "rewards/rejected": 0.10538406670093536, "step": 88 }, { "epoch": 0.23, "learning_rate": 9.94759275844868e-08, "logits/chosen": -2.321408271789551, "logits/rejected": -2.322737455368042, "logps/chosen": -11.386994361877441, "logps/rejected": -7.7682013511657715, "loss": 0.6832, "rewards/accuracies": 1.0, "rewards/chosen": 0.18223229050636292, "rewards/margins": 0.039091065526008606, "rewards/rejected": 0.1431412249803543, "step": 89 }, { "epoch": 0.23, "learning_rate": 9.945600097475321e-08, "logits/chosen": -2.154876232147217, "logits/rejected": -2.1611087322235107, "logps/chosen": -5.6735382080078125, "logps/rejected": -14.151209831237793, "loss": 0.6899, "rewards/accuracies": 1.0, "rewards/chosen": 0.15440630912780762, "rewards/margins": 0.002858772873878479, "rewards/rejected": 0.15154753625392914, "step": 90 }, { "epoch": 0.23, "learning_rate": 9.943570464036346e-08, "logits/chosen": -2.3191275596618652, "logits/rejected": -2.3116331100463867, "logps/chosen": -7.667213439941406, "logps/rejected": -15.116925239562988, "loss": 0.6844, "rewards/accuracies": 0.0, "rewards/chosen": 0.15812216699123383, "rewards/margins": -0.02414149045944214, "rewards/rejected": 0.18226365745067596, "step": 91 }, { "epoch": 0.23, "learning_rate": 9.941503873304948e-08, "logits/chosen": -2.3163650035858154, "logits/rejected": -2.306354284286499, "logps/chosen": -6.184035301208496, "logps/rejected": -12.396512031555176, "loss": 0.7089, "rewards/accuracies": 0.0, "rewards/chosen": 0.13709627091884613, "rewards/margins": -0.06034129858016968, "rewards/rejected": 0.1974375694990158, "step": 92 }, { "epoch": 0.24, "learning_rate": 9.939400340730611e-08, "logits/chosen": -2.418696880340576, "logits/rejected": -2.416820526123047, "logps/chosen": -8.644186973571777, "logps/rejected": -16.93317413330078, "loss": 0.6824, "rewards/accuracies": 0.0, "rewards/chosen": 0.10516462475061417, "rewards/margins": -0.08320837467908859, "rewards/rejected": 0.18837299942970276, "step": 93 }, { "epoch": 0.24, "learning_rate": 9.937259882038984e-08, "logits/chosen": -2.3100626468658447, "logits/rejected": -2.3106935024261475, "logps/chosen": -10.803244590759277, "logps/rejected": -8.474474906921387, "loss": 0.6872, "rewards/accuracies": 1.0, "rewards/chosen": 0.1580028533935547, "rewards/margins": 0.02233286201953888, "rewards/rejected": 0.1356699913740158, "step": 94 }, { "epoch": 0.24, "learning_rate": 9.935082513231774e-08, "logits/chosen": -2.240180253982544, "logits/rejected": -2.2451658248901367, "logps/chosen": -8.883211135864258, "logps/rejected": -8.442010879516602, "loss": 0.67, "rewards/accuracies": 1.0, "rewards/chosen": 0.19505099952220917, "rewards/margins": 0.0028711259365081787, "rewards/rejected": 0.192179873585701, "step": 95 }, { "epoch": 0.24, "learning_rate": 9.932868250586618e-08, "logits/chosen": -2.377664804458618, "logits/rejected": -2.383371353149414, "logps/chosen": -9.529404640197754, "logps/rejected": -13.763164520263672, "loss": 0.6553, "rewards/accuracies": 1.0, "rewards/chosen": 0.17246675491333008, "rewards/margins": 0.051703453063964844, "rewards/rejected": 0.12076330184936523, "step": 96 }, { "epoch": 0.25, "learning_rate": 9.930617110656969e-08, "logits/chosen": -2.2977116107940674, "logits/rejected": -2.3047213554382324, "logps/chosen": -8.127012252807617, "logps/rejected": -10.958481788635254, "loss": 0.6969, "rewards/accuracies": 0.0, "rewards/chosen": 0.1540699005126953, "rewards/margins": -0.015508368611335754, "rewards/rejected": 0.16957826912403107, "step": 97 }, { "epoch": 0.25, "learning_rate": 9.928329110271967e-08, "logits/chosen": -2.2578206062316895, "logits/rejected": -2.2483878135681152, "logps/chosen": -12.649738311767578, "logps/rejected": -10.197129249572754, "loss": 0.6818, "rewards/accuracies": 0.0, "rewards/chosen": 0.12448320537805557, "rewards/margins": -0.03963746875524521, "rewards/rejected": 0.16412067413330078, "step": 98 }, { "epoch": 0.25, "learning_rate": 9.926004266536312e-08, "logits/chosen": -2.3449270725250244, "logits/rejected": -2.3345260620117188, "logps/chosen": -6.593894004821777, "logps/rejected": -11.91490364074707, "loss": 0.6913, "rewards/accuracies": 1.0, "rewards/chosen": 0.1739497184753418, "rewards/margins": 0.005191415548324585, "rewards/rejected": 0.1687583029270172, "step": 99 }, { "epoch": 0.25, "learning_rate": 9.92364259683014e-08, "logits/chosen": -2.313767671585083, "logits/rejected": -2.481663227081299, "logps/chosen": -7.4924540519714355, "logps/rejected": -24.800247192382812, "loss": 0.6495, "rewards/accuracies": 1.0, "rewards/chosen": 0.19359612464904785, "rewards/margins": 0.18517518043518066, "rewards/rejected": 0.008420944213867188, "step": 100 }, { "epoch": 0.26, "learning_rate": 9.921244118808896e-08, "logits/chosen": -2.3208534717559814, "logits/rejected": -2.3098530769348145, "logps/chosen": -6.33670711517334, "logps/rejected": -13.92251205444336, "loss": 0.7107, "rewards/accuracies": 0.0, "rewards/chosen": 0.16557936370372772, "rewards/margins": -0.11972491443157196, "rewards/rejected": 0.2853042781352997, "step": 101 }, { "epoch": 0.26, "learning_rate": 9.918808850403191e-08, "logits/chosen": -2.1488444805145264, "logits/rejected": -2.154360055923462, "logps/chosen": -9.952493667602539, "logps/rejected": -6.2892866134643555, "loss": 0.6775, "rewards/accuracies": 1.0, "rewards/chosen": 0.25112172961235046, "rewards/margins": 0.09563581645488739, "rewards/rejected": 0.15548591315746307, "step": 102 }, { "epoch": 0.26, "learning_rate": 9.916336809818677e-08, "logits/chosen": -2.0400068759918213, "logits/rejected": -2.059363842010498, "logps/chosen": -8.145963668823242, "logps/rejected": -10.451142311096191, "loss": 0.6556, "rewards/accuracies": 1.0, "rewards/chosen": 0.2394229918718338, "rewards/margins": 0.15277595818042755, "rewards/rejected": 0.08664703369140625, "step": 103 }, { "epoch": 0.26, "learning_rate": 9.913828015535912e-08, "logits/chosen": -2.419247627258301, "logits/rejected": -2.4119174480438232, "logps/chosen": -15.341346740722656, "logps/rejected": -12.569608688354492, "loss": 0.6772, "rewards/accuracies": 0.0, "rewards/chosen": 0.13003253936767578, "rewards/margins": -0.028047561645507812, "rewards/rejected": 0.1580801010131836, "step": 104 }, { "epoch": 0.27, "learning_rate": 9.911282486310213e-08, "logits/chosen": -2.3368308544158936, "logits/rejected": -2.3495352268218994, "logps/chosen": -18.056201934814453, "logps/rejected": -7.242648124694824, "loss": 0.6712, "rewards/accuracies": 1.0, "rewards/chosen": 0.2058860808610916, "rewards/margins": 0.06421041488647461, "rewards/rejected": 0.141675665974617, "step": 105 }, { "epoch": 0.27, "learning_rate": 9.908700241171525e-08, "logits/chosen": -2.3022446632385254, "logits/rejected": -2.307387590408325, "logps/chosen": -8.332282066345215, "logps/rejected": -7.77037239074707, "loss": 0.6697, "rewards/accuracies": 1.0, "rewards/chosen": 0.25433340668678284, "rewards/margins": 0.1159825325012207, "rewards/rejected": 0.13835087418556213, "step": 106 }, { "epoch": 0.27, "learning_rate": 9.906081299424274e-08, "logits/chosen": -2.31939959526062, "logits/rejected": -2.3223395347595215, "logps/chosen": -22.28997039794922, "logps/rejected": -18.9766845703125, "loss": 0.7226, "rewards/accuracies": 0.0, "rewards/chosen": 0.25091227889060974, "rewards/margins": -0.06259289383888245, "rewards/rejected": 0.3135051727294922, "step": 107 }, { "epoch": 0.27, "learning_rate": 9.903425680647223e-08, "logits/chosen": -2.244576930999756, "logits/rejected": -2.2510488033294678, "logps/chosen": -7.817705154418945, "logps/rejected": -7.859537601470947, "loss": 0.689, "rewards/accuracies": 1.0, "rewards/chosen": 0.24625340104103088, "rewards/margins": 0.044060662388801575, "rewards/rejected": 0.2021927386522293, "step": 108 }, { "epoch": 0.28, "learning_rate": 9.900733404693326e-08, "logits/chosen": -2.3133962154388428, "logits/rejected": -2.3439841270446777, "logps/chosen": -12.152213096618652, "logps/rejected": -13.265905380249023, "loss": 0.6535, "rewards/accuracies": 1.0, "rewards/chosen": 0.3097677230834961, "rewards/margins": 0.15168151259422302, "rewards/rejected": 0.15808621048927307, "step": 109 }, { "epoch": 0.28, "learning_rate": 9.89800449168958e-08, "logits/chosen": -2.28334379196167, "logits/rejected": -2.2756197452545166, "logps/chosen": -7.216150760650635, "logps/rejected": -11.241876602172852, "loss": 0.6984, "rewards/accuracies": 1.0, "rewards/chosen": 0.26003336906433105, "rewards/margins": 0.044960543513298035, "rewards/rejected": 0.21507282555103302, "step": 110 }, { "epoch": 0.28, "learning_rate": 9.895238962036878e-08, "logits/chosen": -2.3693323135375977, "logits/rejected": -2.3750040531158447, "logps/chosen": -6.576362133026123, "logps/rejected": -6.558511257171631, "loss": 0.6556, "rewards/accuracies": 1.0, "rewards/chosen": 0.28623396158218384, "rewards/margins": 0.09873038530349731, "rewards/rejected": 0.18750357627868652, "step": 111 }, { "epoch": 0.28, "learning_rate": 9.892436836409844e-08, "logits/chosen": -2.179234504699707, "logits/rejected": -2.1988205909729004, "logps/chosen": -6.903590202331543, "logps/rejected": -10.308650970458984, "loss": 0.674, "rewards/accuracies": 1.0, "rewards/chosen": 0.34281817078590393, "rewards/margins": 0.18841877579689026, "rewards/rejected": 0.15439939498901367, "step": 112 }, { "epoch": 0.29, "learning_rate": 9.889598135756698e-08, "logits/chosen": -2.3582968711853027, "logits/rejected": -2.374633312225342, "logps/chosen": -6.701103210449219, "logps/rejected": -13.75051498413086, "loss": 0.6873, "rewards/accuracies": 1.0, "rewards/chosen": 0.33890077471733093, "rewards/margins": 0.1922319531440735, "rewards/rejected": 0.14666882157325745, "step": 113 }, { "epoch": 0.29, "learning_rate": 9.88672288129908e-08, "logits/chosen": -2.282735586166382, "logits/rejected": -2.318765163421631, "logps/chosen": -5.8327317237854, "logps/rejected": -11.032581329345703, "loss": 0.6833, "rewards/accuracies": 1.0, "rewards/chosen": 0.3493565022945404, "rewards/margins": 0.1413799673318863, "rewards/rejected": 0.2079765349626541, "step": 114 }, { "epoch": 0.29, "learning_rate": 9.883811094531905e-08, "logits/chosen": -2.3629648685455322, "logits/rejected": -2.355062484741211, "logps/chosen": -3.1533265113830566, "logps/rejected": -11.863086700439453, "loss": 0.7028, "rewards/accuracies": 0.0, "rewards/chosen": 0.26024171710014343, "rewards/margins": -0.23545300960540771, "rewards/rejected": 0.49569472670555115, "step": 115 }, { "epoch": 0.29, "learning_rate": 9.880862797223197e-08, "logits/chosen": -2.293534278869629, "logits/rejected": -2.3033268451690674, "logps/chosen": -7.7163896560668945, "logps/rejected": -10.86029052734375, "loss": 0.6646, "rewards/accuracies": 1.0, "rewards/chosen": 0.25925788283348083, "rewards/margins": 0.15155276656150818, "rewards/rejected": 0.10770511627197266, "step": 116 }, { "epoch": 0.3, "learning_rate": 9.877878011413922e-08, "logits/chosen": -2.4036202430725098, "logits/rejected": -2.4085326194763184, "logps/chosen": -4.272627830505371, "logps/rejected": -14.79520034790039, "loss": 0.7114, "rewards/accuracies": 0.0, "rewards/chosen": 0.2945423722267151, "rewards/margins": -0.09378066658973694, "rewards/rejected": 0.388323038816452, "step": 117 }, { "epoch": 0.3, "learning_rate": 9.874856759417835e-08, "logits/chosen": -2.3779141902923584, "logits/rejected": -2.3770787715911865, "logps/chosen": -6.360766887664795, "logps/rejected": -12.156745910644531, "loss": 0.6776, "rewards/accuracies": 1.0, "rewards/chosen": 0.29383721947669983, "rewards/margins": 0.13030706346035004, "rewards/rejected": 0.1635301560163498, "step": 118 }, { "epoch": 0.3, "learning_rate": 9.871799063821302e-08, "logits/chosen": -2.327033758163452, "logits/rejected": -2.3394649028778076, "logps/chosen": -12.435856819152832, "logps/rejected": -7.968975067138672, "loss": 0.7064, "rewards/accuracies": 1.0, "rewards/chosen": 0.28056594729423523, "rewards/margins": 0.004228323698043823, "rewards/rejected": 0.2763376235961914, "step": 119 }, { "epoch": 0.3, "learning_rate": 9.868704947483133e-08, "logits/chosen": -2.3708302974700928, "logits/rejected": -2.3727800846099854, "logps/chosen": -5.859662055969238, "logps/rejected": -5.416698455810547, "loss": 0.6707, "rewards/accuracies": 0.0, "rewards/chosen": 0.23342399299144745, "rewards/margins": -0.13961802423000336, "rewards/rejected": 0.3730420172214508, "step": 120 }, { "epoch": 0.31, "learning_rate": 9.865574433534419e-08, "logits/chosen": -2.330406427383423, "logits/rejected": -2.359208345413208, "logps/chosen": -25.804088592529297, "logps/rejected": -20.68337631225586, "loss": 0.6309, "rewards/accuracies": 1.0, "rewards/chosen": 0.26122817397117615, "rewards/margins": 0.10962599515914917, "rewards/rejected": 0.15160217881202698, "step": 121 }, { "epoch": 0.31, "learning_rate": 9.862407545378347e-08, "logits/chosen": -2.2272346019744873, "logits/rejected": -2.2254374027252197, "logps/chosen": -7.800145149230957, "logps/rejected": -9.878728866577148, "loss": 0.6524, "rewards/accuracies": 1.0, "rewards/chosen": 0.41749030351638794, "rewards/margins": 0.14163866639137268, "rewards/rejected": 0.27585163712501526, "step": 122 }, { "epoch": 0.31, "learning_rate": 9.859204306690037e-08, "logits/chosen": -2.3341994285583496, "logits/rejected": -2.3361082077026367, "logps/chosen": -6.023181915283203, "logps/rejected": -6.3039727210998535, "loss": 0.6757, "rewards/accuracies": 1.0, "rewards/chosen": 0.26251038908958435, "rewards/margins": 0.10011395812034607, "rewards/rejected": 0.16239643096923828, "step": 123 }, { "epoch": 0.31, "learning_rate": 9.855964741416354e-08, "logits/chosen": -2.393300771713257, "logits/rejected": -2.425994634628296, "logps/chosen": -7.506701946258545, "logps/rejected": -12.673072814941406, "loss": 0.6787, "rewards/accuracies": 1.0, "rewards/chosen": 0.29827913641929626, "rewards/margins": 0.0481223464012146, "rewards/rejected": 0.25015679001808167, "step": 124 }, { "epoch": 0.32, "learning_rate": 9.85268887377574e-08, "logits/chosen": -2.316159248352051, "logits/rejected": -2.3049795627593994, "logps/chosen": -5.140657901763916, "logps/rejected": -13.41786003112793, "loss": 0.6785, "rewards/accuracies": 1.0, "rewards/chosen": 0.3821887671947479, "rewards/margins": 0.03549572825431824, "rewards/rejected": 0.3466930389404297, "step": 125 }, { "epoch": 0.32, "learning_rate": 9.849376728258023e-08, "logits/chosen": -2.2903144359588623, "logits/rejected": -2.262986421585083, "logps/chosen": -8.763707160949707, "logps/rejected": -15.552227020263672, "loss": 0.7497, "rewards/accuracies": 0.0, "rewards/chosen": 0.17461319267749786, "rewards/margins": -0.34425830841064453, "rewards/rejected": 0.5188714861869812, "step": 126 }, { "epoch": 0.32, "learning_rate": 9.84602832962424e-08, "logits/chosen": -2.2316598892211914, "logits/rejected": -2.2311768531799316, "logps/chosen": -3.764014482498169, "logps/rejected": -13.267660140991211, "loss": 0.7056, "rewards/accuracies": 0.0, "rewards/chosen": 0.30869683623313904, "rewards/margins": -0.2579972445964813, "rewards/rejected": 0.5666940808296204, "step": 127 }, { "epoch": 0.32, "learning_rate": 9.842643702906453e-08, "logits/chosen": -2.3822357654571533, "logits/rejected": -2.3960750102996826, "logps/chosen": -11.825008392333984, "logps/rejected": -6.478175640106201, "loss": 0.7229, "rewards/accuracies": 1.0, "rewards/chosen": 0.3489634692668915, "rewards/margins": 0.013830333948135376, "rewards/rejected": 0.3351331353187561, "step": 128 }, { "epoch": 0.33, "learning_rate": 9.839222873407552e-08, "logits/chosen": -2.439065456390381, "logits/rejected": -2.4364006519317627, "logps/chosen": -4.928779602050781, "logps/rejected": -6.798061370849609, "loss": 0.6165, "rewards/accuracies": 1.0, "rewards/chosen": 0.394925981760025, "rewards/margins": 0.09657755494117737, "rewards/rejected": 0.29834842681884766, "step": 129 }, { "epoch": 0.33, "learning_rate": 9.835765866701077e-08, "logits/chosen": -2.2620058059692383, "logits/rejected": -2.2697103023529053, "logps/chosen": -6.966469764709473, "logps/rejected": -7.710611343383789, "loss": 0.6698, "rewards/accuracies": 1.0, "rewards/chosen": 0.42531099915504456, "rewards/margins": 0.13037100434303284, "rewards/rejected": 0.2949399948120117, "step": 130 }, { "epoch": 0.33, "learning_rate": 9.832272708631026e-08, "logits/chosen": -2.3113510608673096, "logits/rejected": -2.3187713623046875, "logps/chosen": -12.80862045288086, "logps/rejected": -7.347620964050293, "loss": 0.6502, "rewards/accuracies": 1.0, "rewards/chosen": 0.3658798336982727, "rewards/margins": 0.0685567855834961, "rewards/rejected": 0.2973230481147766, "step": 131 }, { "epoch": 0.33, "learning_rate": 9.828743425311652e-08, "logits/chosen": -2.29249906539917, "logits/rejected": -2.2947604656219482, "logps/chosen": -6.445010185241699, "logps/rejected": -13.163768768310547, "loss": 0.6784, "rewards/accuracies": 1.0, "rewards/chosen": 0.3698354661464691, "rewards/margins": 0.09157770872116089, "rewards/rejected": 0.2782577574253082, "step": 132 }, { "epoch": 0.34, "learning_rate": 9.825178043127278e-08, "logits/chosen": -2.3019397258758545, "logits/rejected": -2.295632839202881, "logps/chosen": -7.343489646911621, "logps/rejected": -8.798011779785156, "loss": 0.7006, "rewards/accuracies": 0.0, "rewards/chosen": 0.32914361357688904, "rewards/margins": -0.06597825884819031, "rewards/rejected": 0.39512187242507935, "step": 133 }, { "epoch": 0.34, "learning_rate": 9.821576588732095e-08, "logits/chosen": -2.2811813354492188, "logits/rejected": -2.3367855548858643, "logps/chosen": -10.040592193603516, "logps/rejected": -16.481548309326172, "loss": 0.6344, "rewards/accuracies": 1.0, "rewards/chosen": 0.29993095993995667, "rewards/margins": 0.1679346114397049, "rewards/rejected": 0.13199634850025177, "step": 134 }, { "epoch": 0.34, "learning_rate": 9.817939089049964e-08, "logits/chosen": -2.3455984592437744, "logits/rejected": -2.3527631759643555, "logps/chosen": -11.794549942016602, "logps/rejected": -5.240765571594238, "loss": 0.6696, "rewards/accuracies": 1.0, "rewards/chosen": 0.2623371183872223, "rewards/margins": 0.02257804572582245, "rewards/rejected": 0.23975907266139984, "step": 135 }, { "epoch": 0.34, "learning_rate": 9.814265571274213e-08, "logits/chosen": -2.2551517486572266, "logits/rejected": -2.243075132369995, "logps/chosen": -5.156126499176025, "logps/rejected": -10.879575729370117, "loss": 0.7271, "rewards/accuracies": 0.0, "rewards/chosen": 0.292110413312912, "rewards/margins": -0.014911890029907227, "rewards/rejected": 0.3070223033428192, "step": 136 }, { "epoch": 0.35, "learning_rate": 9.810556062867439e-08, "logits/chosen": -2.3251521587371826, "logits/rejected": -2.3179359436035156, "logps/chosen": -11.533658981323242, "logps/rejected": -12.17635440826416, "loss": 0.6736, "rewards/accuracies": 1.0, "rewards/chosen": 0.2675495147705078, "rewards/margins": 0.03806896507740021, "rewards/rejected": 0.2294805496931076, "step": 137 }, { "epoch": 0.35, "learning_rate": 9.806810591561294e-08, "logits/chosen": -2.2727789878845215, "logits/rejected": -2.269482135772705, "logps/chosen": -3.4326956272125244, "logps/rejected": -13.157539367675781, "loss": 0.7278, "rewards/accuracies": 0.0, "rewards/chosen": 0.33214420080184937, "rewards/margins": -0.18508434295654297, "rewards/rejected": 0.5172285437583923, "step": 138 }, { "epoch": 0.35, "learning_rate": 9.803029185356285e-08, "logits/chosen": -2.3262276649475098, "logits/rejected": -2.309828758239746, "logps/chosen": -5.453367233276367, "logps/rejected": -11.04053783416748, "loss": 0.658, "rewards/accuracies": 0.0, "rewards/chosen": 0.34868985414505005, "rewards/margins": -0.03904503583908081, "rewards/rejected": 0.38773488998413086, "step": 139 }, { "epoch": 0.35, "learning_rate": 9.799211872521563e-08, "logits/chosen": -2.1577486991882324, "logits/rejected": -2.1618781089782715, "logps/chosen": -5.050619125366211, "logps/rejected": -4.973663330078125, "loss": 0.6399, "rewards/accuracies": 1.0, "rewards/chosen": 0.45255565643310547, "rewards/margins": 0.2435894012451172, "rewards/rejected": 0.20896625518798828, "step": 140 }, { "epoch": 0.36, "learning_rate": 9.79535868159471e-08, "logits/chosen": -2.2946789264678955, "logits/rejected": -2.303238868713379, "logps/chosen": -7.129020690917969, "logps/rejected": -5.923000335693359, "loss": 0.6375, "rewards/accuracies": 1.0, "rewards/chosen": 0.42227956652641296, "rewards/margins": 0.1285427212715149, "rewards/rejected": 0.29373684525489807, "step": 141 }, { "epoch": 0.36, "learning_rate": 9.791469641381526e-08, "logits/chosen": -2.2385504245758057, "logits/rejected": -2.2412736415863037, "logps/chosen": -8.13792610168457, "logps/rejected": -11.152235984802246, "loss": 0.6895, "rewards/accuracies": 1.0, "rewards/chosen": 0.35099372267723083, "rewards/margins": 0.00492781400680542, "rewards/rejected": 0.3460659086704254, "step": 142 }, { "epoch": 0.36, "learning_rate": 9.787544780955813e-08, "logits/chosen": -2.1482346057891846, "logits/rejected": -2.140476942062378, "logps/chosen": -3.5855789184570312, "logps/rejected": -7.3987135887146, "loss": 0.6415, "rewards/accuracies": 0.0, "rewards/chosen": 0.442613810300827, "rewards/margins": -0.14743420481681824, "rewards/rejected": 0.5900480151176453, "step": 143 }, { "epoch": 0.36, "learning_rate": 9.783584129659161e-08, "logits/chosen": -2.256906509399414, "logits/rejected": -2.250915765762329, "logps/chosen": -4.669398307800293, "logps/rejected": -10.641128540039062, "loss": 0.6407, "rewards/accuracies": 1.0, "rewards/chosen": 0.42481374740600586, "rewards/margins": 0.066138356924057, "rewards/rejected": 0.35867539048194885, "step": 144 }, { "epoch": 0.37, "learning_rate": 9.779587717100728e-08, "logits/chosen": -2.3769047260284424, "logits/rejected": -2.4857289791107178, "logps/chosen": -6.493217468261719, "logps/rejected": -21.127017974853516, "loss": 0.6313, "rewards/accuracies": 1.0, "rewards/chosen": 0.38678619265556335, "rewards/margins": 0.24216796457767487, "rewards/rejected": 0.1446182280778885, "step": 145 }, { "epoch": 0.37, "learning_rate": 9.775555573157013e-08, "logits/chosen": -2.2601144313812256, "logits/rejected": -2.2723019123077393, "logps/chosen": -11.811671257019043, "logps/rejected": -5.860054969787598, "loss": 0.678, "rewards/accuracies": 1.0, "rewards/chosen": 0.4446362555027008, "rewards/margins": 0.0616338849067688, "rewards/rejected": 0.383002370595932, "step": 146 }, { "epoch": 0.37, "learning_rate": 9.771487727971641e-08, "logits/chosen": -2.1899166107177734, "logits/rejected": -2.2044122219085693, "logps/chosen": -8.254350662231445, "logps/rejected": -7.145992279052734, "loss": 0.641, "rewards/accuracies": 1.0, "rewards/chosen": 0.45974236726760864, "rewards/margins": 0.040909022092819214, "rewards/rejected": 0.41883334517478943, "step": 147 }, { "epoch": 0.37, "learning_rate": 9.767384211955125e-08, "logits/chosen": -2.2923405170440674, "logits/rejected": -2.364055871963501, "logps/chosen": -4.203858375549316, "logps/rejected": -26.485170364379883, "loss": 0.6061, "rewards/accuracies": 1.0, "rewards/chosen": 0.4666661322116852, "rewards/margins": 0.35140639543533325, "rewards/rejected": 0.11525974422693253, "step": 148 }, { "epoch": 0.38, "learning_rate": 9.763245055784661e-08, "logits/chosen": -2.3041412830352783, "logits/rejected": -2.3046772480010986, "logps/chosen": -4.0285749435424805, "logps/rejected": -5.863162994384766, "loss": 0.6483, "rewards/accuracies": 1.0, "rewards/chosen": 0.46696433424949646, "rewards/margins": 0.16838854551315308, "rewards/rejected": 0.2985757887363434, "step": 149 }, { "epoch": 0.38, "learning_rate": 9.759070290403871e-08, "logits/chosen": -2.18149471282959, "logits/rejected": -2.191098690032959, "logps/chosen": -5.587672233581543, "logps/rejected": -7.865267276763916, "loss": 0.6954, "rewards/accuracies": 1.0, "rewards/chosen": 0.593652069568634, "rewards/margins": 0.3200993239879608, "rewards/rejected": 0.2735527455806732, "step": 150 }, { "epoch": 0.38, "learning_rate": 9.754859947022596e-08, "logits/chosen": -2.3490025997161865, "logits/rejected": -2.338491916656494, "logps/chosen": -3.5011470317840576, "logps/rejected": -13.215836524963379, "loss": 0.6586, "rewards/accuracies": 0.0, "rewards/chosen": 0.4468056857585907, "rewards/margins": -0.010465413331985474, "rewards/rejected": 0.45727109909057617, "step": 151 }, { "epoch": 0.38, "learning_rate": 9.750614057116642e-08, "logits/chosen": -2.366805076599121, "logits/rejected": -2.371258020401001, "logps/chosen": -7.961428642272949, "logps/rejected": -4.2210187911987305, "loss": 0.6253, "rewards/accuracies": 1.0, "rewards/chosen": 0.5380949974060059, "rewards/margins": 0.10774877667427063, "rewards/rejected": 0.43034622073173523, "step": 152 }, { "epoch": 0.39, "learning_rate": 9.746332652427564e-08, "logits/chosen": -2.3775219917297363, "logits/rejected": -2.465226173400879, "logps/chosen": -6.432157516479492, "logps/rejected": -27.911752700805664, "loss": 0.6721, "rewards/accuracies": 1.0, "rewards/chosen": 0.35478851199150085, "rewards/margins": 0.03524124622344971, "rewards/rejected": 0.31954726576805115, "step": 153 }, { "epoch": 0.39, "learning_rate": 9.742015764962416e-08, "logits/chosen": -2.292438268661499, "logits/rejected": -2.282320022583008, "logps/chosen": -4.941239833831787, "logps/rejected": -10.595090866088867, "loss": 0.7514, "rewards/accuracies": 0.0, "rewards/chosen": 0.36504092812538147, "rewards/margins": -0.15836665034294128, "rewards/rejected": 0.5234075784683228, "step": 154 }, { "epoch": 0.39, "learning_rate": 9.737663426993511e-08, "logits/chosen": -2.3081862926483154, "logits/rejected": -2.317310094833374, "logps/chosen": -3.875333309173584, "logps/rejected": -8.565555572509766, "loss": 0.6631, "rewards/accuracies": 1.0, "rewards/chosen": 0.40879273414611816, "rewards/margins": 0.011015444993972778, "rewards/rejected": 0.3977772891521454, "step": 155 }, { "epoch": 0.39, "learning_rate": 9.733275671058194e-08, "logits/chosen": -2.303739070892334, "logits/rejected": -2.3019816875457764, "logps/chosen": -4.555109977722168, "logps/rejected": -5.724639415740967, "loss": 0.6411, "rewards/accuracies": 1.0, "rewards/chosen": 0.7312593460083008, "rewards/margins": 0.3037358820438385, "rewards/rejected": 0.4275234639644623, "step": 156 }, { "epoch": 0.4, "learning_rate": 9.728852529958578e-08, "logits/chosen": -2.316300868988037, "logits/rejected": -2.3460185527801514, "logps/chosen": -3.1270229816436768, "logps/rejected": -19.608299255371094, "loss": 0.6434, "rewards/accuracies": 1.0, "rewards/chosen": 0.5395016670227051, "rewards/margins": 0.2752213478088379, "rewards/rejected": 0.2642803192138672, "step": 157 }, { "epoch": 0.4, "learning_rate": 9.724394036761315e-08, "logits/chosen": -2.3483290672302246, "logits/rejected": -2.3424839973449707, "logps/chosen": -3.160304069519043, "logps/rejected": -8.286210060119629, "loss": 0.6647, "rewards/accuracies": 1.0, "rewards/chosen": 0.4263099730014801, "rewards/margins": 0.06619089841842651, "rewards/rejected": 0.3601190745830536, "step": 158 }, { "epoch": 0.4, "learning_rate": 9.71990022479734e-08, "logits/chosen": -2.3393466472625732, "logits/rejected": -2.3469436168670654, "logps/chosen": -5.56282377243042, "logps/rejected": -5.841671466827393, "loss": 0.6229, "rewards/accuracies": 1.0, "rewards/chosen": 0.5697243213653564, "rewards/margins": 0.23546019196510315, "rewards/rejected": 0.3342641294002533, "step": 159 }, { "epoch": 0.41, "learning_rate": 9.71537112766163e-08, "logits/chosen": -2.2507057189941406, "logits/rejected": -2.2682979106903076, "logps/chosen": -6.4190449714660645, "logps/rejected": -10.51038932800293, "loss": 0.6003, "rewards/accuracies": 1.0, "rewards/chosen": 0.6375297904014587, "rewards/margins": 0.44690173864364624, "rewards/rejected": 0.1906280517578125, "step": 160 }, { "epoch": 0.41, "learning_rate": 9.710806779212945e-08, "logits/chosen": -2.331033706665039, "logits/rejected": -2.339332342147827, "logps/chosen": -8.573750495910645, "logps/rejected": -12.52794075012207, "loss": 0.6935, "rewards/accuracies": 1.0, "rewards/chosen": 0.5180749297142029, "rewards/margins": 0.24214327335357666, "rewards/rejected": 0.2759316563606262, "step": 161 }, { "epoch": 0.41, "learning_rate": 9.706207213573578e-08, "logits/chosen": -2.273850202560425, "logits/rejected": -2.335700273513794, "logps/chosen": -5.116693019866943, "logps/rejected": -23.0468807220459, "loss": 0.608, "rewards/accuracies": 1.0, "rewards/chosen": 0.5978979468345642, "rewards/margins": 0.6712486743927002, "rewards/rejected": -0.07335072010755539, "step": 162 }, { "epoch": 0.41, "learning_rate": 9.7015724651291e-08, "logits/chosen": -2.309309959411621, "logits/rejected": -2.3219263553619385, "logps/chosen": -10.353595733642578, "logps/rejected": -5.690059661865234, "loss": 0.6654, "rewards/accuracies": 1.0, "rewards/chosen": 0.5510540008544922, "rewards/margins": 0.040304481983184814, "rewards/rejected": 0.5107495188713074, "step": 163 }, { "epoch": 0.42, "learning_rate": 9.696902568528102e-08, "logits/chosen": -2.0849199295043945, "logits/rejected": -2.089514970779419, "logps/chosen": -7.113705158233643, "logps/rejected": -5.809881210327148, "loss": 0.6205, "rewards/accuracies": 1.0, "rewards/chosen": 0.5170270800590515, "rewards/margins": 0.10799157619476318, "rewards/rejected": 0.40903550386428833, "step": 164 }, { "epoch": 0.42, "learning_rate": 9.692197558681938e-08, "logits/chosen": -2.337721586227417, "logits/rejected": -2.3864352703094482, "logps/chosen": -5.131543159484863, "logps/rejected": -11.924695014953613, "loss": 0.6515, "rewards/accuracies": 1.0, "rewards/chosen": 0.5766773223876953, "rewards/margins": 0.4188881814479828, "rewards/rejected": 0.15778914093971252, "step": 165 }, { "epoch": 0.42, "learning_rate": 9.68745747076446e-08, "logits/chosen": -2.2444794178009033, "logits/rejected": -2.2471463680267334, "logps/chosen": -3.2070913314819336, "logps/rejected": -3.6602373123168945, "loss": 0.6468, "rewards/accuracies": 1.0, "rewards/chosen": 0.5963620543479919, "rewards/margins": 0.1901712715625763, "rewards/rejected": 0.40619078278541565, "step": 166 }, { "epoch": 0.42, "learning_rate": 9.68268234021176e-08, "logits/chosen": -2.372817277908325, "logits/rejected": -2.366262674331665, "logps/chosen": -3.827530860900879, "logps/rejected": -7.458813667297363, "loss": 0.5961, "rewards/accuracies": 0.0, "rewards/chosen": 0.5048334002494812, "rewards/margins": -0.196003258228302, "rewards/rejected": 0.7008366584777832, "step": 167 }, { "epoch": 0.43, "learning_rate": 9.677872202721904e-08, "logits/chosen": -2.2813801765441895, "logits/rejected": -2.2687389850616455, "logps/chosen": -2.6028714179992676, "logps/rejected": -9.280851364135742, "loss": 0.7548, "rewards/accuracies": 0.0, "rewards/chosen": 0.549227774143219, "rewards/margins": -0.10715043544769287, "rewards/rejected": 0.6563782095909119, "step": 168 }, { "epoch": 0.43, "learning_rate": 9.673027094254663e-08, "logits/chosen": -2.292219400405884, "logits/rejected": -2.2867588996887207, "logps/chosen": -5.25357723236084, "logps/rejected": -5.735021114349365, "loss": 0.6248, "rewards/accuracies": 0.0, "rewards/chosen": 0.39836809039115906, "rewards/margins": -0.21499255299568176, "rewards/rejected": 0.6133606433868408, "step": 169 }, { "epoch": 0.43, "learning_rate": 9.668147051031239e-08, "logits/chosen": -2.357351541519165, "logits/rejected": -2.359900951385498, "logps/chosen": -3.14003849029541, "logps/rejected": -7.5748291015625, "loss": 0.6887, "rewards/accuracies": 1.0, "rewards/chosen": 0.6261230707168579, "rewards/margins": 0.12968045473098755, "rewards/rejected": 0.49644261598587036, "step": 170 }, { "epoch": 0.43, "learning_rate": 9.66323210953401e-08, "logits/chosen": -2.1246721744537354, "logits/rejected": -2.117778778076172, "logps/chosen": -4.811463832855225, "logps/rejected": -11.787696838378906, "loss": 0.7195, "rewards/accuracies": 1.0, "rewards/chosen": 0.4963875412940979, "rewards/margins": 0.05425897240638733, "rewards/rejected": 0.44212856888771057, "step": 171 }, { "epoch": 0.44, "learning_rate": 9.658282306506242e-08, "logits/chosen": -2.364356517791748, "logits/rejected": -2.3615119457244873, "logps/chosen": -3.5153157711029053, "logps/rejected": -9.481832504272461, "loss": 0.6457, "rewards/accuracies": 1.0, "rewards/chosen": 0.5845302939414978, "rewards/margins": 0.17631343007087708, "rewards/rejected": 0.4082168638706207, "step": 172 }, { "epoch": 0.44, "learning_rate": 9.65329767895182e-08, "logits/chosen": -2.2876698970794678, "logits/rejected": -2.279618740081787, "logps/chosen": -2.5119152069091797, "logps/rejected": -8.072874069213867, "loss": 0.6558, "rewards/accuracies": 1.0, "rewards/chosen": 0.5370586514472961, "rewards/margins": 0.047028928995132446, "rewards/rejected": 0.4900297224521637, "step": 173 }, { "epoch": 0.44, "learning_rate": 9.648278264134974e-08, "logits/chosen": -2.210768938064575, "logits/rejected": -2.211775064468384, "logps/chosen": -9.2225341796875, "logps/rejected": -3.882093906402588, "loss": 0.7211, "rewards/accuracies": 0.0, "rewards/chosen": 0.5986940264701843, "rewards/margins": -0.046826064586639404, "rewards/rejected": 0.6455200910568237, "step": 174 }, { "epoch": 0.44, "learning_rate": 9.643224099579996e-08, "logits/chosen": -2.227360248565674, "logits/rejected": -2.225043773651123, "logps/chosen": -3.787600517272949, "logps/rejected": -3.9125185012817383, "loss": 0.6005, "rewards/accuracies": 1.0, "rewards/chosen": 0.734695553779602, "rewards/margins": 0.2501716911792755, "rewards/rejected": 0.48452386260032654, "step": 175 }, { "epoch": 0.45, "learning_rate": 9.63813522307096e-08, "logits/chosen": -2.2422866821289062, "logits/rejected": -2.247602939605713, "logps/chosen": -5.871066093444824, "logps/rejected": -4.855759620666504, "loss": 0.7478, "rewards/accuracies": 0.0, "rewards/chosen": 0.33224916458129883, "rewards/margins": -0.2795196771621704, "rewards/rejected": 0.6117688417434692, "step": 176 }, { "epoch": 0.45, "learning_rate": 9.633011672651443e-08, "logits/chosen": -2.2527265548706055, "logits/rejected": -2.269583225250244, "logps/chosen": -4.088090896606445, "logps/rejected": -9.352494239807129, "loss": 0.5859, "rewards/accuracies": 1.0, "rewards/chosen": 0.6978138089179993, "rewards/margins": 0.32292014360427856, "rewards/rejected": 0.3748936653137207, "step": 177 }, { "epoch": 0.45, "learning_rate": 9.627853486624233e-08, "logits/chosen": -2.3145289421081543, "logits/rejected": -2.3638360500335693, "logps/chosen": -3.2528817653656006, "logps/rejected": -10.815567016601562, "loss": 0.5732, "rewards/accuracies": 1.0, "rewards/chosen": 0.6586081385612488, "rewards/margins": 0.3934996426105499, "rewards/rejected": 0.26510849595069885, "step": 178 }, { "epoch": 0.45, "learning_rate": 9.622660703551058e-08, "logits/chosen": -2.3029308319091797, "logits/rejected": -2.2977962493896484, "logps/chosen": -5.6917405128479, "logps/rejected": -8.657557487487793, "loss": 0.5926, "rewards/accuracies": 0.0, "rewards/chosen": 0.4228381812572479, "rewards/margins": -0.0286979079246521, "rewards/rejected": 0.4515360891819, "step": 179 }, { "epoch": 0.46, "learning_rate": 9.617433362252276e-08, "logits/chosen": -2.394881010055542, "logits/rejected": -2.3985536098480225, "logps/chosen": -6.852348327636719, "logps/rejected": -4.858220100402832, "loss": 0.6901, "rewards/accuracies": 0.0, "rewards/chosen": 0.47626611590385437, "rewards/margins": -0.12474355101585388, "rewards/rejected": 0.6010096669197083, "step": 180 }, { "epoch": 0.46, "learning_rate": 9.612171501806605e-08, "logits/chosen": -2.280266761779785, "logits/rejected": -2.3070664405822754, "logps/chosen": -3.8239660263061523, "logps/rejected": -14.817578315734863, "loss": 0.748, "rewards/accuracies": 0.0, "rewards/chosen": 0.5183539390563965, "rewards/margins": -0.018031537532806396, "rewards/rejected": 0.5363854765892029, "step": 181 }, { "epoch": 0.46, "learning_rate": 9.606875161550819e-08, "logits/chosen": -2.32751202583313, "logits/rejected": -2.3261449337005615, "logps/chosen": -3.287388801574707, "logps/rejected": -9.602625846862793, "loss": 0.5646, "rewards/accuracies": 1.0, "rewards/chosen": 0.6209336519241333, "rewards/margins": 0.10398322343826294, "rewards/rejected": 0.5169504284858704, "step": 182 }, { "epoch": 0.46, "learning_rate": 9.601544381079456e-08, "logits/chosen": -2.3685381412506104, "logits/rejected": -2.362083911895752, "logps/chosen": -4.23568058013916, "logps/rejected": -10.18048095703125, "loss": 0.7188, "rewards/accuracies": 0.0, "rewards/chosen": 0.381561279296875, "rewards/margins": -0.1695689558982849, "rewards/rejected": 0.5511302351951599, "step": 183 }, { "epoch": 0.47, "learning_rate": 9.596179200244526e-08, "logits/chosen": -2.270871162414551, "logits/rejected": -2.262275218963623, "logps/chosen": -1.4460735321044922, "logps/rejected": -10.896650314331055, "loss": 0.8903, "rewards/accuracies": 0.0, "rewards/chosen": 0.6431666612625122, "rewards/margins": -0.42551684379577637, "rewards/rejected": 1.0686835050582886, "step": 184 }, { "epoch": 0.47, "learning_rate": 9.590779659155208e-08, "logits/chosen": -2.4042487144470215, "logits/rejected": -2.5304696559906006, "logps/chosen": -3.5976672172546387, "logps/rejected": -16.792312622070312, "loss": 0.6688, "rewards/accuracies": 1.0, "rewards/chosen": 0.5868691205978394, "rewards/margins": 0.11375835537910461, "rewards/rejected": 0.47311076521873474, "step": 185 }, { "epoch": 0.47, "learning_rate": 9.585345798177556e-08, "logits/chosen": -2.254892587661743, "logits/rejected": -2.2522709369659424, "logps/chosen": -5.10137939453125, "logps/rejected": -10.67294979095459, "loss": 0.6743, "rewards/accuracies": 1.0, "rewards/chosen": 0.7058269381523132, "rewards/margins": 0.15953397750854492, "rewards/rejected": 0.5462929606437683, "step": 186 }, { "epoch": 0.47, "learning_rate": 9.579877657934186e-08, "logits/chosen": -2.1568689346313477, "logits/rejected": -2.3279616832733154, "logps/chosen": -3.572422981262207, "logps/rejected": -26.153635025024414, "loss": 0.6026, "rewards/accuracies": 1.0, "rewards/chosen": 0.6095871925354004, "rewards/margins": 0.5451674461364746, "rewards/rejected": 0.06441974639892578, "step": 187 }, { "epoch": 0.48, "learning_rate": 9.574375279303988e-08, "logits/chosen": -2.2847161293029785, "logits/rejected": -2.271406650543213, "logps/chosen": -5.627955913543701, "logps/rejected": -6.324826717376709, "loss": 0.813, "rewards/accuracies": 0.0, "rewards/chosen": 0.44276365637779236, "rewards/margins": -0.41614386439323425, "rewards/rejected": 0.8589075207710266, "step": 188 }, { "epoch": 0.48, "learning_rate": 9.568838703421808e-08, "logits/chosen": -2.221299409866333, "logits/rejected": -2.2780864238739014, "logps/chosen": -2.68843936920166, "logps/rejected": -10.762029647827148, "loss": 0.6981, "rewards/accuracies": 1.0, "rewards/chosen": 0.6139096617698669, "rewards/margins": 0.31267160177230835, "rewards/rejected": 0.3012380599975586, "step": 189 }, { "epoch": 0.48, "learning_rate": 9.563267971678151e-08, "logits/chosen": -2.218686580657959, "logits/rejected": -2.2155046463012695, "logps/chosen": -4.145890235900879, "logps/rejected": -4.787487506866455, "loss": 0.5734, "rewards/accuracies": 1.0, "rewards/chosen": 0.6738041043281555, "rewards/margins": 0.22649788856506348, "rewards/rejected": 0.44730621576309204, "step": 190 }, { "epoch": 0.48, "learning_rate": 9.557663125718854e-08, "logits/chosen": -2.2621285915374756, "logits/rejected": -2.270218849182129, "logps/chosen": -3.413024425506592, "logps/rejected": -3.7081942558288574, "loss": 0.6125, "rewards/accuracies": 1.0, "rewards/chosen": 0.7278992533683777, "rewards/margins": 0.2358214557170868, "rewards/rejected": 0.4920777976512909, "step": 191 }, { "epoch": 0.49, "learning_rate": 9.552024207444794e-08, "logits/chosen": -2.322531223297119, "logits/rejected": -2.3306806087493896, "logps/chosen": -3.0523312091827393, "logps/rejected": -23.164321899414062, "loss": 0.6152, "rewards/accuracies": 1.0, "rewards/chosen": 0.5658009648323059, "rewards/margins": 0.0890345573425293, "rewards/rejected": 0.4767664074897766, "step": 192 }, { "epoch": 0.49, "learning_rate": 9.546351259011567e-08, "logits/chosen": -2.33430814743042, "logits/rejected": -2.3258934020996094, "logps/chosen": -2.0784707069396973, "logps/rejected": -4.974394798278809, "loss": 0.7496, "rewards/accuracies": 0.0, "rewards/chosen": 0.4734286963939667, "rewards/margins": -0.38112398982048035, "rewards/rejected": 0.854552686214447, "step": 193 }, { "epoch": 0.49, "learning_rate": 9.540644322829172e-08, "logits/chosen": -2.1659271717071533, "logits/rejected": -2.1569418907165527, "logps/chosen": -6.051072120666504, "logps/rejected": -10.546429634094238, "loss": 0.669, "rewards/accuracies": 0.0, "rewards/chosen": 0.5872923731803894, "rewards/margins": -0.07125282287597656, "rewards/rejected": 0.658545196056366, "step": 194 }, { "epoch": 0.49, "learning_rate": 9.534903441561692e-08, "logits/chosen": -2.1929454803466797, "logits/rejected": -2.194155693054199, "logps/chosen": -2.54067325592041, "logps/rejected": -11.031060218811035, "loss": 0.6805, "rewards/accuracies": 0.0, "rewards/chosen": 0.6447510123252869, "rewards/margins": -0.03316885232925415, "rewards/rejected": 0.677919864654541, "step": 195 }, { "epoch": 0.5, "learning_rate": 9.529128658126979e-08, "logits/chosen": -2.238251209259033, "logits/rejected": -2.240748643875122, "logps/chosen": -2.3829164505004883, "logps/rejected": -9.708270072937012, "loss": 0.7359, "rewards/accuracies": 0.0, "rewards/chosen": 0.5178288817405701, "rewards/margins": -0.33609938621520996, "rewards/rejected": 0.85392826795578, "step": 196 }, { "epoch": 0.5, "learning_rate": 9.523320015696335e-08, "logits/chosen": -2.1850099563598633, "logits/rejected": -2.2249133586883545, "logps/chosen": -2.002481698989868, "logps/rejected": -8.087606430053711, "loss": 0.72, "rewards/accuracies": 0.0, "rewards/chosen": 0.5219219326972961, "rewards/margins": -0.1306753158569336, "rewards/rejected": 0.6525972485542297, "step": 197 }, { "epoch": 0.5, "learning_rate": 9.51747755769418e-08, "logits/chosen": -2.259052276611328, "logits/rejected": -2.2462944984436035, "logps/chosen": -3.6930482387542725, "logps/rejected": -9.451170921325684, "loss": 0.627, "rewards/accuracies": 0.0, "rewards/chosen": 0.4778766632080078, "rewards/margins": -0.15192508697509766, "rewards/rejected": 0.6298017501831055, "step": 198 }, { "epoch": 0.5, "learning_rate": 9.511601327797739e-08, "logits/chosen": -2.211836576461792, "logits/rejected": -2.2985572814941406, "logps/chosen": -21.87612533569336, "logps/rejected": -21.544418334960938, "loss": 0.6355, "rewards/accuracies": 1.0, "rewards/chosen": 0.8130569458007812, "rewards/margins": 0.5381149053573608, "rewards/rejected": 0.274942010641098, "step": 199 }, { "epoch": 0.51, "learning_rate": 9.505691369936708e-08, "logits/chosen": -2.202284574508667, "logits/rejected": -2.189361333847046, "logps/chosen": -9.703621864318848, "logps/rejected": -3.5714111328125, "loss": 0.5999, "rewards/accuracies": 1.0, "rewards/chosen": 0.6420933604240417, "rewards/margins": 0.22493788599967957, "rewards/rejected": 0.4171554744243622, "step": 200 }, { "epoch": 0.51, "learning_rate": 9.499747728292926e-08, "logits/chosen": -2.190592050552368, "logits/rejected": -2.1910390853881836, "logps/chosen": -10.658897399902344, "logps/rejected": -4.172177314758301, "loss": 0.6039, "rewards/accuracies": 1.0, "rewards/chosen": 0.5862564444541931, "rewards/margins": 0.010311603546142578, "rewards/rejected": 0.5759448409080505, "step": 201 }, { "epoch": 0.51, "learning_rate": 9.493770447300048e-08, "logits/chosen": -2.26658296585083, "logits/rejected": -2.2697300910949707, "logps/chosen": -2.462601661682129, "logps/rejected": -5.870565414428711, "loss": 0.7697, "rewards/accuracies": 0.0, "rewards/chosen": 0.46044865250587463, "rewards/margins": -0.37287482619285583, "rewards/rejected": 0.8333234786987305, "step": 202 }, { "epoch": 0.51, "learning_rate": 9.48775957164321e-08, "logits/chosen": -2.3210322856903076, "logits/rejected": -2.331409215927124, "logps/chosen": -10.432024002075195, "logps/rejected": -3.3188788890838623, "loss": 0.6867, "rewards/accuracies": 1.0, "rewards/chosen": 0.6779813766479492, "rewards/margins": 0.012314856052398682, "rewards/rejected": 0.6656665205955505, "step": 203 }, { "epoch": 0.52, "learning_rate": 9.481715146258698e-08, "logits/chosen": -2.2627782821655273, "logits/rejected": -2.2767438888549805, "logps/chosen": -3.4090240001678467, "logps/rejected": -3.995790719985962, "loss": 0.6228, "rewards/accuracies": 1.0, "rewards/chosen": 0.6768927574157715, "rewards/margins": 0.26825931668281555, "rewards/rejected": 0.40863344073295593, "step": 204 }, { "epoch": 0.52, "learning_rate": 9.47563721633361e-08, "logits/chosen": -2.3570737838745117, "logits/rejected": -2.3605868816375732, "logps/chosen": -2.71144437789917, "logps/rejected": -6.43817138671875, "loss": 0.6458, "rewards/accuracies": 1.0, "rewards/chosen": 0.7826611995697021, "rewards/margins": 0.06728070974349976, "rewards/rejected": 0.7153804898262024, "step": 205 }, { "epoch": 0.52, "learning_rate": 9.469525827305512e-08, "logits/chosen": -2.1500437259674072, "logits/rejected": -2.1599135398864746, "logps/chosen": -2.7948310375213623, "logps/rejected": -7.780345916748047, "loss": 0.6235, "rewards/accuracies": 1.0, "rewards/chosen": 0.6897475123405457, "rewards/margins": 0.27750489115715027, "rewards/rejected": 0.4122426211833954, "step": 206 }, { "epoch": 0.52, "learning_rate": 9.463381024862114e-08, "logits/chosen": -2.308065176010132, "logits/rejected": -2.311396360397339, "logps/chosen": -2.1298129558563232, "logps/rejected": -4.639407157897949, "loss": 0.6765, "rewards/accuracies": 1.0, "rewards/chosen": 0.7520065307617188, "rewards/margins": 0.3451756536960602, "rewards/rejected": 0.40683087706565857, "step": 207 }, { "epoch": 0.53, "learning_rate": 9.457202854940913e-08, "logits/chosen": -2.2493679523468018, "logits/rejected": -2.266986608505249, "logps/chosen": -7.83462381362915, "logps/rejected": -10.941642761230469, "loss": 0.6369, "rewards/accuracies": 1.0, "rewards/chosen": 0.5506171584129333, "rewards/margins": 0.09854587912559509, "rewards/rejected": 0.45207127928733826, "step": 208 }, { "epoch": 0.53, "learning_rate": 9.450991363728856e-08, "logits/chosen": -2.1621015071868896, "logits/rejected": -2.1658177375793457, "logps/chosen": -3.1328980922698975, "logps/rejected": -3.3067643642425537, "loss": 0.7116, "rewards/accuracies": 1.0, "rewards/chosen": 0.7780746817588806, "rewards/margins": 0.2439262866973877, "rewards/rejected": 0.5341483950614929, "step": 209 }, { "epoch": 0.53, "learning_rate": 9.444746597661998e-08, "logits/chosen": -2.2782561779022217, "logits/rejected": -2.2770347595214844, "logps/chosen": -3.850724220275879, "logps/rejected": -15.645235061645508, "loss": 0.6988, "rewards/accuracies": 1.0, "rewards/chosen": 0.7378126978874207, "rewards/margins": 0.025168657302856445, "rewards/rejected": 0.7126440405845642, "step": 210 }, { "epoch": 0.53, "learning_rate": 9.438468603425146e-08, "logits/chosen": -2.392026424407959, "logits/rejected": -2.3915765285491943, "logps/chosen": -4.183090686798096, "logps/rejected": -10.899209976196289, "loss": 0.6317, "rewards/accuracies": 0.0, "rewards/chosen": 0.4870093762874603, "rewards/margins": -0.18720325827598572, "rewards/rejected": 0.674212634563446, "step": 211 }, { "epoch": 0.54, "learning_rate": 9.432157427951519e-08, "logits/chosen": -2.217898368835449, "logits/rejected": -2.213934898376465, "logps/chosen": -2.938810110092163, "logps/rejected": -5.625204086303711, "loss": 0.7518, "rewards/accuracies": 0.0, "rewards/chosen": 0.45133382081985474, "rewards/margins": -0.2135574221611023, "rewards/rejected": 0.664891242980957, "step": 212 }, { "epoch": 0.54, "learning_rate": 9.425813118422392e-08, "logits/chosen": -2.1239218711853027, "logits/rejected": -2.1278955936431885, "logps/chosen": -2.5129637718200684, "logps/rejected": -4.605852127075195, "loss": 0.7118, "rewards/accuracies": 1.0, "rewards/chosen": 0.7856445908546448, "rewards/margins": 0.31223559379577637, "rewards/rejected": 0.4734089970588684, "step": 213 }, { "epoch": 0.54, "learning_rate": 9.419435722266744e-08, "logits/chosen": -2.1352012157440186, "logits/rejected": -2.155285358428955, "logps/chosen": -3.9748265743255615, "logps/rejected": -5.131795883178711, "loss": 0.7473, "rewards/accuracies": 1.0, "rewards/chosen": 0.7639584541320801, "rewards/margins": 0.22913682460784912, "rewards/rejected": 0.534821629524231, "step": 214 }, { "epoch": 0.54, "learning_rate": 9.413025287160904e-08, "logits/chosen": -2.3970491886138916, "logits/rejected": -2.3958685398101807, "logps/chosen": -7.125813961029053, "logps/rejected": -3.317082643508911, "loss": 0.5508, "rewards/accuracies": 1.0, "rewards/chosen": 0.6677328944206238, "rewards/margins": 0.1956588625907898, "rewards/rejected": 0.472074031829834, "step": 215 }, { "epoch": 0.55, "learning_rate": 9.406581861028196e-08, "logits/chosen": -2.2382614612579346, "logits/rejected": -2.2592856884002686, "logps/chosen": -2.321164608001709, "logps/rejected": -6.17099666595459, "loss": 0.7367, "rewards/accuracies": 1.0, "rewards/chosen": 0.7900239825248718, "rewards/margins": 0.23833245038986206, "rewards/rejected": 0.5516915321350098, "step": 216 }, { "epoch": 0.55, "learning_rate": 9.40010549203858e-08, "logits/chosen": -2.2079074382781982, "logits/rejected": -2.2180678844451904, "logps/chosen": -2.903996229171753, "logps/rejected": -3.291684865951538, "loss": 0.5816, "rewards/accuracies": 1.0, "rewards/chosen": 0.7423021197319031, "rewards/margins": 0.23269224166870117, "rewards/rejected": 0.5096098780632019, "step": 217 }, { "epoch": 0.55, "learning_rate": 9.393596228608287e-08, "logits/chosen": -2.1711485385894775, "logits/rejected": -2.166309118270874, "logps/chosen": -2.203486204147339, "logps/rejected": -4.027561664581299, "loss": 0.8005, "rewards/accuracies": 0.0, "rewards/chosen": 0.670961320400238, "rewards/margins": -0.15272223949432373, "rewards/rejected": 0.8236835598945618, "step": 218 }, { "epoch": 0.55, "learning_rate": 9.387054119399464e-08, "logits/chosen": -2.298851251602173, "logits/rejected": -2.296642541885376, "logps/chosen": -1.7298816442489624, "logps/rejected": -4.520757675170898, "loss": 0.5896, "rewards/accuracies": 1.0, "rewards/chosen": 0.7538194060325623, "rewards/margins": 0.29853013157844543, "rewards/rejected": 0.4552892744541168, "step": 219 }, { "epoch": 0.56, "learning_rate": 9.38047921331981e-08, "logits/chosen": -2.2995851039886475, "logits/rejected": -2.304990291595459, "logps/chosen": -10.458675384521484, "logps/rejected": -3.5722603797912598, "loss": 0.6242, "rewards/accuracies": 1.0, "rewards/chosen": 0.7174793481826782, "rewards/margins": 0.17445224523544312, "rewards/rejected": 0.5430271029472351, "step": 220 }, { "epoch": 0.56, "learning_rate": 9.373871559522202e-08, "logits/chosen": -2.242748737335205, "logits/rejected": -2.228484630584717, "logps/chosen": -2.118150472640991, "logps/rejected": -7.651256084442139, "loss": 0.6426, "rewards/accuracies": 1.0, "rewards/chosen": 0.6126724481582642, "rewards/margins": 0.005226492881774902, "rewards/rejected": 0.6074459552764893, "step": 221 }, { "epoch": 0.56, "learning_rate": 9.367231207404339e-08, "logits/chosen": -2.2520878314971924, "logits/rejected": -2.2359554767608643, "logps/chosen": -2.239095687866211, "logps/rejected": -9.932409286499023, "loss": 0.6048, "rewards/accuracies": 1.0, "rewards/chosen": 0.5991827249526978, "rewards/margins": 0.00751572847366333, "rewards/rejected": 0.5916669964790344, "step": 222 }, { "epoch": 0.56, "learning_rate": 9.360558206608362e-08, "logits/chosen": -2.2392513751983643, "logits/rejected": -2.2437644004821777, "logps/chosen": -3.388023614883423, "logps/rejected": -3.0137135982513428, "loss": 0.6774, "rewards/accuracies": 1.0, "rewards/chosen": 0.7086884379386902, "rewards/margins": 0.2148737609386444, "rewards/rejected": 0.4938146770000458, "step": 223 }, { "epoch": 0.57, "learning_rate": 9.353852607020494e-08, "logits/chosen": -2.2763450145721436, "logits/rejected": -2.266753673553467, "logps/chosen": -3.889010429382324, "logps/rejected": -8.97239875793457, "loss": 0.7148, "rewards/accuracies": 0.0, "rewards/chosen": 0.48674678802490234, "rewards/margins": -0.1184396743774414, "rewards/rejected": 0.6051864624023438, "step": 224 }, { "epoch": 0.57, "learning_rate": 9.347114458770655e-08, "logits/chosen": -2.310457944869995, "logits/rejected": -2.304246425628662, "logps/chosen": -5.543255805969238, "logps/rejected": -10.102169036865234, "loss": 0.7635, "rewards/accuracies": 0.0, "rewards/chosen": 0.531202495098114, "rewards/margins": -0.0039038658142089844, "rewards/rejected": 0.535106360912323, "step": 225 }, { "epoch": 0.57, "learning_rate": 9.340343812232096e-08, "logits/chosen": -2.2479257583618164, "logits/rejected": -2.2388124465942383, "logps/chosen": -21.287940979003906, "logps/rejected": -5.858831882476807, "loss": 0.7041, "rewards/accuracies": 0.0, "rewards/chosen": 0.6741329431533813, "rewards/margins": -0.11739522218704224, "rewards/rejected": 0.7915281653404236, "step": 226 }, { "epoch": 0.57, "learning_rate": 9.333540718021022e-08, "logits/chosen": -2.263129949569702, "logits/rejected": -2.376617670059204, "logps/chosen": -1.6920462846755981, "logps/rejected": -19.850749969482422, "loss": 0.585, "rewards/accuracies": 1.0, "rewards/chosen": 0.6360112428665161, "rewards/margins": 0.4288012683391571, "rewards/rejected": 0.207209974527359, "step": 227 }, { "epoch": 0.58, "learning_rate": 9.326705226996205e-08, "logits/chosen": -2.2518551349639893, "logits/rejected": -2.2554478645324707, "logps/chosen": -4.940863609313965, "logps/rejected": -3.415524482727051, "loss": 0.581, "rewards/accuracies": 1.0, "rewards/chosen": 0.8010675311088562, "rewards/margins": 0.3065183460712433, "rewards/rejected": 0.4945491850376129, "step": 228 }, { "epoch": 0.58, "learning_rate": 9.319837390258617e-08, "logits/chosen": -2.2235045433044434, "logits/rejected": -2.26053524017334, "logps/chosen": -2.7059326171875, "logps/rejected": -7.542132377624512, "loss": 0.627, "rewards/accuracies": 1.0, "rewards/chosen": 0.7637090086936951, "rewards/margins": 0.3637966513633728, "rewards/rejected": 0.39991235733032227, "step": 229 }, { "epoch": 0.58, "learning_rate": 9.312937259151038e-08, "logits/chosen": -2.317666530609131, "logits/rejected": -2.315354824066162, "logps/chosen": -6.848742485046387, "logps/rejected": -3.9386961460113525, "loss": 0.6545, "rewards/accuracies": 1.0, "rewards/chosen": 0.8128674626350403, "rewards/margins": 0.2413855791091919, "rewards/rejected": 0.5714818835258484, "step": 230 }, { "epoch": 0.58, "learning_rate": 9.306004885257673e-08, "logits/chosen": -2.36811900138855, "logits/rejected": -2.358081340789795, "logps/chosen": -2.650899887084961, "logps/rejected": -7.529595375061035, "loss": 0.622, "rewards/accuracies": 1.0, "rewards/chosen": 0.5625593066215515, "rewards/margins": 0.08257541060447693, "rewards/rejected": 0.4799838960170746, "step": 231 }, { "epoch": 0.59, "learning_rate": 9.299040320403772e-08, "logits/chosen": -2.2204813957214355, "logits/rejected": -2.21943998336792, "logps/chosen": -2.857728958129883, "logps/rejected": -13.061132431030273, "loss": 0.7204, "rewards/accuracies": 0.0, "rewards/chosen": 0.5188934206962585, "rewards/margins": -0.02655869722366333, "rewards/rejected": 0.5454521179199219, "step": 232 }, { "epoch": 0.59, "learning_rate": 9.292043616655239e-08, "logits/chosen": -2.2850754261016846, "logits/rejected": -2.2934367656707764, "logps/chosen": -7.721728801727295, "logps/rejected": -3.9098360538482666, "loss": 0.5844, "rewards/accuracies": 1.0, "rewards/chosen": 0.742002546787262, "rewards/margins": 0.24628376960754395, "rewards/rejected": 0.495718777179718, "step": 233 }, { "epoch": 0.59, "learning_rate": 9.28501482631824e-08, "logits/chosen": -2.3122096061706543, "logits/rejected": -2.316354751586914, "logps/chosen": -2.4474189281463623, "logps/rejected": -1.9077773094177246, "loss": 0.6688, "rewards/accuracies": 1.0, "rewards/chosen": 0.9055013656616211, "rewards/margins": 0.3562697768211365, "rewards/rejected": 0.5492315888404846, "step": 234 }, { "epoch": 0.59, "learning_rate": 9.277954001938817e-08, "logits/chosen": -2.246101140975952, "logits/rejected": -2.248798131942749, "logps/chosen": -1.431087613105774, "logps/rejected": -3.068713903427124, "loss": 0.5824, "rewards/accuracies": 1.0, "rewards/chosen": 0.7407253384590149, "rewards/margins": 0.21344834566116333, "rewards/rejected": 0.5272769927978516, "step": 235 }, { "epoch": 0.6, "learning_rate": 9.270861196302493e-08, "logits/chosen": -2.3090176582336426, "logits/rejected": -2.3232340812683105, "logps/chosen": -0.9432051777839661, "logps/rejected": -13.955750465393066, "loss": 0.6466, "rewards/accuracies": 0.0, "rewards/chosen": 0.5854434370994568, "rewards/margins": -0.21173590421676636, "rewards/rejected": 0.7971793413162231, "step": 236 }, { "epoch": 0.6, "learning_rate": 9.263736462433878e-08, "logits/chosen": -2.2019522190093994, "logits/rejected": -2.1945042610168457, "logps/chosen": -3.8827946186065674, "logps/rejected": -3.809873104095459, "loss": 0.6661, "rewards/accuracies": 0.0, "rewards/chosen": 0.6243717074394226, "rewards/margins": -0.3225492835044861, "rewards/rejected": 0.9469209909439087, "step": 237 }, { "epoch": 0.6, "learning_rate": 9.256579853596271e-08, "logits/chosen": -2.2208356857299805, "logits/rejected": -2.2143936157226562, "logps/chosen": -4.663203716278076, "logps/rejected": -5.906563758850098, "loss": 0.6371, "rewards/accuracies": 1.0, "rewards/chosen": 0.7835738062858582, "rewards/margins": 0.21907877922058105, "rewards/rejected": 0.5644950270652771, "step": 238 }, { "epoch": 0.61, "learning_rate": 9.249391423291262e-08, "logits/chosen": -2.281773805618286, "logits/rejected": -2.2848427295684814, "logps/chosen": -3.37109637260437, "logps/rejected": -8.314682006835938, "loss": 0.7001, "rewards/accuracies": 0.0, "rewards/chosen": 0.6955689787864685, "rewards/margins": -0.2542906403541565, "rewards/rejected": 0.949859619140625, "step": 239 }, { "epoch": 0.61, "learning_rate": 9.242171225258335e-08, "logits/chosen": -2.197601318359375, "logits/rejected": -2.2027361392974854, "logps/chosen": -4.600227355957031, "logps/rejected": -4.562251567840576, "loss": 0.5885, "rewards/accuracies": 1.0, "rewards/chosen": 0.6626486778259277, "rewards/margins": 0.18144267797470093, "rewards/rejected": 0.4812059998512268, "step": 240 }, { "epoch": 0.61, "learning_rate": 9.234919313474463e-08, "logits/chosen": -2.236070156097412, "logits/rejected": -2.258148431777954, "logps/chosen": -2.41971755027771, "logps/rejected": -8.341658592224121, "loss": 0.7519, "rewards/accuracies": 1.0, "rewards/chosen": 0.6630970239639282, "rewards/margins": 0.1588544249534607, "rewards/rejected": 0.5042425990104675, "step": 241 }, { "epoch": 0.61, "learning_rate": 9.227635742153706e-08, "logits/chosen": -2.2417993545532227, "logits/rejected": -2.2374565601348877, "logps/chosen": -2.4195220470428467, "logps/rejected": -4.6854143142700195, "loss": 0.5938, "rewards/accuracies": 0.0, "rewards/chosen": 0.6256970763206482, "rewards/margins": -0.1383659839630127, "rewards/rejected": 0.7640630602836609, "step": 242 }, { "epoch": 0.62, "learning_rate": 9.220320565746804e-08, "logits/chosen": -2.3392460346221924, "logits/rejected": -2.3440794944763184, "logps/chosen": -3.4420876502990723, "logps/rejected": -4.998337745666504, "loss": 0.5886, "rewards/accuracies": 1.0, "rewards/chosen": 0.7443902492523193, "rewards/margins": 0.22851288318634033, "rewards/rejected": 0.515877366065979, "step": 243 }, { "epoch": 0.62, "learning_rate": 9.212973838940773e-08, "logits/chosen": -2.2198235988616943, "logits/rejected": -2.2167978286743164, "logps/chosen": -2.387033224105835, "logps/rejected": -2.7562639713287354, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": 0.8261524438858032, "rewards/margins": 0.3087369203567505, "rewards/rejected": 0.5174155235290527, "step": 244 }, { "epoch": 0.62, "learning_rate": 9.205595616658495e-08, "logits/chosen": -2.1957197189331055, "logits/rejected": -2.1927285194396973, "logps/chosen": -2.9076335430145264, "logps/rejected": -10.30256462097168, "loss": 0.6328, "rewards/accuracies": 0.0, "rewards/chosen": 0.5220659375190735, "rewards/margins": -0.003336310386657715, "rewards/rejected": 0.5254022479057312, "step": 245 }, { "epoch": 0.62, "learning_rate": 9.198185954058305e-08, "logits/chosen": -2.33577561378479, "logits/rejected": -2.3352737426757812, "logps/chosen": -2.1775155067443848, "logps/rejected": -7.098605155944824, "loss": 0.6419, "rewards/accuracies": 0.0, "rewards/chosen": 0.5748474597930908, "rewards/margins": -0.2919183373451233, "rewards/rejected": 0.8667657971382141, "step": 246 }, { "epoch": 0.63, "learning_rate": 9.190744906533577e-08, "logits/chosen": -2.299130916595459, "logits/rejected": -2.29768705368042, "logps/chosen": -0.9778789281845093, "logps/rejected": -5.514305114746094, "loss": 0.6667, "rewards/accuracies": 1.0, "rewards/chosen": 0.7034059166908264, "rewards/margins": 0.11766332387924194, "rewards/rejected": 0.5857425928115845, "step": 247 }, { "epoch": 0.63, "learning_rate": 9.183272529712323e-08, "logits/chosen": -2.2283146381378174, "logits/rejected": -2.25118350982666, "logps/chosen": -6.929495811462402, "logps/rejected": -10.350875854492188, "loss": 0.5892, "rewards/accuracies": 1.0, "rewards/chosen": 0.8321835398674011, "rewards/margins": 0.4411386251449585, "rewards/rejected": 0.3910449147224426, "step": 248 }, { "epoch": 0.63, "learning_rate": 9.175768879456758e-08, "logits/chosen": -2.1869630813598633, "logits/rejected": -2.1804416179656982, "logps/chosen": -0.749173104763031, "logps/rejected": -7.290002822875977, "loss": 0.7281, "rewards/accuracies": 0.0, "rewards/chosen": 0.5911769270896912, "rewards/margins": -0.6300265192985535, "rewards/rejected": 1.2212034463882446, "step": 249 }, { "epoch": 0.63, "learning_rate": 9.168234011862899e-08, "logits/chosen": -2.2606585025787354, "logits/rejected": -2.2601318359375, "logps/chosen": -1.650772213935852, "logps/rejected": -3.725111961364746, "loss": 0.8424, "rewards/accuracies": 0.0, "rewards/chosen": 0.7079368829727173, "rewards/margins": -0.20963448286056519, "rewards/rejected": 0.9175713658332825, "step": 250 }, { "epoch": 0.64, "learning_rate": 9.160667983260131e-08, "logits/chosen": -2.2957544326782227, "logits/rejected": -2.295085906982422, "logps/chosen": -2.0779223442077637, "logps/rejected": -3.2365341186523438, "loss": 0.6967, "rewards/accuracies": 0.0, "rewards/chosen": 0.6708118319511414, "rewards/margins": -0.34767740964889526, "rewards/rejected": 1.0184892416000366, "step": 251 }, { "epoch": 0.64, "learning_rate": 9.153070850210803e-08, "logits/chosen": -2.1925034523010254, "logits/rejected": -2.193441390991211, "logps/chosen": -1.9174015522003174, "logps/rejected": -3.551029682159424, "loss": 0.6108, "rewards/accuracies": 1.0, "rewards/chosen": 0.7031438946723938, "rewards/margins": 0.22674939036369324, "rewards/rejected": 0.47639450430870056, "step": 252 }, { "epoch": 0.64, "learning_rate": 9.145442669509786e-08, "logits/chosen": -2.284529209136963, "logits/rejected": -2.2801573276519775, "logps/chosen": -2.5591859817504883, "logps/rejected": -4.612924575805664, "loss": 0.7006, "rewards/accuracies": 0.0, "rewards/chosen": 0.6015567779541016, "rewards/margins": -0.409440279006958, "rewards/rejected": 1.0109970569610596, "step": 253 }, { "epoch": 0.64, "learning_rate": 9.137783498184064e-08, "logits/chosen": -2.3540947437286377, "logits/rejected": -2.3483006954193115, "logps/chosen": -2.1941146850585938, "logps/rejected": -2.548011064529419, "loss": 0.7014, "rewards/accuracies": 0.0, "rewards/chosen": 0.6107126474380493, "rewards/margins": -0.18776720762252808, "rewards/rejected": 0.7984798550605774, "step": 254 }, { "epoch": 0.65, "learning_rate": 9.130093393492299e-08, "logits/chosen": -2.2954978942871094, "logits/rejected": -2.2973074913024902, "logps/chosen": -4.103755950927734, "logps/rejected": -3.5608766078948975, "loss": 0.7059, "rewards/accuracies": 1.0, "rewards/chosen": 0.8694868087768555, "rewards/margins": 0.340085506439209, "rewards/rejected": 0.5294013023376465, "step": 255 }, { "epoch": 0.65, "learning_rate": 9.122372412924407e-08, "logits/chosen": -2.18109130859375, "logits/rejected": -2.2366979122161865, "logps/chosen": -2.3326473236083984, "logps/rejected": -9.155355453491211, "loss": 0.5612, "rewards/accuracies": 1.0, "rewards/chosen": 0.7286275029182434, "rewards/margins": 0.31147336959838867, "rewards/rejected": 0.41715413331985474, "step": 256 }, { "epoch": 0.65, "learning_rate": 9.114620614201127e-08, "logits/chosen": -2.211893320083618, "logits/rejected": -2.1985790729522705, "logps/chosen": -6.650203704833984, "logps/rejected": -3.172226905822754, "loss": 0.6155, "rewards/accuracies": 1.0, "rewards/chosen": 0.778830349445343, "rewards/margins": 0.10270035266876221, "rewards/rejected": 0.6761299967765808, "step": 257 }, { "epoch": 0.65, "learning_rate": 9.106838055273587e-08, "logits/chosen": -2.267921209335327, "logits/rejected": -2.2491986751556396, "logps/chosen": -3.0498030185699463, "logps/rejected": -12.772116661071777, "loss": 0.6452, "rewards/accuracies": 0.0, "rewards/chosen": 0.6061705946922302, "rewards/margins": -0.22316265106201172, "rewards/rejected": 0.8293332457542419, "step": 258 }, { "epoch": 0.66, "learning_rate": 9.099024794322874e-08, "logits/chosen": -2.131963014602661, "logits/rejected": -2.1402523517608643, "logps/chosen": -4.144197463989258, "logps/rejected": -3.735063314437866, "loss": 0.7179, "rewards/accuracies": 1.0, "rewards/chosen": 0.8132217526435852, "rewards/margins": 0.2908781170845032, "rewards/rejected": 0.522343635559082, "step": 259 }, { "epoch": 0.66, "learning_rate": 9.091180889759602e-08, "logits/chosen": -2.2722325325012207, "logits/rejected": -2.281471014022827, "logps/chosen": -2.2196860313415527, "logps/rejected": -5.404203414916992, "loss": 0.6051, "rewards/accuracies": 1.0, "rewards/chosen": 0.7383219599723816, "rewards/margins": 0.306484580039978, "rewards/rejected": 0.43183737993240356, "step": 260 }, { "epoch": 0.66, "learning_rate": 9.083306400223464e-08, "logits/chosen": -2.2768521308898926, "logits/rejected": -2.2833597660064697, "logps/chosen": -4.607175350189209, "logps/rejected": -2.890796661376953, "loss": 0.6573, "rewards/accuracies": 1.0, "rewards/chosen": 0.7391388416290283, "rewards/margins": 0.11015856266021729, "rewards/rejected": 0.628980278968811, "step": 261 }, { "epoch": 0.66, "learning_rate": 9.075401384582808e-08, "logits/chosen": -2.2697718143463135, "logits/rejected": -2.271639585494995, "logps/chosen": -3.651139497756958, "logps/rejected": -3.0294957160949707, "loss": 0.6552, "rewards/accuracies": 0.0, "rewards/chosen": 0.7090913653373718, "rewards/margins": -0.27413105964660645, "rewards/rejected": 0.9832224249839783, "step": 262 }, { "epoch": 0.67, "learning_rate": 9.067465901934186e-08, "logits/chosen": -2.2063684463500977, "logits/rejected": -2.2115297317504883, "logps/chosen": -2.0481011867523193, "logps/rejected": -4.4699788093566895, "loss": 0.5901, "rewards/accuracies": 1.0, "rewards/chosen": 0.918989360332489, "rewards/margins": 0.46916288137435913, "rewards/rejected": 0.4498264789581299, "step": 263 }, { "epoch": 0.67, "learning_rate": 9.059500011601917e-08, "logits/chosen": -2.226032018661499, "logits/rejected": -2.374464988708496, "logps/chosen": -3.850369691848755, "logps/rejected": -15.380311012268066, "loss": 0.622, "rewards/accuracies": 1.0, "rewards/chosen": 0.6227721571922302, "rewards/margins": 0.0029712915420532227, "rewards/rejected": 0.619800865650177, "step": 264 }, { "epoch": 0.67, "learning_rate": 9.051503773137645e-08, "logits/chosen": -2.158787488937378, "logits/rejected": -2.1572325229644775, "logps/chosen": -1.870935082435608, "logps/rejected": -8.25424861907959, "loss": 0.6021, "rewards/accuracies": 0.0, "rewards/chosen": 0.664730966091156, "rewards/margins": -0.08625936508178711, "rewards/rejected": 0.7509903311729431, "step": 265 }, { "epoch": 0.67, "learning_rate": 9.043477246319888e-08, "logits/chosen": -2.2477407455444336, "logits/rejected": -2.245894193649292, "logps/chosen": -8.948267936706543, "logps/rejected": -4.683255195617676, "loss": 0.7618, "rewards/accuracies": 1.0, "rewards/chosen": 0.8778912425041199, "rewards/margins": 0.3564663529396057, "rewards/rejected": 0.5214248895645142, "step": 266 }, { "epoch": 0.68, "learning_rate": 9.035420491153595e-08, "logits/chosen": -2.197430372238159, "logits/rejected": -2.195192575454712, "logps/chosen": -1.8723909854888916, "logps/rejected": -6.534840106964111, "loss": 0.7895, "rewards/accuracies": 0.0, "rewards/chosen": 0.615483283996582, "rewards/margins": -0.28769493103027344, "rewards/rejected": 0.9031782150268555, "step": 267 }, { "epoch": 0.68, "learning_rate": 9.0273335678697e-08, "logits/chosen": -2.3041152954101562, "logits/rejected": -2.300640106201172, "logps/chosen": -21.557344436645508, "logps/rejected": -12.09442138671875, "loss": 0.6071, "rewards/accuracies": 1.0, "rewards/chosen": 0.7279626727104187, "rewards/margins": 0.1254260540008545, "rewards/rejected": 0.6025366187095642, "step": 268 }, { "epoch": 0.68, "learning_rate": 9.019216536924666e-08, "logits/chosen": -2.242302417755127, "logits/rejected": -2.247436285018921, "logps/chosen": -1.2179433107376099, "logps/rejected": -5.1372575759887695, "loss": 0.6754, "rewards/accuracies": 1.0, "rewards/chosen": 0.7550104260444641, "rewards/margins": 0.31892919540405273, "rewards/rejected": 0.4360812306404114, "step": 269 }, { "epoch": 0.68, "learning_rate": 9.011069459000033e-08, "logits/chosen": -2.202958583831787, "logits/rejected": -2.2104039192199707, "logps/chosen": -6.1925835609436035, "logps/rejected": -4.0015363693237305, "loss": 0.6171, "rewards/accuracies": 1.0, "rewards/chosen": 0.8710241317749023, "rewards/margins": 0.35598665475845337, "rewards/rejected": 0.515037477016449, "step": 270 }, { "epoch": 0.69, "learning_rate": 9.002892395001977e-08, "logits/chosen": -2.3005030155181885, "logits/rejected": -2.303243398666382, "logps/chosen": -2.582320213317871, "logps/rejected": -6.89100456237793, "loss": 0.6185, "rewards/accuracies": 1.0, "rewards/chosen": 0.8467354774475098, "rewards/margins": 0.15313881635665894, "rewards/rejected": 0.6935966610908508, "step": 271 }, { "epoch": 0.69, "learning_rate": 8.994685406060836e-08, "logits/chosen": -2.2188594341278076, "logits/rejected": -2.2166199684143066, "logps/chosen": -2.9071896076202393, "logps/rejected": -8.088216781616211, "loss": 0.7324, "rewards/accuracies": 0.0, "rewards/chosen": 0.5205180048942566, "rewards/margins": -0.05365175008773804, "rewards/rejected": 0.5741697549819946, "step": 272 }, { "epoch": 0.69, "learning_rate": 8.986448553530663e-08, "logits/chosen": -2.1949219703674316, "logits/rejected": -2.267944574356079, "logps/chosen": -3.4666833877563477, "logps/rejected": -19.362464904785156, "loss": 0.6142, "rewards/accuracies": 1.0, "rewards/chosen": 0.6469718813896179, "rewards/margins": 0.1474132239818573, "rewards/rejected": 0.4995586574077606, "step": 273 }, { "epoch": 0.69, "learning_rate": 8.978181898988768e-08, "logits/chosen": -2.3451106548309326, "logits/rejected": -2.337130546569824, "logps/chosen": -1.2892738580703735, "logps/rejected": -8.217554092407227, "loss": 0.6617, "rewards/accuracies": 0.0, "rewards/chosen": 0.6488154530525208, "rewards/margins": -0.22743403911590576, "rewards/rejected": 0.8762494921684265, "step": 274 }, { "epoch": 0.7, "learning_rate": 8.969885504235255e-08, "logits/chosen": -2.192816734313965, "logits/rejected": -2.200608491897583, "logps/chosen": -2.2027900218963623, "logps/rejected": -6.217446327209473, "loss": 0.7197, "rewards/accuracies": 0.0, "rewards/chosen": 0.4527048170566559, "rewards/margins": -0.35998544096946716, "rewards/rejected": 0.812690258026123, "step": 275 }, { "epoch": 0.7, "learning_rate": 8.96155943129256e-08, "logits/chosen": -2.2263071537017822, "logits/rejected": -2.2343649864196777, "logps/chosen": -7.489384174346924, "logps/rejected": -4.902670860290527, "loss": 0.6746, "rewards/accuracies": 1.0, "rewards/chosen": 0.97487872838974, "rewards/margins": 0.47207826375961304, "rewards/rejected": 0.502800464630127, "step": 276 }, { "epoch": 0.7, "learning_rate": 8.95320374240499e-08, "logits/chosen": -2.1761434078216553, "logits/rejected": -2.17470645904541, "logps/chosen": -2.7073817253112793, "logps/rejected": -7.698543548583984, "loss": 0.6632, "rewards/accuracies": 1.0, "rewards/chosen": 0.800475537776947, "rewards/margins": 0.28763753175735474, "rewards/rejected": 0.5128380060195923, "step": 277 }, { "epoch": 0.7, "learning_rate": 8.944818500038256e-08, "logits/chosen": -2.2915360927581787, "logits/rejected": -2.281219959259033, "logps/chosen": -1.7643964290618896, "logps/rejected": -3.934894561767578, "loss": 0.739, "rewards/accuracies": 0.0, "rewards/chosen": 0.5358161330223083, "rewards/margins": -0.1926695704460144, "rewards/rejected": 0.7284857034683228, "step": 278 }, { "epoch": 0.71, "learning_rate": 8.936403766879002e-08, "logits/chosen": -2.2468655109405518, "logits/rejected": -2.255406141281128, "logps/chosen": -7.104115009307861, "logps/rejected": -4.11641788482666, "loss": 0.7026, "rewards/accuracies": 1.0, "rewards/chosen": 0.8683185577392578, "rewards/margins": 0.29565608501434326, "rewards/rejected": 0.5726624727249146, "step": 279 }, { "epoch": 0.71, "learning_rate": 8.927959605834345e-08, "logits/chosen": -2.30476975440979, "logits/rejected": -2.3007442951202393, "logps/chosen": -2.8865606784820557, "logps/rejected": -5.545173168182373, "loss": 0.5946, "rewards/accuracies": 1.0, "rewards/chosen": 0.7287570834159851, "rewards/margins": 0.036641836166381836, "rewards/rejected": 0.6921152472496033, "step": 280 }, { "epoch": 0.71, "learning_rate": 8.919486080031395e-08, "logits/chosen": -2.1894750595092773, "logits/rejected": -2.19014310836792, "logps/chosen": -1.7754311561584473, "logps/rejected": -4.7354655265808105, "loss": 0.6641, "rewards/accuracies": 1.0, "rewards/chosen": 0.7989866137504578, "rewards/margins": 0.3624809682369232, "rewards/rejected": 0.43650564551353455, "step": 281 }, { "epoch": 0.71, "learning_rate": 8.910983252816793e-08, "logits/chosen": -2.088463306427002, "logits/rejected": -2.099806308746338, "logps/chosen": -3.346522808074951, "logps/rejected": -9.435834884643555, "loss": 0.6356, "rewards/accuracies": 0.0, "rewards/chosen": 0.5163276195526123, "rewards/margins": -0.27450549602508545, "rewards/rejected": 0.7908331155776978, "step": 282 }, { "epoch": 0.72, "learning_rate": 8.902451187756226e-08, "logits/chosen": -2.2045493125915527, "logits/rejected": -2.2022905349731445, "logps/chosen": -2.297366142272949, "logps/rejected": -5.061419486999512, "loss": 0.7866, "rewards/accuracies": 0.0, "rewards/chosen": 0.4704340994358063, "rewards/margins": -0.6114003658294678, "rewards/rejected": 1.0818344354629517, "step": 283 }, { "epoch": 0.72, "learning_rate": 8.893889948633967e-08, "logits/chosen": -2.169530153274536, "logits/rejected": -2.1714301109313965, "logps/chosen": -4.691617012023926, "logps/rejected": -2.871093273162842, "loss": 0.7326, "rewards/accuracies": 0.0, "rewards/chosen": 0.5385773777961731, "rewards/margins": -0.23502248525619507, "rewards/rejected": 0.7735998630523682, "step": 284 }, { "epoch": 0.72, "learning_rate": 8.88529959945238e-08, "logits/chosen": -2.2673351764678955, "logits/rejected": -2.277390480041504, "logps/chosen": -9.750621795654297, "logps/rejected": -2.935088872909546, "loss": 0.6048, "rewards/accuracies": 1.0, "rewards/chosen": 0.828974723815918, "rewards/margins": 0.2540435791015625, "rewards/rejected": 0.5749311447143555, "step": 285 }, { "epoch": 0.72, "learning_rate": 8.876680204431459e-08, "logits/chosen": -2.233917236328125, "logits/rejected": -2.234483480453491, "logps/chosen": -4.78733491897583, "logps/rejected": -4.339794158935547, "loss": 0.6759, "rewards/accuracies": 0.0, "rewards/chosen": 0.6527506709098816, "rewards/margins": -0.18711942434310913, "rewards/rejected": 0.8398700952529907, "step": 286 }, { "epoch": 0.73, "learning_rate": 8.868031828008333e-08, "logits/chosen": -2.1624608039855957, "logits/rejected": -2.1676790714263916, "logps/chosen": -3.521009922027588, "logps/rejected": -4.670264720916748, "loss": 0.6342, "rewards/accuracies": 1.0, "rewards/chosen": 0.6993621587753296, "rewards/margins": 0.2728927731513977, "rewards/rejected": 0.4264693856239319, "step": 287 }, { "epoch": 0.73, "learning_rate": 8.859354534836796e-08, "logits/chosen": -2.2437984943389893, "logits/rejected": -2.246872901916504, "logps/chosen": -2.562162160873413, "logps/rejected": -4.5690507888793945, "loss": 0.6326, "rewards/accuracies": 1.0, "rewards/chosen": 0.8507823944091797, "rewards/margins": 0.2956143021583557, "rewards/rejected": 0.555168092250824, "step": 288 }, { "epoch": 0.73, "learning_rate": 8.850648389786816e-08, "logits/chosen": -2.2476389408111572, "logits/rejected": -2.2720062732696533, "logps/chosen": -1.1939027309417725, "logps/rejected": -18.24765968322754, "loss": 0.7421, "rewards/accuracies": 1.0, "rewards/chosen": 0.6840721368789673, "rewards/margins": 0.025335729122161865, "rewards/rejected": 0.6587364077568054, "step": 289 }, { "epoch": 0.73, "learning_rate": 8.841913457944052e-08, "logits/chosen": -2.1503076553344727, "logits/rejected": -2.1473329067230225, "logps/chosen": -2.0443990230560303, "logps/rejected": -9.103983879089355, "loss": 0.676, "rewards/accuracies": 0.0, "rewards/chosen": 0.6198959350585938, "rewards/margins": -0.2548322081565857, "rewards/rejected": 0.8747281432151794, "step": 290 }, { "epoch": 0.74, "learning_rate": 8.833149804609371e-08, "logits/chosen": -2.216059923171997, "logits/rejected": -2.208040475845337, "logps/chosen": -4.014065265655518, "logps/rejected": -8.034974098205566, "loss": 0.7227, "rewards/accuracies": 0.0, "rewards/chosen": 0.435029000043869, "rewards/margins": -0.45601287484169006, "rewards/rejected": 0.8910418748855591, "step": 291 }, { "epoch": 0.74, "learning_rate": 8.824357495298356e-08, "logits/chosen": -1.9906648397445679, "logits/rejected": -2.03056263923645, "logps/chosen": -1.4486112594604492, "logps/rejected": -10.201873779296875, "loss": 0.705, "rewards/accuracies": 1.0, "rewards/chosen": 0.7445021867752075, "rewards/margins": 0.21052801609039307, "rewards/rejected": 0.5339741706848145, "step": 292 }, { "epoch": 0.74, "learning_rate": 8.815536595740815e-08, "logits/chosen": -2.2995619773864746, "logits/rejected": -2.3040366172790527, "logps/chosen": -1.691622018814087, "logps/rejected": -4.516468048095703, "loss": 0.6669, "rewards/accuracies": 1.0, "rewards/chosen": 0.83912193775177, "rewards/margins": 0.33192873001098633, "rewards/rejected": 0.5071932077407837, "step": 293 }, { "epoch": 0.74, "learning_rate": 8.806687171880296e-08, "logits/chosen": -2.1120569705963135, "logits/rejected": -2.1234326362609863, "logps/chosen": -5.35554313659668, "logps/rejected": -13.485841751098633, "loss": 0.6099, "rewards/accuracies": 0.0, "rewards/chosen": 0.4443093240261078, "rewards/margins": -0.2817296087741852, "rewards/rejected": 0.726038932800293, "step": 294 }, { "epoch": 0.75, "learning_rate": 8.797809289873585e-08, "logits/chosen": -2.2712810039520264, "logits/rejected": -2.262749195098877, "logps/chosen": -1.5634406805038452, "logps/rejected": -4.342424392700195, "loss": 0.7169, "rewards/accuracies": 0.0, "rewards/chosen": 0.7479020953178406, "rewards/margins": -0.2845333218574524, "rewards/rejected": 1.032435417175293, "step": 295 }, { "epoch": 0.75, "learning_rate": 8.788903016090221e-08, "logits/chosen": -2.3442413806915283, "logits/rejected": -2.3560752868652344, "logps/chosen": -10.691391944885254, "logps/rejected": -1.5169049501419067, "loss": 0.7488, "rewards/accuracies": 1.0, "rewards/chosen": 0.7104541063308716, "rewards/margins": 0.08779740333557129, "rewards/rejected": 0.6226567029953003, "step": 296 }, { "epoch": 0.75, "learning_rate": 8.779968417111991e-08, "logits/chosen": -2.236016273498535, "logits/rejected": -2.2804949283599854, "logps/chosen": -5.9967851638793945, "logps/rejected": -14.459293365478516, "loss": 0.7163, "rewards/accuracies": 1.0, "rewards/chosen": 0.7345768213272095, "rewards/margins": 0.12779277563095093, "rewards/rejected": 0.6067840456962585, "step": 297 }, { "epoch": 0.75, "learning_rate": 8.771005559732439e-08, "logits/chosen": -2.253784656524658, "logits/rejected": -2.247710704803467, "logps/chosen": -1.512248158454895, "logps/rejected": -4.531370162963867, "loss": 0.7712, "rewards/accuracies": 0.0, "rewards/chosen": 0.6498987078666687, "rewards/margins": -0.21294701099395752, "rewards/rejected": 0.8628457188606262, "step": 298 }, { "epoch": 0.76, "learning_rate": 8.762014510956363e-08, "logits/chosen": -2.243318557739258, "logits/rejected": -2.244884967803955, "logps/chosen": -1.89686918258667, "logps/rejected": -4.153787612915039, "loss": 0.6238, "rewards/accuracies": 1.0, "rewards/chosen": 0.8080507516860962, "rewards/margins": 0.33519986271858215, "rewards/rejected": 0.47285088896751404, "step": 299 }, { "epoch": 0.76, "learning_rate": 8.752995337999315e-08, "logits/chosen": -2.183077812194824, "logits/rejected": -2.1815271377563477, "logps/chosen": -1.772191047668457, "logps/rejected": -8.900092124938965, "loss": 0.8265, "rewards/accuracies": 0.0, "rewards/chosen": 0.625726044178009, "rewards/margins": -0.26620084047317505, "rewards/rejected": 0.8919268846511841, "step": 300 }, { "epoch": 0.76, "learning_rate": 8.7439481082871e-08, "logits/chosen": -2.220825433731079, "logits/rejected": -2.227386951446533, "logps/chosen": -1.619027853012085, "logps/rejected": -9.162134170532227, "loss": 0.6057, "rewards/accuracies": 1.0, "rewards/chosen": 0.8023285865783691, "rewards/margins": 0.37555035948753357, "rewards/rejected": 0.42677822709083557, "step": 301 }, { "epoch": 0.76, "learning_rate": 8.734872889455268e-08, "logits/chosen": -2.2401137351989746, "logits/rejected": -2.237279176712036, "logps/chosen": -1.991098403930664, "logps/rejected": -5.623602867126465, "loss": 0.7042, "rewards/accuracies": 0.0, "rewards/chosen": 0.5893382430076599, "rewards/margins": -0.3182258605957031, "rewards/rejected": 0.907564103603363, "step": 302 }, { "epoch": 0.77, "learning_rate": 8.725769749348612e-08, "logits/chosen": -2.315666913986206, "logits/rejected": -2.313289165496826, "logps/chosen": -2.615548610687256, "logps/rejected": -4.389623641967773, "loss": 0.7381, "rewards/accuracies": 0.0, "rewards/chosen": 0.6097501516342163, "rewards/margins": -0.11332494020462036, "rewards/rejected": 0.7230750918388367, "step": 303 }, { "epoch": 0.77, "learning_rate": 8.71663875602066e-08, "logits/chosen": -2.3547582626342773, "logits/rejected": -2.34637188911438, "logps/chosen": -6.555727958679199, "logps/rejected": -9.53844928741455, "loss": 0.7529, "rewards/accuracies": 0.0, "rewards/chosen": 0.7363740801811218, "rewards/margins": -0.09462577104568481, "rewards/rejected": 0.8309998512268066, "step": 304 }, { "epoch": 0.77, "learning_rate": 8.707479977733168e-08, "logits/chosen": -2.152312994003296, "logits/rejected": -2.141751289367676, "logps/chosen": -9.752217292785645, "logps/rejected": -4.613440990447998, "loss": 0.6106, "rewards/accuracies": 1.0, "rewards/chosen": 0.8925997614860535, "rewards/margins": 0.295462429523468, "rewards/rejected": 0.5971373319625854, "step": 305 }, { "epoch": 0.77, "learning_rate": 8.698293482955604e-08, "logits/chosen": -2.1639552116394043, "logits/rejected": -2.1666717529296875, "logps/chosen": -2.5135862827301025, "logps/rejected": -5.422354221343994, "loss": 0.7272, "rewards/accuracies": 1.0, "rewards/chosen": 0.7608436942100525, "rewards/margins": 0.20702320337295532, "rewards/rejected": 0.5538204908370972, "step": 306 }, { "epoch": 0.78, "learning_rate": 8.689079340364643e-08, "logits/chosen": -2.318451166152954, "logits/rejected": -2.34077787399292, "logps/chosen": -6.480485916137695, "logps/rejected": -9.626208305358887, "loss": 0.6631, "rewards/accuracies": 0.0, "rewards/chosen": 0.5963859558105469, "rewards/margins": -0.12000703811645508, "rewards/rejected": 0.716392993927002, "step": 307 }, { "epoch": 0.78, "learning_rate": 8.679837618843645e-08, "logits/chosen": -2.1891772747039795, "logits/rejected": -2.3649330139160156, "logps/chosen": -2.072601795196533, "logps/rejected": -35.89158248901367, "loss": 0.6273, "rewards/accuracies": 1.0, "rewards/chosen": 0.6795799136161804, "rewards/margins": 0.6074092984199524, "rewards/rejected": 0.07217063754796982, "step": 308 }, { "epoch": 0.78, "learning_rate": 8.670568387482153e-08, "logits/chosen": -2.1883819103240967, "logits/rejected": -2.209129810333252, "logps/chosen": -9.703679084777832, "logps/rejected": -4.281970500946045, "loss": 0.5229, "rewards/accuracies": 1.0, "rewards/chosen": 0.941408097743988, "rewards/margins": 0.38216453790664673, "rewards/rejected": 0.5592435598373413, "step": 309 }, { "epoch": 0.78, "learning_rate": 8.661271715575362e-08, "logits/chosen": -2.345487117767334, "logits/rejected": -2.3659348487854004, "logps/chosen": -3.908374547958374, "logps/rejected": -4.900076866149902, "loss": 0.6509, "rewards/accuracies": 1.0, "rewards/chosen": 0.9498831033706665, "rewards/margins": 0.37324774265289307, "rewards/rejected": 0.5766353607177734, "step": 310 }, { "epoch": 0.79, "learning_rate": 8.651947672623612e-08, "logits/chosen": -2.286099672317505, "logits/rejected": -2.303679943084717, "logps/chosen": -6.183823108673096, "logps/rejected": -3.490060567855835, "loss": 0.656, "rewards/accuracies": 1.0, "rewards/chosen": 0.9999405145645142, "rewards/margins": 0.45399343967437744, "rewards/rejected": 0.5459470748901367, "step": 311 }, { "epoch": 0.79, "learning_rate": 8.642596328331863e-08, "logits/chosen": -2.27144455909729, "logits/rejected": -2.2780535221099854, "logps/chosen": -2.6168551445007324, "logps/rejected": -15.9560546875, "loss": 0.8027, "rewards/accuracies": 0.0, "rewards/chosen": 0.6765827536582947, "rewards/margins": -0.2104419469833374, "rewards/rejected": 0.8870247006416321, "step": 312 }, { "epoch": 0.79, "learning_rate": 8.633217752609177e-08, "logits/chosen": -2.1779916286468506, "logits/rejected": -2.177222967147827, "logps/chosen": -3.9453446865081787, "logps/rejected": -6.437819480895996, "loss": 0.6815, "rewards/accuracies": 0.0, "rewards/chosen": 0.679039478302002, "rewards/margins": -0.05795496702194214, "rewards/rejected": 0.7369944453239441, "step": 313 }, { "epoch": 0.79, "learning_rate": 8.623812015568188e-08, "logits/chosen": -2.279585361480713, "logits/rejected": -2.283596992492676, "logps/chosen": -2.8388237953186035, "logps/rejected": -4.447893142700195, "loss": 0.7664, "rewards/accuracies": 0.0, "rewards/chosen": 0.4745495319366455, "rewards/margins": -0.35512739419937134, "rewards/rejected": 0.8296769261360168, "step": 314 }, { "epoch": 0.8, "learning_rate": 8.614379187524592e-08, "logits/chosen": -2.242426872253418, "logits/rejected": -2.2272865772247314, "logps/chosen": -2.801212787628174, "logps/rejected": -6.297119617462158, "loss": 0.7181, "rewards/accuracies": 0.0, "rewards/chosen": 0.6242383718490601, "rewards/margins": -0.466957688331604, "rewards/rejected": 1.091196060180664, "step": 315 }, { "epoch": 0.8, "learning_rate": 8.604919338996603e-08, "logits/chosen": -2.3031816482543945, "logits/rejected": -2.31207013130188, "logps/chosen": -2.3892548084259033, "logps/rejected": -2.8086109161376953, "loss": 0.7614, "rewards/accuracies": 1.0, "rewards/chosen": 0.831723153591156, "rewards/margins": 0.23823118209838867, "rewards/rejected": 0.5934919714927673, "step": 316 }, { "epoch": 0.8, "learning_rate": 8.595432540704445e-08, "logits/chosen": -2.1956636905670166, "logits/rejected": -2.34889554977417, "logps/chosen": -2.585395574569702, "logps/rejected": -17.05449867248535, "loss": 0.5462, "rewards/accuracies": 1.0, "rewards/chosen": 0.7406458258628845, "rewards/margins": 0.2550129294395447, "rewards/rejected": 0.48563289642333984, "step": 317 }, { "epoch": 0.81, "learning_rate": 8.585918863569805e-08, "logits/chosen": -2.1783487796783447, "logits/rejected": -2.181511402130127, "logps/chosen": -1.9211138486862183, "logps/rejected": -3.6130125522613525, "loss": 0.5335, "rewards/accuracies": 1.0, "rewards/chosen": 0.7220198512077332, "rewards/margins": 0.3115347623825073, "rewards/rejected": 0.41048508882522583, "step": 318 }, { "epoch": 0.81, "learning_rate": 8.57637837871532e-08, "logits/chosen": -2.20171856880188, "logits/rejected": -2.202820062637329, "logps/chosen": -9.355513572692871, "logps/rejected": -8.173850059509277, "loss": 0.5931, "rewards/accuracies": 1.0, "rewards/chosen": 0.7938559651374817, "rewards/margins": 0.36337652802467346, "rewards/rejected": 0.4304794371128082, "step": 319 }, { "epoch": 0.81, "learning_rate": 8.566811157464031e-08, "logits/chosen": -2.1730847358703613, "logits/rejected": -2.1737451553344727, "logps/chosen": -9.176032066345215, "logps/rejected": -7.111884593963623, "loss": 0.5638, "rewards/accuracies": 1.0, "rewards/chosen": 0.7527291178703308, "rewards/margins": 0.17589688301086426, "rewards/rejected": 0.5768322348594666, "step": 320 }, { "epoch": 0.81, "learning_rate": 8.557217271338859e-08, "logits/chosen": -2.206467866897583, "logits/rejected": -2.2102999687194824, "logps/chosen": -2.20160174369812, "logps/rejected": -5.874699115753174, "loss": 0.7357, "rewards/accuracies": 1.0, "rewards/chosen": 0.7322577834129333, "rewards/margins": 0.06994742155075073, "rewards/rejected": 0.6623103618621826, "step": 321 }, { "epoch": 0.82, "learning_rate": 8.547596792062063e-08, "logits/chosen": -2.2414424419403076, "logits/rejected": -2.237563133239746, "logps/chosen": -2.4795403480529785, "logps/rejected": -4.077603340148926, "loss": 0.6753, "rewards/accuracies": 0.0, "rewards/chosen": 0.5272725820541382, "rewards/margins": -0.3390060067176819, "rewards/rejected": 0.8662785887718201, "step": 322 }, { "epoch": 0.82, "learning_rate": 8.537949791554713e-08, "logits/chosen": -2.097848892211914, "logits/rejected": -2.0950472354888916, "logps/chosen": -3.4305830001831055, "logps/rejected": -6.197892189025879, "loss": 0.767, "rewards/accuracies": 0.0, "rewards/chosen": 0.5065140724182129, "rewards/margins": -0.2865312695503235, "rewards/rejected": 0.7930453419685364, "step": 323 }, { "epoch": 0.82, "learning_rate": 8.528276341936144e-08, "logits/chosen": -2.2243618965148926, "logits/rejected": -2.212719202041626, "logps/chosen": -3.07175612449646, "logps/rejected": -7.576408386230469, "loss": 0.6722, "rewards/accuracies": 0.0, "rewards/chosen": 0.5696724057197571, "rewards/margins": -0.24895697832107544, "rewards/rejected": 0.8186293840408325, "step": 324 }, { "epoch": 0.82, "learning_rate": 8.518576515523422e-08, "logits/chosen": -2.2215206623077393, "logits/rejected": -2.2086329460144043, "logps/chosen": -5.074832439422607, "logps/rejected": -3.059757709503174, "loss": 0.5766, "rewards/accuracies": 1.0, "rewards/chosen": 0.9593103528022766, "rewards/margins": 0.2881268858909607, "rewards/rejected": 0.6711834669113159, "step": 325 }, { "epoch": 0.83, "learning_rate": 8.5088503848308e-08, "logits/chosen": -2.177495002746582, "logits/rejected": -2.178893804550171, "logps/chosen": -5.675402641296387, "logps/rejected": -1.3951776027679443, "loss": 0.7133, "rewards/accuracies": 0.0, "rewards/chosen": 0.607022762298584, "rewards/margins": -0.15751802921295166, "rewards/rejected": 0.7645407915115356, "step": 326 }, { "epoch": 0.83, "learning_rate": 8.499098022569176e-08, "logits/chosen": -2.1766927242279053, "logits/rejected": -2.180809736251831, "logps/chosen": -1.9685916900634766, "logps/rejected": -8.270740509033203, "loss": 0.7932, "rewards/accuracies": 0.0, "rewards/chosen": 0.6043161749839783, "rewards/margins": -0.3293313980102539, "rewards/rejected": 0.9336475729942322, "step": 327 }, { "epoch": 0.83, "learning_rate": 8.489319501645553e-08, "logits/chosen": -2.3208131790161133, "logits/rejected": -2.3312759399414062, "logps/chosen": -12.098590850830078, "logps/rejected": -3.11930513381958, "loss": 0.6304, "rewards/accuracies": 1.0, "rewards/chosen": 0.7884374856948853, "rewards/margins": 0.20320886373519897, "rewards/rejected": 0.5852286219596863, "step": 328 }, { "epoch": 0.83, "learning_rate": 8.479514895162494e-08, "logits/chosen": -2.1447744369506836, "logits/rejected": -2.1527352333068848, "logps/chosen": -8.293244361877441, "logps/rejected": -4.035048961639404, "loss": 0.6955, "rewards/accuracies": 1.0, "rewards/chosen": 0.9650313258171082, "rewards/margins": 0.40173208713531494, "rewards/rejected": 0.5632992386817932, "step": 329 }, { "epoch": 0.84, "learning_rate": 8.469684276417568e-08, "logits/chosen": -2.268338680267334, "logits/rejected": -2.2770936489105225, "logps/chosen": -2.582587718963623, "logps/rejected": -5.0640363693237305, "loss": 0.6226, "rewards/accuracies": 1.0, "rewards/chosen": 0.7391448616981506, "rewards/margins": 0.29030537605285645, "rewards/rejected": 0.4488394856452942, "step": 330 }, { "epoch": 0.84, "learning_rate": 8.459827718902808e-08, "logits/chosen": -2.157271385192871, "logits/rejected": -2.1668317317962646, "logps/chosen": -3.1677865982055664, "logps/rejected": -5.292725086212158, "loss": 0.7058, "rewards/accuracies": 1.0, "rewards/chosen": 0.6995550394058228, "rewards/margins": 0.22977682948112488, "rewards/rejected": 0.4697782099246979, "step": 331 }, { "epoch": 0.84, "learning_rate": 8.449945296304166e-08, "logits/chosen": -2.207061290740967, "logits/rejected": -2.2037620544433594, "logps/chosen": -7.992037773132324, "logps/rejected": -4.647127151489258, "loss": 0.6495, "rewards/accuracies": 1.0, "rewards/chosen": 0.839378297328949, "rewards/margins": 0.24182826280593872, "rewards/rejected": 0.5975500345230103, "step": 332 }, { "epoch": 0.84, "learning_rate": 8.440037082500952e-08, "logits/chosen": -2.072334051132202, "logits/rejected": -2.075462579727173, "logps/chosen": -1.9624052047729492, "logps/rejected": -1.9501581192016602, "loss": 0.6692, "rewards/accuracies": 1.0, "rewards/chosen": 0.8313722014427185, "rewards/margins": 0.18380874395370483, "rewards/rejected": 0.6475634574890137, "step": 333 }, { "epoch": 0.85, "learning_rate": 8.430103151565287e-08, "logits/chosen": -2.2372450828552246, "logits/rejected": -2.2629928588867188, "logps/chosen": -2.3961896896362305, "logps/rejected": -20.202756881713867, "loss": 0.5769, "rewards/accuracies": 1.0, "rewards/chosen": 0.8597552180290222, "rewards/margins": 1.0578105449676514, "rewards/rejected": -0.19805526733398438, "step": 334 }, { "epoch": 0.85, "learning_rate": 8.42014357776155e-08, "logits/chosen": -2.329502820968628, "logits/rejected": -2.3565011024475098, "logps/chosen": -3.054839849472046, "logps/rejected": -11.339593887329102, "loss": 0.849, "rewards/accuracies": 1.0, "rewards/chosen": 0.6365544199943542, "rewards/margins": 0.09596812725067139, "rewards/rejected": 0.5405862927436829, "step": 335 }, { "epoch": 0.85, "learning_rate": 8.410158435545824e-08, "logits/chosen": -2.333841323852539, "logits/rejected": -2.32265567779541, "logps/chosen": -2.101069211959839, "logps/rejected": -2.870487689971924, "loss": 0.7708, "rewards/accuracies": 0.0, "rewards/chosen": 0.5697774887084961, "rewards/margins": -0.17023199796676636, "rewards/rejected": 0.7400094866752625, "step": 336 }, { "epoch": 0.85, "learning_rate": 8.400147799565333e-08, "logits/chosen": -2.3242838382720947, "logits/rejected": -2.319779396057129, "logps/chosen": -3.8440628051757812, "logps/rejected": -2.7942957878112793, "loss": 0.65, "rewards/accuracies": 1.0, "rewards/chosen": 0.7411576509475708, "rewards/margins": 0.20476263761520386, "rewards/rejected": 0.5363950133323669, "step": 337 }, { "epoch": 0.86, "learning_rate": 8.390111744657891e-08, "logits/chosen": -2.216092586517334, "logits/rejected": -2.2044005393981934, "logps/chosen": -3.161015272140503, "logps/rejected": -19.4993839263916, "loss": 0.6047, "rewards/accuracies": 1.0, "rewards/chosen": 0.8554036021232605, "rewards/margins": 0.2972937226295471, "rewards/rejected": 0.5581098794937134, "step": 338 }, { "epoch": 0.86, "learning_rate": 8.380050345851335e-08, "logits/chosen": -2.2493059635162354, "logits/rejected": -2.249915599822998, "logps/chosen": -1.5115487575531006, "logps/rejected": -5.103246212005615, "loss": 0.7961, "rewards/accuracies": 1.0, "rewards/chosen": 0.7644926905632019, "rewards/margins": 0.25974392890930176, "rewards/rejected": 0.5047487616539001, "step": 339 }, { "epoch": 0.86, "learning_rate": 8.369963678362977e-08, "logits/chosen": -2.2681610584259033, "logits/rejected": -2.243525266647339, "logps/chosen": -15.088316917419434, "logps/rejected": -3.2392046451568604, "loss": 0.5946, "rewards/accuracies": 1.0, "rewards/chosen": 0.8756274580955505, "rewards/margins": 0.3693932890892029, "rewards/rejected": 0.5062341690063477, "step": 340 }, { "epoch": 0.86, "learning_rate": 8.359851817599026e-08, "logits/chosen": -2.2310595512390137, "logits/rejected": -2.236584186553955, "logps/chosen": -1.7890890836715698, "logps/rejected": -4.870375633239746, "loss": 0.6582, "rewards/accuracies": 1.0, "rewards/chosen": 0.8245722055435181, "rewards/margins": 0.33619511127471924, "rewards/rejected": 0.48837709426879883, "step": 341 }, { "epoch": 0.87, "learning_rate": 8.349714839154034e-08, "logits/chosen": -2.330826997756958, "logits/rejected": -2.330744981765747, "logps/chosen": -3.6830172538757324, "logps/rejected": -5.9170379638671875, "loss": 0.6235, "rewards/accuracies": 0.0, "rewards/chosen": 0.7273632884025574, "rewards/margins": -0.11318087577819824, "rewards/rejected": 0.8405441641807556, "step": 342 }, { "epoch": 0.87, "learning_rate": 8.339552818810329e-08, "logits/chosen": -2.0890519618988037, "logits/rejected": -2.09212064743042, "logps/chosen": -3.493119239807129, "logps/rejected": -4.464483737945557, "loss": 0.5955, "rewards/accuracies": 1.0, "rewards/chosen": 0.7394589781761169, "rewards/margins": 0.28785866498947144, "rewards/rejected": 0.4516003131866455, "step": 343 }, { "epoch": 0.87, "learning_rate": 8.329365832537447e-08, "logits/chosen": -2.224479913711548, "logits/rejected": -2.2259974479675293, "logps/chosen": -1.071984052658081, "logps/rejected": -5.222357273101807, "loss": 0.7151, "rewards/accuracies": 0.0, "rewards/chosen": 0.7189607620239258, "rewards/margins": -0.25483304262161255, "rewards/rejected": 0.9737938046455383, "step": 344 }, { "epoch": 0.87, "learning_rate": 8.319153956491567e-08, "logits/chosen": -2.177751064300537, "logits/rejected": -2.1795310974121094, "logps/chosen": -0.9932639598846436, "logps/rejected": -10.050455093383789, "loss": 0.6573, "rewards/accuracies": 0.0, "rewards/chosen": 0.6284673810005188, "rewards/margins": -0.4034338593482971, "rewards/rejected": 1.031901240348816, "step": 345 }, { "epoch": 0.88, "learning_rate": 8.308917267014939e-08, "logits/chosen": -2.2983384132385254, "logits/rejected": -2.292259931564331, "logps/chosen": -1.947515606880188, "logps/rejected": -4.760142803192139, "loss": 0.6118, "rewards/accuracies": 0.0, "rewards/chosen": 0.6720777750015259, "rewards/margins": -0.11295348405838013, "rewards/rejected": 0.785031259059906, "step": 346 }, { "epoch": 0.88, "learning_rate": 8.29865584063531e-08, "logits/chosen": -2.2273101806640625, "logits/rejected": -2.258272886276245, "logps/chosen": -1.222588300704956, "logps/rejected": -8.25588321685791, "loss": 0.5435, "rewards/accuracies": 1.0, "rewards/chosen": 0.7856465578079224, "rewards/margins": 0.41796019673347473, "rewards/rejected": 0.36768636107444763, "step": 347 }, { "epoch": 0.88, "learning_rate": 8.288369754065362e-08, "logits/chosen": -2.2041914463043213, "logits/rejected": -2.2100000381469727, "logps/chosen": -1.058562994003296, "logps/rejected": -4.4298248291015625, "loss": 0.8363, "rewards/accuracies": 1.0, "rewards/chosen": 0.829197347164154, "rewards/margins": 0.3479756712913513, "rewards/rejected": 0.48122167587280273, "step": 348 }, { "epoch": 0.88, "learning_rate": 8.278059084202129e-08, "logits/chosen": -2.1860742568969727, "logits/rejected": -2.1951208114624023, "logps/chosen": -2.8407633304595947, "logps/rejected": -4.3432512283325195, "loss": 0.6914, "rewards/accuracies": 1.0, "rewards/chosen": 0.8472048044204712, "rewards/margins": 0.3579486906528473, "rewards/rejected": 0.4892561137676239, "step": 349 }, { "epoch": 0.89, "learning_rate": 8.267723908126427e-08, "logits/chosen": -2.2929739952087402, "logits/rejected": -2.291891574859619, "logps/chosen": -0.9881130456924438, "logps/rejected": -6.422429084777832, "loss": 0.6095, "rewards/accuracies": 0.0, "rewards/chosen": 0.7391126155853271, "rewards/margins": -0.2996683120727539, "rewards/rejected": 1.038780927658081, "step": 350 }, { "epoch": 0.89, "learning_rate": 8.257364303102274e-08, "logits/chosen": -2.277364730834961, "logits/rejected": -2.2668213844299316, "logps/chosen": -0.7684206962585449, "logps/rejected": -9.32548713684082, "loss": 0.7455, "rewards/accuracies": 0.0, "rewards/chosen": 0.679717481136322, "rewards/margins": -0.2698183059692383, "rewards/rejected": 0.9495357871055603, "step": 351 }, { "epoch": 0.89, "learning_rate": 8.246980346576317e-08, "logits/chosen": -2.338639736175537, "logits/rejected": -2.346914768218994, "logps/chosen": -12.504185676574707, "logps/rejected": -1.8293689489364624, "loss": 0.5621, "rewards/accuracies": 1.0, "rewards/chosen": 0.9049844145774841, "rewards/margins": 0.17388981580734253, "rewards/rejected": 0.7310945987701416, "step": 352 }, { "epoch": 0.89, "learning_rate": 8.236572116177247e-08, "logits/chosen": -2.224057674407959, "logits/rejected": -2.2393956184387207, "logps/chosen": -7.281949043273926, "logps/rejected": -2.843614101409912, "loss": 0.7075, "rewards/accuracies": 1.0, "rewards/chosen": 0.9706084132194519, "rewards/margins": 0.4820570647716522, "rewards/rejected": 0.4885513484477997, "step": 353 }, { "epoch": 0.9, "learning_rate": 8.226139689715232e-08, "logits/chosen": -2.3584182262420654, "logits/rejected": -2.3579394817352295, "logps/chosen": -0.9786379337310791, "logps/rejected": -7.816969394683838, "loss": 0.6566, "rewards/accuracies": 0.0, "rewards/chosen": 0.6695057153701782, "rewards/margins": -0.29785096645355225, "rewards/rejected": 0.9673566818237305, "step": 354 }, { "epoch": 0.9, "learning_rate": 8.215683145181311e-08, "logits/chosen": -2.273664951324463, "logits/rejected": -2.2747459411621094, "logps/chosen": -3.820547580718994, "logps/rejected": -6.294139862060547, "loss": 0.7275, "rewards/accuracies": 0.0, "rewards/chosen": 0.6190044283866882, "rewards/margins": -0.3773841857910156, "rewards/rejected": 0.9963886141777039, "step": 355 }, { "epoch": 0.9, "learning_rate": 8.205202560746838e-08, "logits/chosen": -2.308523416519165, "logits/rejected": -2.331428050994873, "logps/chosen": -1.8979647159576416, "logps/rejected": -7.409216403961182, "loss": 0.7406, "rewards/accuracies": 1.0, "rewards/chosen": 0.9088670611381531, "rewards/margins": 0.43687260150909424, "rewards/rejected": 0.47199445962905884, "step": 356 }, { "epoch": 0.9, "learning_rate": 8.19469801476288e-08, "logits/chosen": -2.2135236263275146, "logits/rejected": -2.2204020023345947, "logps/chosen": -3.738497734069824, "logps/rejected": -1.800441026687622, "loss": 0.6252, "rewards/accuracies": 1.0, "rewards/chosen": 0.9885876774787903, "rewards/margins": 0.2910701036453247, "rewards/rejected": 0.6975175738334656, "step": 357 }, { "epoch": 0.91, "learning_rate": 8.184169585759637e-08, "logits/chosen": -2.2148451805114746, "logits/rejected": -2.2170610427856445, "logps/chosen": -2.6669840812683105, "logps/rejected": -7.289400577545166, "loss": 0.5775, "rewards/accuracies": 0.0, "rewards/chosen": 0.5533513426780701, "rewards/margins": -0.09924030303955078, "rewards/rejected": 0.6525916457176208, "step": 358 }, { "epoch": 0.91, "learning_rate": 8.173617352445852e-08, "logits/chosen": -2.1738789081573486, "logits/rejected": -2.1692800521850586, "logps/chosen": -2.6644935607910156, "logps/rejected": -2.839940309524536, "loss": 0.6348, "rewards/accuracies": 1.0, "rewards/chosen": 0.8727187514305115, "rewards/margins": 0.3496180772781372, "rewards/rejected": 0.5231006741523743, "step": 359 }, { "epoch": 0.91, "learning_rate": 8.16304139370823e-08, "logits/chosen": -2.200289011001587, "logits/rejected": -2.195071220397949, "logps/chosen": -2.1284477710723877, "logps/rejected": -9.23105525970459, "loss": 0.7009, "rewards/accuracies": 0.0, "rewards/chosen": 0.5320340394973755, "rewards/margins": -0.15240496397018433, "rewards/rejected": 0.6844390034675598, "step": 360 }, { "epoch": 0.91, "learning_rate": 8.152441788610842e-08, "logits/chosen": -2.202220916748047, "logits/rejected": -2.3553779125213623, "logps/chosen": -4.000711441040039, "logps/rejected": -15.499092102050781, "loss": 0.7002, "rewards/accuracies": 1.0, "rewards/chosen": 0.7180855870246887, "rewards/margins": 0.15628594160079956, "rewards/rejected": 0.5617996454238892, "step": 361 }, { "epoch": 0.92, "learning_rate": 8.14181861639453e-08, "logits/chosen": -2.1212961673736572, "logits/rejected": -2.1164891719818115, "logps/chosen": -3.4997830390930176, "logps/rejected": -5.338700771331787, "loss": 0.7171, "rewards/accuracies": 0.0, "rewards/chosen": 0.5904927849769592, "rewards/margins": -0.12679564952850342, "rewards/rejected": 0.7172884345054626, "step": 362 }, { "epoch": 0.92, "learning_rate": 8.131171956476327e-08, "logits/chosen": -2.23236083984375, "logits/rejected": -2.240609645843506, "logps/chosen": -1.9455822706222534, "logps/rejected": -2.1049888134002686, "loss": 0.5807, "rewards/accuracies": 1.0, "rewards/chosen": 0.9161492586135864, "rewards/margins": 0.348268985748291, "rewards/rejected": 0.5678802728652954, "step": 363 }, { "epoch": 0.92, "learning_rate": 8.120501888448851e-08, "logits/chosen": -2.3188841342926025, "logits/rejected": -2.326631546020508, "logps/chosen": -5.025043487548828, "logps/rejected": -2.7313549518585205, "loss": 0.5586, "rewards/accuracies": 1.0, "rewards/chosen": 0.7492130398750305, "rewards/margins": 0.1915358304977417, "rewards/rejected": 0.5576772093772888, "step": 364 }, { "epoch": 0.92, "learning_rate": 8.109808492079717e-08, "logits/chosen": -2.0827434062957764, "logits/rejected": -2.106131076812744, "logps/chosen": -5.696326732635498, "logps/rejected": -8.51596450805664, "loss": 0.6001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9885935187339783, "rewards/margins": 0.6211767196655273, "rewards/rejected": 0.36741676926612854, "step": 365 }, { "epoch": 0.93, "learning_rate": 8.099091847310939e-08, "logits/chosen": -2.380394458770752, "logits/rejected": -2.3773317337036133, "logps/chosen": -2.9979774951934814, "logps/rejected": -4.452147960662842, "loss": 0.7826, "rewards/accuracies": 0.0, "rewards/chosen": 0.5605355501174927, "rewards/margins": -0.45206522941589355, "rewards/rejected": 1.0126007795333862, "step": 366 }, { "epoch": 0.93, "learning_rate": 8.08835203425833e-08, "logits/chosen": -2.247316360473633, "logits/rejected": -2.2447993755340576, "logps/chosen": -4.119555473327637, "logps/rejected": -2.6000144481658936, "loss": 0.5357, "rewards/accuracies": 0.0, "rewards/chosen": 0.6690663695335388, "rewards/margins": -0.23955440521240234, "rewards/rejected": 0.9086207747459412, "step": 367 }, { "epoch": 0.93, "learning_rate": 8.07758913321091e-08, "logits/chosen": -2.328505754470825, "logits/rejected": -2.3388359546661377, "logps/chosen": -0.605301022529602, "logps/rejected": -11.985595703125, "loss": 0.8454, "rewards/accuracies": 0.0, "rewards/chosen": 0.7276735305786133, "rewards/margins": -0.04498577117919922, "rewards/rejected": 0.7726593017578125, "step": 368 }, { "epoch": 0.93, "learning_rate": 8.066803224630294e-08, "logits/chosen": -2.228696823120117, "logits/rejected": -2.241363525390625, "logps/chosen": -2.3438243865966797, "logps/rejected": -4.095362663269043, "loss": 0.5576, "rewards/accuracies": 1.0, "rewards/chosen": 0.7338985800743103, "rewards/margins": 0.3430877923965454, "rewards/rejected": 0.3908107876777649, "step": 369 }, { "epoch": 0.94, "learning_rate": 8.055994389150103e-08, "logits/chosen": -2.202580690383911, "logits/rejected": -2.2171823978424072, "logps/chosen": -5.542592525482178, "logps/rejected": -1.6025339365005493, "loss": 0.5683, "rewards/accuracies": 1.0, "rewards/chosen": 0.9187429547309875, "rewards/margins": 0.20233291387557983, "rewards/rejected": 0.7164100408554077, "step": 370 }, { "epoch": 0.94, "learning_rate": 8.045162707575353e-08, "logits/chosen": -2.2596538066864014, "logits/rejected": -2.259416341781616, "logps/chosen": -1.977871060371399, "logps/rejected": -2.7332265377044678, "loss": 0.6531, "rewards/accuracies": 0.0, "rewards/chosen": 0.6990774273872375, "rewards/margins": -0.2949962615966797, "rewards/rejected": 0.9940736889839172, "step": 371 }, { "epoch": 0.94, "learning_rate": 8.034308260881852e-08, "logits/chosen": -2.340092182159424, "logits/rejected": -2.338604688644409, "logps/chosen": -2.2602286338806152, "logps/rejected": -5.281639099121094, "loss": 0.608, "rewards/accuracies": 0.0, "rewards/chosen": 0.6336522698402405, "rewards/margins": -0.1550225019454956, "rewards/rejected": 0.7886747717857361, "step": 372 }, { "epoch": 0.94, "learning_rate": 8.023431130215605e-08, "logits/chosen": -2.423292398452759, "logits/rejected": -2.423827886581421, "logps/chosen": -4.642886638641357, "logps/rejected": -2.2274997234344482, "loss": 0.5628, "rewards/accuracies": 1.0, "rewards/chosen": 0.9482179880142212, "rewards/margins": 0.38083696365356445, "rewards/rejected": 0.5673810243606567, "step": 373 }, { "epoch": 0.95, "learning_rate": 8.012531396892184e-08, "logits/chosen": -2.360987663269043, "logits/rejected": -2.36016583442688, "logps/chosen": -0.5359525680541992, "logps/rejected": -5.5331010818481445, "loss": 0.7673, "rewards/accuracies": 0.0, "rewards/chosen": 0.5762252807617188, "rewards/margins": -0.39992016553878784, "rewards/rejected": 0.9761454463005066, "step": 374 }, { "epoch": 0.95, "learning_rate": 8.001609142396149e-08, "logits/chosen": -2.2617475986480713, "logits/rejected": -2.251276731491089, "logps/chosen": -2.081864356994629, "logps/rejected": -11.059087753295898, "loss": 0.7769, "rewards/accuracies": 0.0, "rewards/chosen": 0.5977030992507935, "rewards/margins": -0.1849154233932495, "rewards/rejected": 0.782618522644043, "step": 375 }, { "epoch": 0.95, "learning_rate": 7.99066444838041e-08, "logits/chosen": -2.269181251525879, "logits/rejected": -2.2780513763427734, "logps/chosen": -5.9703474044799805, "logps/rejected": -3.0404720306396484, "loss": 0.6736, "rewards/accuracies": 1.0, "rewards/chosen": 0.9039304852485657, "rewards/margins": 0.3886423110961914, "rewards/rejected": 0.5152881741523743, "step": 376 }, { "epoch": 0.95, "learning_rate": 7.979697396665648e-08, "logits/chosen": -2.2512214183807373, "logits/rejected": -2.2572855949401855, "logps/chosen": -6.030575752258301, "logps/rejected": -4.728835105895996, "loss": 0.5863, "rewards/accuracies": 1.0, "rewards/chosen": 0.9385878443717957, "rewards/margins": 0.4533027410507202, "rewards/rejected": 0.48528510332107544, "step": 377 }, { "epoch": 0.96, "learning_rate": 7.968708069239671e-08, "logits/chosen": -2.359306573867798, "logits/rejected": -2.423867702484131, "logps/chosen": -2.666351795196533, "logps/rejected": -26.87596893310547, "loss": 0.5746, "rewards/accuracies": 1.0, "rewards/chosen": 0.6657917499542236, "rewards/margins": 0.3256281316280365, "rewards/rejected": 0.34016361832618713, "step": 378 }, { "epoch": 0.96, "learning_rate": 7.957696548256827e-08, "logits/chosen": -2.231562376022339, "logits/rejected": -2.2302753925323486, "logps/chosen": -2.0480175018310547, "logps/rejected": -4.05470609664917, "loss": 0.7442, "rewards/accuracies": 1.0, "rewards/chosen": 0.7992755770683289, "rewards/margins": 0.29143762588500977, "rewards/rejected": 0.5078379511833191, "step": 379 }, { "epoch": 0.96, "learning_rate": 7.946662916037372e-08, "logits/chosen": -2.116694688796997, "logits/rejected": -2.1297640800476074, "logps/chosen": -2.833132266998291, "logps/rejected": -4.5728373527526855, "loss": 0.6899, "rewards/accuracies": 1.0, "rewards/chosen": 0.833818256855011, "rewards/margins": 0.26866620779037476, "rewards/rejected": 0.5651520490646362, "step": 380 }, { "epoch": 0.96, "learning_rate": 7.935607255066865e-08, "logits/chosen": -2.32173752784729, "logits/rejected": -2.3181471824645996, "logps/chosen": -1.6778948307037354, "logps/rejected": -2.02590012550354, "loss": 0.6828, "rewards/accuracies": 0.0, "rewards/chosen": 0.6788627505302429, "rewards/margins": -0.3151640295982361, "rewards/rejected": 0.994026780128479, "step": 381 }, { "epoch": 0.97, "learning_rate": 7.924529647995548e-08, "logits/chosen": -2.2112350463867188, "logits/rejected": -2.2820355892181396, "logps/chosen": -4.019567966461182, "logps/rejected": -19.789417266845703, "loss": 0.6592, "rewards/accuracies": 1.0, "rewards/chosen": 0.6489843130111694, "rewards/margins": 0.2044999897480011, "rewards/rejected": 0.44448432326316833, "step": 382 }, { "epoch": 0.97, "learning_rate": 7.91343017763773e-08, "logits/chosen": -2.246288776397705, "logits/rejected": -2.2497456073760986, "logps/chosen": -4.844240188598633, "logps/rejected": -4.943772792816162, "loss": 0.7941, "rewards/accuracies": 1.0, "rewards/chosen": 0.7458907961845398, "rewards/margins": 0.2725493609905243, "rewards/rejected": 0.4733414351940155, "step": 383 }, { "epoch": 0.97, "learning_rate": 7.902308926971165e-08, "logits/chosen": -2.2446107864379883, "logits/rejected": -2.2493741512298584, "logps/chosen": -2.2488977909088135, "logps/rejected": -3.7276408672332764, "loss": 0.6876, "rewards/accuracies": 1.0, "rewards/chosen": 0.852028489112854, "rewards/margins": 0.172602117061615, "rewards/rejected": 0.679426372051239, "step": 384 }, { "epoch": 0.97, "learning_rate": 7.891165979136428e-08, "logits/chosen": -2.2607409954071045, "logits/rejected": -2.247607707977295, "logps/chosen": -2.156735420227051, "logps/rejected": -7.693221092224121, "loss": 0.7544, "rewards/accuracies": 0.0, "rewards/chosen": 0.6366636157035828, "rewards/margins": -0.36613041162490845, "rewards/rejected": 1.0027940273284912, "step": 385 }, { "epoch": 0.98, "learning_rate": 7.880001417436308e-08, "logits/chosen": -2.2878150939941406, "logits/rejected": -2.2957236766815186, "logps/chosen": -2.6464500427246094, "logps/rejected": -6.2549333572387695, "loss": 0.652, "rewards/accuracies": 1.0, "rewards/chosen": 0.8429710268974304, "rewards/margins": 0.33660924434661865, "rewards/rejected": 0.5063617825508118, "step": 386 }, { "epoch": 0.98, "learning_rate": 7.868815325335168e-08, "logits/chosen": -2.219308614730835, "logits/rejected": -2.2283735275268555, "logps/chosen": -2.3667070865631104, "logps/rejected": -6.436010837554932, "loss": 0.703, "rewards/accuracies": 0.0, "rewards/chosen": 0.6740894317626953, "rewards/margins": -0.261465847492218, "rewards/rejected": 0.9355552792549133, "step": 387 }, { "epoch": 0.98, "learning_rate": 7.857607786458331e-08, "logits/chosen": -2.3103175163269043, "logits/rejected": -2.3189237117767334, "logps/chosen": -2.5321176052093506, "logps/rejected": -11.341991424560547, "loss": 0.6412, "rewards/accuracies": 0.0, "rewards/chosen": 0.7412481307983398, "rewards/margins": -0.0895269513130188, "rewards/rejected": 0.8307750821113586, "step": 388 }, { "epoch": 0.98, "learning_rate": 7.846378884591453e-08, "logits/chosen": -2.378457546234131, "logits/rejected": -2.3687820434570312, "logps/chosen": -1.3397738933563232, "logps/rejected": -7.507501125335693, "loss": 0.7214, "rewards/accuracies": 0.0, "rewards/chosen": 0.6187364459037781, "rewards/margins": -0.3701053857803345, "rewards/rejected": 0.9888418316841125, "step": 389 }, { "epoch": 0.99, "learning_rate": 7.835128703679896e-08, "logits/chosen": -2.253490924835205, "logits/rejected": -2.243727207183838, "logps/chosen": -3.899393320083618, "logps/rejected": -6.241485118865967, "loss": 0.6234, "rewards/accuracies": 0.0, "rewards/chosen": 0.6363806128501892, "rewards/margins": -0.343153178691864, "rewards/rejected": 0.9795337915420532, "step": 390 }, { "epoch": 0.99, "learning_rate": 7.823857327828098e-08, "logits/chosen": -2.2635836601257324, "logits/rejected": -2.2859184741973877, "logps/chosen": -1.5206947326660156, "logps/rejected": -10.410870552062988, "loss": 0.5212, "rewards/accuracies": 1.0, "rewards/chosen": 0.8542820811271667, "rewards/margins": 0.3765828013420105, "rewards/rejected": 0.47769927978515625, "step": 391 }, { "epoch": 0.99, "learning_rate": 7.812564841298951e-08, "logits/chosen": -2.2693920135498047, "logits/rejected": -2.2748806476593018, "logps/chosen": -4.373507022857666, "logps/rejected": -9.40855598449707, "loss": 0.776, "rewards/accuracies": 0.0, "rewards/chosen": 0.7026895880699158, "rewards/margins": -0.03365743160247803, "rewards/rejected": 0.7363470196723938, "step": 392 }, { "epoch": 0.99, "learning_rate": 7.801251328513164e-08, "logits/chosen": -2.2481489181518555, "logits/rejected": -2.251373052597046, "logps/chosen": -1.1319806575775146, "logps/rejected": -2.7873971462249756, "loss": 0.596, "rewards/accuracies": 1.0, "rewards/chosen": 0.8005113005638123, "rewards/margins": 0.28369438648223877, "rewards/rejected": 0.5168169140815735, "step": 393 }, { "epoch": 1.0, "learning_rate": 7.789916874048633e-08, "logits/chosen": -2.270541191101074, "logits/rejected": -2.2748405933380127, "logps/chosen": -2.4769484996795654, "logps/rejected": -5.688924312591553, "loss": 0.5866, "rewards/accuracies": 1.0, "rewards/chosen": 0.8210717439651489, "rewards/margins": 0.3988051116466522, "rewards/rejected": 0.4222666323184967, "step": 394 }, { "epoch": 1.0, "learning_rate": 7.778561562639817e-08, "logits/chosen": -2.259829521179199, "logits/rejected": -2.259749412536621, "logps/chosen": -3.8417892456054688, "logps/rejected": -5.474167346954346, "loss": 0.6722, "rewards/accuracies": 0.0, "rewards/chosen": 0.5837898254394531, "rewards/margins": -0.19307953119277954, "rewards/rejected": 0.7768693566322327, "step": 395 }, { "epoch": 1.0, "learning_rate": 7.767185479177093e-08, "logits/chosen": -2.147796154022217, "logits/rejected": -2.1894478797912598, "logps/chosen": -1.9993423223495483, "logps/rejected": -6.630356311798096, "loss": 0.7114, "rewards/accuracies": 0.0, "rewards/chosen": 0.5351231694221497, "rewards/margins": -0.3458634614944458, "rewards/rejected": 0.8809866309165955, "step": 396 }, { "epoch": 1.01, "learning_rate": 7.755788708706124e-08, "logits/chosen": -2.3079943656921387, "logits/rejected": -2.3205747604370117, "logps/chosen": -4.740902900695801, "logps/rejected": -4.6586737632751465, "loss": 0.4792, "rewards/accuracies": 1.0, "rewards/chosen": 0.9248821139335632, "rewards/margins": 0.3195008635520935, "rewards/rejected": 0.6053812503814697, "step": 397 }, { "epoch": 1.01, "learning_rate": 7.74437133642723e-08, "logits/chosen": -2.1954543590545654, "logits/rejected": -2.184659242630005, "logps/chosen": -1.5312540531158447, "logps/rejected": -6.643449783325195, "loss": 0.7927, "rewards/accuracies": 0.0, "rewards/chosen": 0.6587980389595032, "rewards/margins": -0.12481081485748291, "rewards/rejected": 0.7836088538169861, "step": 398 }, { "epoch": 1.01, "learning_rate": 7.732933447694747e-08, "logits/chosen": -2.2088463306427, "logits/rejected": -2.201355457305908, "logps/chosen": -4.010442733764648, "logps/rejected": -5.220598220825195, "loss": 0.6189, "rewards/accuracies": 1.0, "rewards/chosen": 0.848849892616272, "rewards/margins": 0.2157583236694336, "rewards/rejected": 0.6330915689468384, "step": 399 }, { "epoch": 1.01, "learning_rate": 7.721475128016385e-08, "logits/chosen": -2.210902690887451, "logits/rejected": -2.1996097564697266, "logps/chosen": -2.600698947906494, "logps/rejected": -5.94119119644165, "loss": 0.5951, "rewards/accuracies": 0.0, "rewards/chosen": 0.6099860072135925, "rewards/margins": -0.032867491245269775, "rewards/rejected": 0.6428534984588623, "step": 400 }, { "epoch": 1.02, "learning_rate": 7.709996463052595e-08, "logits/chosen": -2.331876754760742, "logits/rejected": -2.328106164932251, "logps/chosen": -3.1729655265808105, "logps/rejected": -3.6846489906311035, "loss": 0.7844, "rewards/accuracies": 0.0, "rewards/chosen": 0.6300426125526428, "rewards/margins": -0.15318238735198975, "rewards/rejected": 0.7832249999046326, "step": 401 }, { "epoch": 1.02, "learning_rate": 7.698497538615926e-08, "logits/chosen": -2.29887056350708, "logits/rejected": -2.300222635269165, "logps/chosen": -1.1026134490966797, "logps/rejected": -9.129067420959473, "loss": 0.7143, "rewards/accuracies": 0.0, "rewards/chosen": 0.6394963264465332, "rewards/margins": -0.2605976462364197, "rewards/rejected": 0.9000939726829529, "step": 402 }, { "epoch": 1.02, "learning_rate": 7.686978440670379e-08, "logits/chosen": -2.293678045272827, "logits/rejected": -2.2910656929016113, "logps/chosen": -3.2024128437042236, "logps/rejected": -3.834515333175659, "loss": 0.6887, "rewards/accuracies": 1.0, "rewards/chosen": 0.8833382725715637, "rewards/margins": 0.2858904004096985, "rewards/rejected": 0.5974478721618652, "step": 403 }, { "epoch": 1.02, "learning_rate": 7.675439255330776e-08, "logits/chosen": -2.2779901027679443, "logits/rejected": -2.280423879623413, "logps/chosen": -0.823506236076355, "logps/rejected": -5.224335193634033, "loss": 0.7191, "rewards/accuracies": 1.0, "rewards/chosen": 0.7973812222480774, "rewards/margins": 0.38007017970085144, "rewards/rejected": 0.41731104254722595, "step": 404 }, { "epoch": 1.03, "learning_rate": 7.663880068862105e-08, "logits/chosen": -2.2388579845428467, "logits/rejected": -2.231778144836426, "logps/chosen": -1.5682412385940552, "logps/rejected": -5.487204551696777, "loss": 0.7303, "rewards/accuracies": 0.0, "rewards/chosen": 0.7137226462364197, "rewards/margins": -0.0501747727394104, "rewards/rejected": 0.7638974189758301, "step": 405 }, { "epoch": 1.03, "learning_rate": 7.652300967678873e-08, "logits/chosen": -2.183035373687744, "logits/rejected": -2.176054000854492, "logps/chosen": -2.814701795578003, "logps/rejected": -8.01062297821045, "loss": 0.6162, "rewards/accuracies": 0.0, "rewards/chosen": 0.6211733222007751, "rewards/margins": -0.16233432292938232, "rewards/rejected": 0.7835076451301575, "step": 406 }, { "epoch": 1.03, "learning_rate": 7.64070203834448e-08, "logits/chosen": -2.237717628479004, "logits/rejected": -2.2390244007110596, "logps/chosen": -7.854271411895752, "logps/rejected": -5.203117847442627, "loss": 0.5323, "rewards/accuracies": 1.0, "rewards/chosen": 0.9872909784317017, "rewards/margins": 0.5178523659706116, "rewards/rejected": 0.4694386124610901, "step": 407 }, { "epoch": 1.03, "learning_rate": 7.629083367570545e-08, "logits/chosen": -2.184540033340454, "logits/rejected": -2.1884050369262695, "logps/chosen": -3.819587230682373, "logps/rejected": -4.163711071014404, "loss": 0.5626, "rewards/accuracies": 1.0, "rewards/chosen": 0.7407127022743225, "rewards/margins": 0.21965265274047852, "rewards/rejected": 0.521060049533844, "step": 408 }, { "epoch": 1.04, "learning_rate": 7.617445042216278e-08, "logits/chosen": -2.2015137672424316, "logits/rejected": -2.1980772018432617, "logps/chosen": -1.4585899114608765, "logps/rejected": -9.02827262878418, "loss": 0.6098, "rewards/accuracies": 1.0, "rewards/chosen": 0.8687307238578796, "rewards/margins": 0.06818217039108276, "rewards/rejected": 0.8005485534667969, "step": 409 }, { "epoch": 1.04, "learning_rate": 7.605787149287818e-08, "logits/chosen": -2.19966721534729, "logits/rejected": -2.235529661178589, "logps/chosen": -1.3607633113861084, "logps/rejected": -8.718774795532227, "loss": 0.5871, "rewards/accuracies": 1.0, "rewards/chosen": 0.873500645160675, "rewards/margins": 0.05634152889251709, "rewards/rejected": 0.817159116268158, "step": 410 }, { "epoch": 1.04, "learning_rate": 7.594109775937594e-08, "logits/chosen": -2.3401715755462646, "logits/rejected": -2.3330955505371094, "logps/chosen": -1.5145022869110107, "logps/rejected": -4.36183500289917, "loss": 0.7143, "rewards/accuracies": 0.0, "rewards/chosen": 0.691491425037384, "rewards/margins": -0.22489672899246216, "rewards/rejected": 0.9163881540298462, "step": 411 }, { "epoch": 1.04, "learning_rate": 7.582413009463664e-08, "logits/chosen": -2.2396678924560547, "logits/rejected": -2.24534010887146, "logps/chosen": -1.9427504539489746, "logps/rejected": -5.391425132751465, "loss": 0.5982, "rewards/accuracies": 1.0, "rewards/chosen": 0.875008225440979, "rewards/margins": 0.31692832708358765, "rewards/rejected": 0.5580798983573914, "step": 412 }, { "epoch": 1.05, "learning_rate": 7.570696937309063e-08, "logits/chosen": -2.2236053943634033, "logits/rejected": -2.2085838317871094, "logps/chosen": -2.8373310565948486, "logps/rejected": -7.7028584480285645, "loss": 0.6832, "rewards/accuracies": 0.0, "rewards/chosen": 0.593114972114563, "rewards/margins": -0.21286934614181519, "rewards/rejected": 0.8059843182563782, "step": 413 }, { "epoch": 1.05, "learning_rate": 7.558961647061155e-08, "logits/chosen": -2.4276812076568604, "logits/rejected": -2.415883779525757, "logps/chosen": -3.419950485229492, "logps/rejected": -8.453217506408691, "loss": 0.8205, "rewards/accuracies": 0.0, "rewards/chosen": 0.5604274868965149, "rewards/margins": -0.2869265675544739, "rewards/rejected": 0.8473540544509888, "step": 414 }, { "epoch": 1.05, "learning_rate": 7.547207226450979e-08, "logits/chosen": -2.2608091831207275, "logits/rejected": -2.2481865882873535, "logps/chosen": -2.1075029373168945, "logps/rejected": -7.647358417510986, "loss": 0.7485, "rewards/accuracies": 0.0, "rewards/chosen": 0.6415868997573853, "rewards/margins": -0.36579346656799316, "rewards/rejected": 1.0073803663253784, "step": 415 }, { "epoch": 1.05, "learning_rate": 7.53543376335258e-08, "logits/chosen": -2.278745651245117, "logits/rejected": -2.288760185241699, "logps/chosen": -3.2052764892578125, "logps/rejected": -4.218389511108398, "loss": 0.6392, "rewards/accuracies": 1.0, "rewards/chosen": 0.7811346054077148, "rewards/margins": 0.2040243148803711, "rewards/rejected": 0.5771102905273438, "step": 416 }, { "epoch": 1.06, "learning_rate": 7.523641345782373e-08, "logits/chosen": -2.1489460468292236, "logits/rejected": -2.1269562244415283, "logps/chosen": -5.211057662963867, "logps/rejected": -3.028846502304077, "loss": 0.6664, "rewards/accuracies": 1.0, "rewards/chosen": 0.7045688033103943, "rewards/margins": 0.18030500411987305, "rewards/rejected": 0.5242637991905212, "step": 417 }, { "epoch": 1.06, "learning_rate": 7.511830061898463e-08, "logits/chosen": -2.334894895553589, "logits/rejected": -2.328826427459717, "logps/chosen": -3.432152032852173, "logps/rejected": -4.685551643371582, "loss": 0.6775, "rewards/accuracies": 1.0, "rewards/chosen": 0.748375415802002, "rewards/margins": 0.20568257570266724, "rewards/rejected": 0.5426928400993347, "step": 418 }, { "epoch": 1.06, "learning_rate": 7.5e-08, "logits/chosen": -2.34116792678833, "logits/rejected": -2.3468828201293945, "logps/chosen": -5.566479206085205, "logps/rejected": -3.6845037937164307, "loss": 0.6258, "rewards/accuracies": 1.0, "rewards/chosen": 1.2184444665908813, "rewards/margins": 0.7216495275497437, "rewards/rejected": 0.4967949092388153, "step": 419 }, { "epoch": 1.06, "learning_rate": 7.488151248526517e-08, "logits/chosen": -2.286562442779541, "logits/rejected": -2.2942206859588623, "logps/chosen": -2.5666422843933105, "logps/rejected": -6.284739017486572, "loss": 0.6231, "rewards/accuracies": 1.0, "rewards/chosen": 0.8509517908096313, "rewards/margins": 0.34757059812545776, "rewards/rejected": 0.5033811926841736, "step": 420 }, { "epoch": 1.07, "learning_rate": 7.476283896057266e-08, "logits/chosen": -2.318882703781128, "logits/rejected": -2.3301331996917725, "logps/chosen": -7.529684066772461, "logps/rejected": -3.3515453338623047, "loss": 0.6276, "rewards/accuracies": 1.0, "rewards/chosen": 0.9218055605888367, "rewards/margins": 0.3159545660018921, "rewards/rejected": 0.6058509945869446, "step": 421 }, { "epoch": 1.07, "learning_rate": 7.464398031310555e-08, "logits/chosen": -2.3713150024414062, "logits/rejected": -2.3674919605255127, "logps/chosen": -2.3346445560455322, "logps/rejected": -3.396857738494873, "loss": 0.4851, "rewards/accuracies": 1.0, "rewards/chosen": 0.8829626441001892, "rewards/margins": 0.372031569480896, "rewards/rejected": 0.5109310746192932, "step": 422 }, { "epoch": 1.07, "learning_rate": 7.452493743143091e-08, "logits/chosen": -2.1731021404266357, "logits/rejected": -2.1739602088928223, "logps/chosen": -1.0090785026550293, "logps/rejected": -8.194581985473633, "loss": 0.6187, "rewards/accuracies": 0.0, "rewards/chosen": 0.5841904282569885, "rewards/margins": -0.4898114800453186, "rewards/rejected": 1.0740019083023071, "step": 423 }, { "epoch": 1.07, "learning_rate": 7.440571120549309e-08, "logits/chosen": -2.1983349323272705, "logits/rejected": -2.245516300201416, "logps/chosen": -1.6940808296203613, "logps/rejected": -14.969877243041992, "loss": 0.7105, "rewards/accuracies": 0.0, "rewards/chosen": 0.6493611931800842, "rewards/margins": -0.1862185001373291, "rewards/rejected": 0.8355796933174133, "step": 424 }, { "epoch": 1.08, "learning_rate": 7.428630252660703e-08, "logits/chosen": -2.1944820880889893, "logits/rejected": -2.194586753845215, "logps/chosen": -0.9716635942459106, "logps/rejected": -8.299695014953613, "loss": 0.7751, "rewards/accuracies": 0.0, "rewards/chosen": 0.6911590695381165, "rewards/margins": -0.26962900161743164, "rewards/rejected": 0.9607880711555481, "step": 425 }, { "epoch": 1.08, "learning_rate": 7.416671228745181e-08, "logits/chosen": -2.184051275253296, "logits/rejected": -2.180347204208374, "logps/chosen": -3.0353171825408936, "logps/rejected": -4.377131938934326, "loss": 0.6521, "rewards/accuracies": 1.0, "rewards/chosen": 0.7848613858222961, "rewards/margins": 0.29651960730552673, "rewards/rejected": 0.4883417785167694, "step": 426 }, { "epoch": 1.08, "learning_rate": 7.404694138206365e-08, "logits/chosen": -2.1579971313476562, "logits/rejected": -2.156703472137451, "logps/chosen": -0.35063108801841736, "logps/rejected": -6.386979103088379, "loss": 0.7686, "rewards/accuracies": 0.0, "rewards/chosen": 0.6635522246360779, "rewards/margins": -0.20755267143249512, "rewards/rejected": 0.871104896068573, "step": 427 }, { "epoch": 1.08, "learning_rate": 7.39269907058295e-08, "logits/chosen": -2.2227797508239746, "logits/rejected": -2.224160671234131, "logps/chosen": -0.9675483703613281, "logps/rejected": -5.769791603088379, "loss": 0.7595, "rewards/accuracies": 0.0, "rewards/chosen": 0.7294043898582458, "rewards/margins": -0.1896459460258484, "rewards/rejected": 0.9190503358840942, "step": 428 }, { "epoch": 1.09, "learning_rate": 7.380686115548022e-08, "logits/chosen": -2.358299970626831, "logits/rejected": -2.4231507778167725, "logps/chosen": -2.6138248443603516, "logps/rejected": -26.852420806884766, "loss": 0.599, "rewards/accuracies": 1.0, "rewards/chosen": 0.6710444688796997, "rewards/margins": 0.3285260498523712, "rewards/rejected": 0.3425184190273285, "step": 429 }, { "epoch": 1.09, "learning_rate": 7.368655362908393e-08, "logits/chosen": -2.34767484664917, "logits/rejected": -2.3478028774261475, "logps/chosen": -3.214966297149658, "logps/rejected": -10.294122695922852, "loss": 0.8088, "rewards/accuracies": 0.0, "rewards/chosen": 0.6480867266654968, "rewards/margins": -0.2041914463043213, "rewards/rejected": 0.8522781729698181, "step": 430 }, { "epoch": 1.09, "learning_rate": 7.356606902603923e-08, "logits/chosen": -2.2733685970306396, "logits/rejected": -2.268369674682617, "logps/chosen": -2.382572889328003, "logps/rejected": -4.689136028289795, "loss": 0.7477, "rewards/accuracies": 0.0, "rewards/chosen": 0.6192180514335632, "rewards/margins": -0.38415783643722534, "rewards/rejected": 1.0033758878707886, "step": 431 }, { "epoch": 1.09, "learning_rate": 7.344540824706854e-08, "logits/chosen": -2.1794683933258057, "logits/rejected": -2.1762070655822754, "logps/chosen": -1.44678795337677, "logps/rejected": -9.245704650878906, "loss": 0.6006, "rewards/accuracies": 0.0, "rewards/chosen": 0.6582663655281067, "rewards/margins": -0.19909924268722534, "rewards/rejected": 0.857365608215332, "step": 432 }, { "epoch": 1.1, "learning_rate": 7.332457219421131e-08, "logits/chosen": -2.1857259273529053, "logits/rejected": -2.198824167251587, "logps/chosen": -6.2409539222717285, "logps/rejected": -2.767082452774048, "loss": 0.5269, "rewards/accuracies": 1.0, "rewards/chosen": 1.0129231214523315, "rewards/margins": 0.414367139339447, "rewards/rejected": 0.5985559821128845, "step": 433 }, { "epoch": 1.1, "learning_rate": 7.320356177081735e-08, "logits/chosen": -2.271841526031494, "logits/rejected": -2.2641708850860596, "logps/chosen": -6.559440612792969, "logps/rejected": -9.985321044921875, "loss": 0.6433, "rewards/accuracies": 1.0, "rewards/chosen": 0.7649713754653931, "rewards/margins": 0.31638747453689575, "rewards/rejected": 0.4485839009284973, "step": 434 }, { "epoch": 1.1, "learning_rate": 7.308237788154001e-08, "logits/chosen": -2.250264883041382, "logits/rejected": -2.2430176734924316, "logps/chosen": -12.374456405639648, "logps/rejected": -3.885540723800659, "loss": 0.6285, "rewards/accuracies": 1.0, "rewards/chosen": 0.8200832605361938, "rewards/margins": 0.21598732471466064, "rewards/rejected": 0.6040959358215332, "step": 435 }, { "epoch": 1.1, "learning_rate": 7.296102143232948e-08, "logits/chosen": -2.2271995544433594, "logits/rejected": -2.2342915534973145, "logps/chosen": -1.496322512626648, "logps/rejected": -4.944387912750244, "loss": 0.6052, "rewards/accuracies": 1.0, "rewards/chosen": 0.8538488745689392, "rewards/margins": 0.3728730082511902, "rewards/rejected": 0.480975866317749, "step": 436 }, { "epoch": 1.11, "learning_rate": 7.283949333042585e-08, "logits/chosen": -2.138822317123413, "logits/rejected": -2.1396985054016113, "logps/chosen": -2.170694351196289, "logps/rejected": -11.440605163574219, "loss": 0.5709, "rewards/accuracies": 1.0, "rewards/chosen": 0.710054874420166, "rewards/margins": 0.11639642715454102, "rewards/rejected": 0.593658447265625, "step": 437 }, { "epoch": 1.11, "learning_rate": 7.271779448435264e-08, "logits/chosen": -2.1575968265533447, "logits/rejected": -2.1759068965911865, "logps/chosen": -2.566617965698242, "logps/rejected": -5.721077919006348, "loss": 0.7509, "rewards/accuracies": 1.0, "rewards/chosen": 0.8752450942993164, "rewards/margins": 0.2942301630973816, "rewards/rejected": 0.5810149312019348, "step": 438 }, { "epoch": 1.11, "learning_rate": 7.259592580390972e-08, "logits/chosen": -2.20961594581604, "logits/rejected": -2.22550106048584, "logps/chosen": -3.0505592823028564, "logps/rejected": -7.807707786560059, "loss": 0.675, "rewards/accuracies": 1.0, "rewards/chosen": 0.9743784070014954, "rewards/margins": 0.5134822130203247, "rewards/rejected": 0.46089622378349304, "step": 439 }, { "epoch": 1.11, "learning_rate": 7.247388820016661e-08, "logits/chosen": -2.1461308002471924, "logits/rejected": -2.139718770980835, "logps/chosen": -1.4943029880523682, "logps/rejected": -3.4815833568573, "loss": 0.612, "rewards/accuracies": 0.0, "rewards/chosen": 0.6355882883071899, "rewards/margins": -0.12573575973510742, "rewards/rejected": 0.7613240480422974, "step": 440 }, { "epoch": 1.12, "learning_rate": 7.235168258545568e-08, "logits/chosen": -2.243900775909424, "logits/rejected": -2.233948230743408, "logps/chosen": -1.7057651281356812, "logps/rejected": -5.721144676208496, "loss": 0.6339, "rewards/accuracies": 0.0, "rewards/chosen": 0.6885883808135986, "rewards/margins": -0.3222137689590454, "rewards/rejected": 1.010802149772644, "step": 441 }, { "epoch": 1.12, "learning_rate": 7.222930987336536e-08, "logits/chosen": -2.367863416671753, "logits/rejected": -2.3702104091644287, "logps/chosen": -3.6396219730377197, "logps/rejected": -9.670515060424805, "loss": 0.7505, "rewards/accuracies": 0.0, "rewards/chosen": 0.5413562655448914, "rewards/margins": -0.2557258605957031, "rewards/rejected": 0.7970821261405945, "step": 442 }, { "epoch": 1.12, "learning_rate": 7.210677097873323e-08, "logits/chosen": -2.2917873859405518, "logits/rejected": -2.3035826683044434, "logps/chosen": -5.9895830154418945, "logps/rejected": -3.589228868484497, "loss": 0.6101, "rewards/accuracies": 0.0, "rewards/chosen": 0.6731056571006775, "rewards/margins": -0.10138803720474243, "rewards/rejected": 0.7744936943054199, "step": 443 }, { "epoch": 1.12, "learning_rate": 7.198406681763924e-08, "logits/chosen": -2.275235176086426, "logits/rejected": -2.2676734924316406, "logps/chosen": -3.0497171878814697, "logps/rejected": -9.129111289978027, "loss": 0.7238, "rewards/accuracies": 0.0, "rewards/chosen": 0.6216263175010681, "rewards/margins": -0.17046499252319336, "rewards/rejected": 0.7920913100242615, "step": 444 }, { "epoch": 1.13, "learning_rate": 7.186119830739882e-08, "logits/chosen": -2.3576245307922363, "logits/rejected": -2.35186505317688, "logps/chosen": -1.4388766288757324, "logps/rejected": -4.422859191894531, "loss": 0.7205, "rewards/accuracies": 1.0, "rewards/chosen": 0.855704128742218, "rewards/margins": 0.37633848190307617, "rewards/rejected": 0.47936564683914185, "step": 445 }, { "epoch": 1.13, "learning_rate": 7.17381663665561e-08, "logits/chosen": -2.1764440536499023, "logits/rejected": -2.1685593128204346, "logps/chosen": -2.0368432998657227, "logps/rejected": -5.0534563064575195, "loss": 0.7595, "rewards/accuracies": 0.0, "rewards/chosen": 0.5956791639328003, "rewards/margins": -0.2097707986831665, "rewards/rejected": 0.8054499626159668, "step": 446 }, { "epoch": 1.13, "learning_rate": 7.161497191487692e-08, "logits/chosen": -2.2676713466644287, "logits/rejected": -2.306861162185669, "logps/chosen": -0.9920608997344971, "logps/rejected": -8.222949028015137, "loss": 0.6125, "rewards/accuracies": 1.0, "rewards/chosen": 0.7753880620002747, "rewards/margins": 0.17062777280807495, "rewards/rejected": 0.6047602891921997, "step": 447 }, { "epoch": 1.13, "learning_rate": 7.149161587334208e-08, "logits/chosen": -2.201514482498169, "logits/rejected": -2.208895444869995, "logps/chosen": -5.891104698181152, "logps/rejected": -4.252450942993164, "loss": 0.447, "rewards/accuracies": 1.0, "rewards/chosen": 0.9596306681632996, "rewards/margins": 0.37029409408569336, "rewards/rejected": 0.5893365740776062, "step": 448 }, { "epoch": 1.14, "learning_rate": 7.136809916414038e-08, "logits/chosen": -2.1650452613830566, "logits/rejected": -2.1592350006103516, "logps/chosen": -2.313579559326172, "logps/rejected": -4.840192794799805, "loss": 0.7163, "rewards/accuracies": 0.0, "rewards/chosen": 0.6752592921257019, "rewards/margins": -0.3262602686882019, "rewards/rejected": 1.0015195608139038, "step": 449 }, { "epoch": 1.14, "learning_rate": 7.124442271066174e-08, "logits/chosen": -2.332003593444824, "logits/rejected": -2.3277201652526855, "logps/chosen": -2.238095283508301, "logps/rejected": -7.701940536499023, "loss": 0.7126, "rewards/accuracies": 0.0, "rewards/chosen": 0.6335790753364563, "rewards/margins": -0.14909541606903076, "rewards/rejected": 0.7826744914054871, "step": 450 }, { "epoch": 1.14, "learning_rate": 7.112058743749027e-08, "logits/chosen": -2.2369778156280518, "logits/rejected": -2.2256503105163574, "logps/chosen": -1.141296625137329, "logps/rejected": -7.863115310668945, "loss": 0.6796, "rewards/accuracies": 0.0, "rewards/chosen": 0.7327295541763306, "rewards/margins": -0.31033968925476074, "rewards/rejected": 1.0430692434310913, "step": 451 }, { "epoch": 1.14, "learning_rate": 7.099659427039747e-08, "logits/chosen": -2.2257139682769775, "logits/rejected": -2.223325729370117, "logps/chosen": -0.9186335802078247, "logps/rejected": -7.983006477355957, "loss": 0.6927, "rewards/accuracies": 0.0, "rewards/chosen": 0.6325750350952148, "rewards/margins": -0.1723790168762207, "rewards/rejected": 0.8049540519714355, "step": 452 }, { "epoch": 1.15, "learning_rate": 7.087244413633515e-08, "logits/chosen": -2.2293612957000732, "logits/rejected": -2.2386574745178223, "logps/chosen": -3.1057326793670654, "logps/rejected": -3.5127179622650146, "loss": 0.6118, "rewards/accuracies": 0.0, "rewards/chosen": 0.7457091212272644, "rewards/margins": -0.20879346132278442, "rewards/rejected": 0.9545025825500488, "step": 453 }, { "epoch": 1.15, "learning_rate": 7.074813796342861e-08, "logits/chosen": -2.1868815422058105, "logits/rejected": -2.1859123706817627, "logps/chosen": -1.590223789215088, "logps/rejected": -6.386327266693115, "loss": 0.6466, "rewards/accuracies": 0.0, "rewards/chosen": 0.6437000036239624, "rewards/margins": -0.27432942390441895, "rewards/rejected": 0.9180294275283813, "step": 454 }, { "epoch": 1.15, "learning_rate": 7.062367668096966e-08, "logits/chosen": -2.0822362899780273, "logits/rejected": -2.0749149322509766, "logps/chosen": -2.4143850803375244, "logps/rejected": -8.60872745513916, "loss": 0.6228, "rewards/accuracies": 0.0, "rewards/chosen": 0.7360954284667969, "rewards/margins": -0.023930072784423828, "rewards/rejected": 0.7600255012512207, "step": 455 }, { "epoch": 1.15, "learning_rate": 7.049906121940972e-08, "logits/chosen": -2.2418124675750732, "logits/rejected": -2.2444891929626465, "logps/chosen": -2.3591363430023193, "logps/rejected": -7.701749801635742, "loss": 0.6612, "rewards/accuracies": 0.0, "rewards/chosen": 0.7784228324890137, "rewards/margins": -0.04603683948516846, "rewards/rejected": 0.8244596719741821, "step": 456 }, { "epoch": 1.16, "learning_rate": 7.037429251035279e-08, "logits/chosen": -2.1285767555236816, "logits/rejected": -2.1163244247436523, "logps/chosen": -1.2581613063812256, "logps/rejected": -9.608253479003906, "loss": 0.7163, "rewards/accuracies": 0.0, "rewards/chosen": 0.6829932332038879, "rewards/margins": -0.31970030069351196, "rewards/rejected": 1.0026935338974, "step": 457 }, { "epoch": 1.16, "learning_rate": 7.02493714865485e-08, "logits/chosen": -2.2051239013671875, "logits/rejected": -2.2431745529174805, "logps/chosen": -1.782170057296753, "logps/rejected": -5.237239360809326, "loss": 0.5798, "rewards/accuracies": 1.0, "rewards/chosen": 0.8560852408409119, "rewards/margins": 0.2256835699081421, "rewards/rejected": 0.6304016709327698, "step": 458 }, { "epoch": 1.16, "learning_rate": 7.012429908188522e-08, "logits/chosen": -2.2587897777557373, "logits/rejected": -2.2545382976531982, "logps/chosen": -3.710650682449341, "logps/rejected": -3.2064208984375, "loss": 0.6748, "rewards/accuracies": 0.0, "rewards/chosen": 0.7712178230285645, "rewards/margins": -0.1981469988822937, "rewards/rejected": 0.9693648219108582, "step": 459 }, { "epoch": 1.16, "learning_rate": 6.999907623138295e-08, "logits/chosen": -2.2870168685913086, "logits/rejected": -2.287191867828369, "logps/chosen": -2.8269665241241455, "logps/rejected": -6.169318675994873, "loss": 0.6526, "rewards/accuracies": 0.0, "rewards/chosen": 0.5605472922325134, "rewards/margins": -0.1430261731147766, "rewards/rejected": 0.70357346534729, "step": 460 }, { "epoch": 1.17, "learning_rate": 6.987370387118648e-08, "logits/chosen": -2.2921905517578125, "logits/rejected": -2.296830654144287, "logps/chosen": -1.1838322877883911, "logps/rejected": -4.812824726104736, "loss": 0.5674, "rewards/accuracies": 1.0, "rewards/chosen": 0.8899008631706238, "rewards/margins": 0.4123433530330658, "rewards/rejected": 0.477557510137558, "step": 461 }, { "epoch": 1.17, "learning_rate": 6.974818293855821e-08, "logits/chosen": -2.2733805179595947, "logits/rejected": -2.2779541015625, "logps/chosen": -2.714081287384033, "logps/rejected": -4.20384407043457, "loss": 0.7835, "rewards/accuracies": 0.0, "rewards/chosen": 0.48702380061149597, "rewards/margins": -0.36705800890922546, "rewards/rejected": 0.8540818095207214, "step": 462 }, { "epoch": 1.17, "learning_rate": 6.962251437187135e-08, "logits/chosen": -2.1486589908599854, "logits/rejected": -2.138028144836426, "logps/chosen": -8.85605239868164, "logps/rejected": -4.795083999633789, "loss": 0.7001, "rewards/accuracies": 1.0, "rewards/chosen": 0.9822162985801697, "rewards/margins": 0.4032433032989502, "rewards/rejected": 0.5789729952812195, "step": 463 }, { "epoch": 1.17, "learning_rate": 6.94966991106027e-08, "logits/chosen": -2.2720706462860107, "logits/rejected": -2.2656145095825195, "logps/chosen": -1.2571114301681519, "logps/rejected": -2.8843438625335693, "loss": 0.8209, "rewards/accuracies": 0.0, "rewards/chosen": 0.6164069175720215, "rewards/margins": -0.42756664752960205, "rewards/rejected": 1.0439735651016235, "step": 464 }, { "epoch": 1.18, "learning_rate": 6.93707380953258e-08, "logits/chosen": -2.068445920944214, "logits/rejected": -2.0732502937316895, "logps/chosen": -1.702563762664795, "logps/rejected": -1.877366065979004, "loss": 0.6316, "rewards/accuracies": 1.0, "rewards/chosen": 0.8573562502861023, "rewards/margins": 0.20251357555389404, "rewards/rejected": 0.6548426747322083, "step": 465 }, { "epoch": 1.18, "learning_rate": 6.924463226770375e-08, "logits/chosen": -2.182884693145752, "logits/rejected": -2.1921942234039307, "logps/chosen": -2.721543073654175, "logps/rejected": -4.157277584075928, "loss": 0.7541, "rewards/accuracies": 1.0, "rewards/chosen": 0.8591267466545105, "rewards/margins": 0.3512732982635498, "rewards/rejected": 0.5078534483909607, "step": 466 }, { "epoch": 1.18, "learning_rate": 6.911838257048231e-08, "logits/chosen": -2.290792942047119, "logits/rejected": -2.2995898723602295, "logps/chosen": -1.944982886314392, "logps/rejected": -21.29033660888672, "loss": 0.8499, "rewards/accuracies": 1.0, "rewards/chosen": 0.6765357851982117, "rewards/margins": 0.012370824813842773, "rewards/rejected": 0.6641649603843689, "step": 467 }, { "epoch": 1.18, "learning_rate": 6.899198994748273e-08, "logits/chosen": -2.2439396381378174, "logits/rejected": -2.2367031574249268, "logps/chosen": -2.6317334175109863, "logps/rejected": -2.864553928375244, "loss": 0.7442, "rewards/accuracies": 0.0, "rewards/chosen": 0.8003193140029907, "rewards/margins": -0.18814831972122192, "rewards/rejected": 0.9884676337242126, "step": 468 }, { "epoch": 1.19, "learning_rate": 6.88654553435948e-08, "logits/chosen": -2.227318525314331, "logits/rejected": -2.228973865509033, "logps/chosen": -1.1721733808517456, "logps/rejected": -3.259420156478882, "loss": 0.6691, "rewards/accuracies": 1.0, "rewards/chosen": 0.7666167616844177, "rewards/margins": 0.25841039419174194, "rewards/rejected": 0.5082063674926758, "step": 469 }, { "epoch": 1.19, "learning_rate": 6.87387797047697e-08, "logits/chosen": -2.1819911003112793, "logits/rejected": -2.1794111728668213, "logps/chosen": -2.107039451599121, "logps/rejected": -2.823983669281006, "loss": 0.5717, "rewards/accuracies": 1.0, "rewards/chosen": 0.902751624584198, "rewards/margins": 0.30937427282333374, "rewards/rejected": 0.5933773517608643, "step": 470 }, { "epoch": 1.19, "learning_rate": 6.861196397801296e-08, "logits/chosen": -2.33176851272583, "logits/rejected": -2.3133394718170166, "logps/chosen": -8.345985412597656, "logps/rejected": -6.617383003234863, "loss": 0.5925, "rewards/accuracies": 0.0, "rewards/chosen": 0.741992175579071, "rewards/margins": -0.011717736721038818, "rewards/rejected": 0.7537099123001099, "step": 471 }, { "epoch": 1.19, "learning_rate": 6.84850091113774e-08, "logits/chosen": -2.1039340496063232, "logits/rejected": -2.1069369316101074, "logps/chosen": -1.2365727424621582, "logps/rejected": -4.110511302947998, "loss": 0.7953, "rewards/accuracies": 1.0, "rewards/chosen": 0.9132837653160095, "rewards/margins": 0.3903406858444214, "rewards/rejected": 0.5229430794715881, "step": 472 }, { "epoch": 1.2, "learning_rate": 6.835791605395605e-08, "logits/chosen": -2.2133889198303223, "logits/rejected": -2.223891496658325, "logps/chosen": -3.055039405822754, "logps/rejected": -1.8774410486221313, "loss": 0.6643, "rewards/accuracies": 1.0, "rewards/chosen": 0.9990748763084412, "rewards/margins": 0.281585693359375, "rewards/rejected": 0.7174891829490662, "step": 473 }, { "epoch": 1.2, "learning_rate": 6.823068575587495e-08, "logits/chosen": -2.279773235321045, "logits/rejected": -2.2708935737609863, "logps/chosen": -1.978010892868042, "logps/rejected": -6.115780830383301, "loss": 0.6727, "rewards/accuracies": 0.0, "rewards/chosen": 0.6355380415916443, "rewards/margins": -0.1131325364112854, "rewards/rejected": 0.7486705780029297, "step": 474 }, { "epoch": 1.2, "learning_rate": 6.810331916828622e-08, "logits/chosen": -2.1651620864868164, "logits/rejected": -2.1658546924591064, "logps/chosen": -9.100569725036621, "logps/rejected": -3.2179436683654785, "loss": 0.5605, "rewards/accuracies": 1.0, "rewards/chosen": 0.7420892119407654, "rewards/margins": 0.07072103023529053, "rewards/rejected": 0.6713681817054749, "step": 475 }, { "epoch": 1.21, "learning_rate": 6.79758172433608e-08, "logits/chosen": -2.2489960193634033, "logits/rejected": -2.240178108215332, "logps/chosen": -3.070923328399658, "logps/rejected": -2.445274591445923, "loss": 0.6658, "rewards/accuracies": 0.0, "rewards/chosen": 0.6083433628082275, "rewards/margins": -0.3074725866317749, "rewards/rejected": 0.9158159494400024, "step": 476 }, { "epoch": 1.21, "learning_rate": 6.784818093428143e-08, "logits/chosen": -2.2410190105438232, "logits/rejected": -2.2279601097106934, "logps/chosen": -1.2731900215148926, "logps/rejected": -4.185803413391113, "loss": 0.6087, "rewards/accuracies": 0.0, "rewards/chosen": 0.6483250260353088, "rewards/margins": -0.2197207808494568, "rewards/rejected": 0.8680458068847656, "step": 477 }, { "epoch": 1.21, "learning_rate": 6.772041119523544e-08, "logits/chosen": -2.2699503898620605, "logits/rejected": -2.2705800533294678, "logps/chosen": -2.19321608543396, "logps/rejected": -6.070474624633789, "loss": 0.5974, "rewards/accuracies": 1.0, "rewards/chosen": 0.8494449853897095, "rewards/margins": 0.46533337235450745, "rewards/rejected": 0.384111613035202, "step": 478 }, { "epoch": 1.21, "learning_rate": 6.759250898140767e-08, "logits/chosen": -2.3196144104003906, "logits/rejected": -2.4327776432037354, "logps/chosen": -3.4943196773529053, "logps/rejected": -18.51805877685547, "loss": 0.5596, "rewards/accuracies": 1.0, "rewards/chosen": 0.6866759657859802, "rewards/margins": 0.2811618149280548, "rewards/rejected": 0.4055141508579254, "step": 479 }, { "epoch": 1.22, "learning_rate": 6.746447524897333e-08, "logits/chosen": -2.2903316020965576, "logits/rejected": -2.2897164821624756, "logps/chosen": -2.4384899139404297, "logps/rejected": -3.3649652004241943, "loss": 0.7641, "rewards/accuracies": 0.0, "rewards/chosen": 0.6223568320274353, "rewards/margins": -0.15442615747451782, "rewards/rejected": 0.7767829895019531, "step": 480 }, { "epoch": 1.22, "learning_rate": 6.733631095509087e-08, "logits/chosen": -2.250373601913452, "logits/rejected": -2.253168821334839, "logps/chosen": -1.4615689516067505, "logps/rejected": -2.912261486053467, "loss": 0.6357, "rewards/accuracies": 1.0, "rewards/chosen": 0.8072097897529602, "rewards/margins": 0.2584798336029053, "rewards/rejected": 0.5487299561500549, "step": 481 }, { "epoch": 1.22, "learning_rate": 6.720801705789474e-08, "logits/chosen": -2.2923154830932617, "logits/rejected": -2.286020040512085, "logps/chosen": -1.5126930475234985, "logps/rejected": -2.536071300506592, "loss": 0.6251, "rewards/accuracies": 1.0, "rewards/chosen": 0.8841708302497864, "rewards/margins": 0.3732401132583618, "rewards/rejected": 0.5109307169914246, "step": 482 }, { "epoch": 1.22, "learning_rate": 6.707959451648829e-08, "logits/chosen": -2.2699928283691406, "logits/rejected": -2.2710556983947754, "logps/chosen": -3.1620025634765625, "logps/rejected": -5.180259704589844, "loss": 0.5761, "rewards/accuracies": 0.0, "rewards/chosen": 0.6431179046630859, "rewards/margins": -0.11963814496994019, "rewards/rejected": 0.7627560496330261, "step": 483 }, { "epoch": 1.23, "learning_rate": 6.695104429093664e-08, "logits/chosen": -2.255892276763916, "logits/rejected": -2.2881641387939453, "logps/chosen": -5.77867317199707, "logps/rejected": -9.30422592163086, "loss": 0.6164, "rewards/accuracies": 1.0, "rewards/chosen": 1.0988839864730835, "rewards/margins": 0.563220739364624, "rewards/rejected": 0.5356632471084595, "step": 484 }, { "epoch": 1.23, "learning_rate": 6.682236734225943e-08, "logits/chosen": -2.262896776199341, "logits/rejected": -2.27290940284729, "logps/chosen": -2.56331467628479, "logps/rejected": -5.466264247894287, "loss": 0.6114, "rewards/accuracies": 1.0, "rewards/chosen": 0.7410721778869629, "rewards/margins": 0.33245548605918884, "rewards/rejected": 0.40861669182777405, "step": 485 }, { "epoch": 1.23, "learning_rate": 6.669356463242362e-08, "logits/chosen": -2.282442569732666, "logits/rejected": -2.330777883529663, "logps/chosen": -1.0346131324768066, "logps/rejected": -6.3295159339904785, "loss": 0.6721, "rewards/accuracies": 1.0, "rewards/chosen": 0.8337416052818298, "rewards/margins": 0.19218558073043823, "rewards/rejected": 0.6415560245513916, "step": 486 }, { "epoch": 1.23, "learning_rate": 6.656463712433638e-08, "logits/chosen": -2.121232032775879, "logits/rejected": -2.109668016433716, "logps/chosen": -1.3045612573623657, "logps/rejected": -5.784634590148926, "loss": 0.6778, "rewards/accuracies": 0.0, "rewards/chosen": 0.6904523372650146, "rewards/margins": -0.33670246601104736, "rewards/rejected": 1.027154803276062, "step": 487 }, { "epoch": 1.24, "learning_rate": 6.643558578183787e-08, "logits/chosen": -2.2345170974731445, "logits/rejected": -2.2231531143188477, "logps/chosen": -0.9089756608009338, "logps/rejected": -11.854474067687988, "loss": 0.8434, "rewards/accuracies": 0.0, "rewards/chosen": 0.6509145498275757, "rewards/margins": -0.13141357898712158, "rewards/rejected": 0.7823281288146973, "step": 488 }, { "epoch": 1.24, "learning_rate": 6.630641156969396e-08, "logits/chosen": -2.237004518508911, "logits/rejected": -2.246035575866699, "logps/chosen": -6.070693016052246, "logps/rejected": -4.147693634033203, "loss": 0.6046, "rewards/accuracies": 1.0, "rewards/chosen": 0.9716607332229614, "rewards/margins": 0.40212583541870117, "rewards/rejected": 0.5695348978042603, "step": 489 }, { "epoch": 1.24, "learning_rate": 6.617711545358913e-08, "logits/chosen": -2.3423266410827637, "logits/rejected": -2.33760142326355, "logps/chosen": -1.6820833683013916, "logps/rejected": -1.8051559925079346, "loss": 0.6811, "rewards/accuracies": 0.0, "rewards/chosen": 0.6619157791137695, "rewards/margins": -0.2108495831489563, "rewards/rejected": 0.8727653622627258, "step": 490 }, { "epoch": 1.24, "learning_rate": 6.604769840011912e-08, "logits/chosen": -2.3593661785125732, "logits/rejected": -2.363175630569458, "logps/chosen": -4.428257942199707, "logps/rejected": -3.8629727363586426, "loss": 0.6426, "rewards/accuracies": 1.0, "rewards/chosen": 0.7186751365661621, "rewards/margins": 0.018140733242034912, "rewards/rejected": 0.7005344033241272, "step": 491 }, { "epoch": 1.25, "learning_rate": 6.591816137678388e-08, "logits/chosen": -2.3431499004364014, "logits/rejected": -2.336465835571289, "logps/chosen": -2.37893009185791, "logps/rejected": -5.6049017906188965, "loss": 0.6825, "rewards/accuracies": 1.0, "rewards/chosen": 0.7320478558540344, "rewards/margins": 0.035688817501068115, "rewards/rejected": 0.6963590383529663, "step": 492 }, { "epoch": 1.25, "learning_rate": 6.578850535198013e-08, "logits/chosen": -2.172792911529541, "logits/rejected": -2.1698222160339355, "logps/chosen": -1.0043011903762817, "logps/rejected": -8.975671768188477, "loss": 0.6894, "rewards/accuracies": 0.0, "rewards/chosen": 0.715568482875824, "rewards/margins": -0.07596653699874878, "rewards/rejected": 0.7915350198745728, "step": 493 }, { "epoch": 1.25, "learning_rate": 6.565873129499431e-08, "logits/chosen": -2.3004422187805176, "logits/rejected": -2.3077213764190674, "logps/chosen": -1.6833996772766113, "logps/rejected": -3.7081422805786133, "loss": 0.6772, "rewards/accuracies": 1.0, "rewards/chosen": 0.8733673095703125, "rewards/margins": 0.3714304566383362, "rewards/rejected": 0.5019368529319763, "step": 494 }, { "epoch": 1.25, "learning_rate": 6.552884017599516e-08, "logits/chosen": -2.208627939224243, "logits/rejected": -2.2208423614501953, "logps/chosen": -3.0791640281677246, "logps/rejected": -3.3095486164093018, "loss": 0.6408, "rewards/accuracies": 1.0, "rewards/chosen": 0.8230960965156555, "rewards/margins": 0.2946218252182007, "rewards/rejected": 0.5284742712974548, "step": 495 }, { "epoch": 1.26, "learning_rate": 6.539883296602663e-08, "logits/chosen": -2.1497437953948975, "logits/rejected": -2.144315719604492, "logps/chosen": -1.4540774822235107, "logps/rejected": -2.861867666244507, "loss": 0.6786, "rewards/accuracies": 0.0, "rewards/chosen": 0.7459021806716919, "rewards/margins": -0.1943507194519043, "rewards/rejected": 0.9402529001235962, "step": 496 }, { "epoch": 1.26, "learning_rate": 6.526871063700055e-08, "logits/chosen": -2.257270336151123, "logits/rejected": -2.2566099166870117, "logps/chosen": -3.624889373779297, "logps/rejected": -5.531881332397461, "loss": 0.7982, "rewards/accuracies": 0.0, "rewards/chosen": 0.6054798364639282, "rewards/margins": -0.16561812162399292, "rewards/rejected": 0.7710979580879211, "step": 497 }, { "epoch": 1.26, "learning_rate": 6.513847416168928e-08, "logits/chosen": -2.3112752437591553, "logits/rejected": -2.3106985092163086, "logps/chosen": -1.1799514293670654, "logps/rejected": -11.885190963745117, "loss": 0.7334, "rewards/accuracies": 1.0, "rewards/chosen": 0.6694757342338562, "rewards/margins": 0.09215962886810303, "rewards/rejected": 0.5773161053657532, "step": 498 }, { "epoch": 1.26, "learning_rate": 6.500812451371861e-08, "logits/chosen": -2.314762830734253, "logits/rejected": -2.315852642059326, "logps/chosen": -0.6326242685317993, "logps/rejected": -9.72809886932373, "loss": 0.8192, "rewards/accuracies": 0.0, "rewards/chosen": 0.649986743927002, "rewards/margins": -0.3514336347579956, "rewards/rejected": 1.0014203786849976, "step": 499 }, { "epoch": 1.27, "learning_rate": 6.487766266756032e-08, "logits/chosen": -2.226947784423828, "logits/rejected": -2.2213551998138428, "logps/chosen": -2.012589931488037, "logps/rejected": -3.9623937606811523, "loss": 0.5736, "rewards/accuracies": 0.0, "rewards/chosen": 0.6663902997970581, "rewards/margins": -0.16997486352920532, "rewards/rejected": 0.8363651633262634, "step": 500 }, { "epoch": 1.27, "learning_rate": 6.474708959852503e-08, "logits/chosen": -2.3105974197387695, "logits/rejected": -2.315295696258545, "logps/chosen": -1.7462493181228638, "logps/rejected": -4.390326023101807, "loss": 0.5739, "rewards/accuracies": 1.0, "rewards/chosen": 0.7423882484436035, "rewards/margins": 0.3494809567928314, "rewards/rejected": 0.3929072916507721, "step": 501 }, { "epoch": 1.27, "learning_rate": 6.461640628275478e-08, "logits/chosen": -2.178964614868164, "logits/rejected": -2.185434579849243, "logps/chosen": -1.508894920349121, "logps/rejected": -4.857551574707031, "loss": 0.676, "rewards/accuracies": 1.0, "rewards/chosen": 0.8771344423294067, "rewards/margins": 0.37474310398101807, "rewards/rejected": 0.5023913383483887, "step": 502 }, { "epoch": 1.27, "learning_rate": 6.448561369721582e-08, "logits/chosen": -2.176419734954834, "logits/rejected": -2.1817588806152344, "logps/chosen": -1.7254085540771484, "logps/rejected": -3.3918874263763428, "loss": 0.5537, "rewards/accuracies": 1.0, "rewards/chosen": 0.9366044998168945, "rewards/margins": 0.4111253619194031, "rewards/rejected": 0.5254791378974915, "step": 503 }, { "epoch": 1.28, "learning_rate": 6.435471281969132e-08, "logits/chosen": -2.1860718727111816, "logits/rejected": -2.18168044090271, "logps/chosen": -2.6175222396850586, "logps/rejected": -4.941519737243652, "loss": 0.7924, "rewards/accuracies": 1.0, "rewards/chosen": 0.6774362921714783, "rewards/margins": 0.005999326705932617, "rewards/rejected": 0.6714369654655457, "step": 504 }, { "epoch": 1.28, "learning_rate": 6.422370462877396e-08, "logits/chosen": -2.2589309215545654, "logits/rejected": -2.28840970993042, "logps/chosen": -21.562957763671875, "logps/rejected": -15.844413757324219, "loss": 0.6242, "rewards/accuracies": 1.0, "rewards/chosen": 0.6853412985801697, "rewards/margins": 0.04984283447265625, "rewards/rejected": 0.6354984641075134, "step": 505 }, { "epoch": 1.28, "learning_rate": 6.40925901038587e-08, "logits/chosen": -2.216935396194458, "logits/rejected": -2.207611322402954, "logps/chosen": -0.8825362920761108, "logps/rejected": -6.8505988121032715, "loss": 0.8395, "rewards/accuracies": 0.0, "rewards/chosen": 0.5820043683052063, "rewards/margins": -0.38035809993743896, "rewards/rejected": 0.9623624682426453, "step": 506 }, { "epoch": 1.28, "learning_rate": 6.396137022513545e-08, "logits/chosen": -2.196991205215454, "logits/rejected": -2.1988561153411865, "logps/chosen": -6.711649417877197, "logps/rejected": -3.022738456726074, "loss": 0.6475, "rewards/accuracies": 1.0, "rewards/chosen": 0.7763642072677612, "rewards/margins": 0.34826943278312683, "rewards/rejected": 0.4280947744846344, "step": 507 }, { "epoch": 1.29, "learning_rate": 6.383004597358172e-08, "logits/chosen": -2.1855850219726562, "logits/rejected": -2.173264980316162, "logps/chosen": -2.555391311645508, "logps/rejected": -8.276449203491211, "loss": 0.7131, "rewards/accuracies": 0.0, "rewards/chosen": 0.6614101529121399, "rewards/margins": -0.1265503168106079, "rewards/rejected": 0.7879604697227478, "step": 508 }, { "epoch": 1.29, "learning_rate": 6.36986183309553e-08, "logits/chosen": -2.288700819015503, "logits/rejected": -2.2853429317474365, "logps/chosen": -2.2654757499694824, "logps/rejected": -3.245319366455078, "loss": 0.68, "rewards/accuracies": 0.0, "rewards/chosen": 0.5657629370689392, "rewards/margins": -0.1270042061805725, "rewards/rejected": 0.6927671432495117, "step": 509 }, { "epoch": 1.29, "learning_rate": 6.356708827978687e-08, "logits/chosen": -2.1997992992401123, "logits/rejected": -2.2058329582214355, "logps/chosen": -3.6104016304016113, "logps/rejected": -5.418994903564453, "loss": 0.5918, "rewards/accuracies": 1.0, "rewards/chosen": 0.9011756777763367, "rewards/margins": 0.2572307586669922, "rewards/rejected": 0.6439449191093445, "step": 510 }, { "epoch": 1.29, "learning_rate": 6.343545680337278e-08, "logits/chosen": -2.1937005519866943, "logits/rejected": -2.1820340156555176, "logps/chosen": -1.473406434059143, "logps/rejected": -3.0309300422668457, "loss": 0.6863, "rewards/accuracies": 0.0, "rewards/chosen": 0.6707512736320496, "rewards/margins": -0.09795844554901123, "rewards/rejected": 0.7687097191810608, "step": 511 }, { "epoch": 1.3, "learning_rate": 6.330372488576753e-08, "logits/chosen": -2.2663493156433105, "logits/rejected": -2.2719104290008545, "logps/chosen": -4.014585018157959, "logps/rejected": -9.641522407531738, "loss": 0.5699, "rewards/accuracies": 1.0, "rewards/chosen": 0.7385818362236023, "rewards/margins": 0.025531470775604248, "rewards/rejected": 0.713050365447998, "step": 512 }, { "epoch": 1.3, "learning_rate": 6.317189351177657e-08, "logits/chosen": -2.2471776008605957, "logits/rejected": -2.2386670112609863, "logps/chosen": -3.003607988357544, "logps/rejected": -8.0536470413208, "loss": 0.648, "rewards/accuracies": 0.0, "rewards/chosen": 0.6244827508926392, "rewards/margins": -0.26410871744155884, "rewards/rejected": 0.888591468334198, "step": 513 }, { "epoch": 1.3, "learning_rate": 6.303996366694882e-08, "logits/chosen": -2.273747205734253, "logits/rejected": -2.264918327331543, "logps/chosen": -2.335582733154297, "logps/rejected": -3.061958074569702, "loss": 0.7875, "rewards/accuracies": 1.0, "rewards/chosen": 0.8715581297874451, "rewards/margins": 0.41796180605888367, "rewards/rejected": 0.4535963237285614, "step": 514 }, { "epoch": 1.3, "learning_rate": 6.290793633756939e-08, "logits/chosen": -2.2263777256011963, "logits/rejected": -2.20150089263916, "logps/chosen": -4.177455425262451, "logps/rejected": -7.948122978210449, "loss": 0.6362, "rewards/accuracies": 0.0, "rewards/chosen": 0.5803854465484619, "rewards/margins": -0.23012185096740723, "rewards/rejected": 0.8105072975158691, "step": 515 }, { "epoch": 1.31, "learning_rate": 6.277581251065215e-08, "logits/chosen": -2.2802140712738037, "logits/rejected": -2.280677556991577, "logps/chosen": -13.740386009216309, "logps/rejected": -4.151416778564453, "loss": 0.7119, "rewards/accuracies": 1.0, "rewards/chosen": 0.920005738735199, "rewards/margins": 0.1064881682395935, "rewards/rejected": 0.8135175704956055, "step": 516 }, { "epoch": 1.31, "learning_rate": 6.264359317393237e-08, "logits/chosen": -2.3391385078430176, "logits/rejected": -2.359748363494873, "logps/chosen": -3.3167688846588135, "logps/rejected": -4.64133882522583, "loss": 0.5819, "rewards/accuracies": 1.0, "rewards/chosen": 1.0090436935424805, "rewards/margins": 0.40653449296951294, "rewards/rejected": 0.6025092005729675, "step": 517 }, { "epoch": 1.31, "learning_rate": 6.251127931585932e-08, "logits/chosen": -2.2135374546051025, "logits/rejected": -2.213125228881836, "logps/chosen": -2.453941583633423, "logps/rejected": -7.351408958435059, "loss": 0.6072, "rewards/accuracies": 0.0, "rewards/chosen": 0.5746556520462036, "rewards/margins": -0.0717352032661438, "rewards/rejected": 0.6463908553123474, "step": 518 }, { "epoch": 1.31, "learning_rate": 6.237887192558893e-08, "logits/chosen": -2.2628486156463623, "logits/rejected": -2.259596586227417, "logps/chosen": -1.7179170846939087, "logps/rejected": -5.674816608428955, "loss": 0.599, "rewards/accuracies": 1.0, "rewards/chosen": 0.9326538443565369, "rewards/margins": 0.20722776651382446, "rewards/rejected": 0.7254260778427124, "step": 519 }, { "epoch": 1.32, "learning_rate": 6.224637199297632e-08, "logits/chosen": -2.248669147491455, "logits/rejected": -2.25468373298645, "logps/chosen": -2.04215145111084, "logps/rejected": -4.072841167449951, "loss": 0.5764, "rewards/accuracies": 1.0, "rewards/chosen": 0.9093918204307556, "rewards/margins": 0.37348705530166626, "rewards/rejected": 0.5359047651290894, "step": 520 }, { "epoch": 1.32, "learning_rate": 6.21137805085685e-08, "logits/chosen": -2.3099148273468018, "logits/rejected": -2.308332920074463, "logps/chosen": -2.6205813884735107, "logps/rejected": -4.652151107788086, "loss": 0.7403, "rewards/accuracies": 0.0, "rewards/chosen": 0.6092468500137329, "rewards/margins": -0.08757549524307251, "rewards/rejected": 0.6968223452568054, "step": 521 }, { "epoch": 1.32, "learning_rate": 6.198109846359682e-08, "logits/chosen": -2.093421697616577, "logits/rejected": -2.089931011199951, "logps/chosen": -3.248396396636963, "logps/rejected": -6.553476810455322, "loss": 0.7694, "rewards/accuracies": 0.0, "rewards/chosen": 0.524732768535614, "rewards/margins": -0.23275411128997803, "rewards/rejected": 0.757486879825592, "step": 522 }, { "epoch": 1.32, "learning_rate": 6.184832684996971e-08, "logits/chosen": -2.2908706665039062, "logits/rejected": -2.291823148727417, "logps/chosen": -2.158189535140991, "logps/rejected": -6.760686874389648, "loss": 0.7183, "rewards/accuracies": 1.0, "rewards/chosen": 0.8891485333442688, "rewards/margins": 0.1825200915336609, "rewards/rejected": 0.7066284418106079, "step": 523 }, { "epoch": 1.33, "learning_rate": 6.171546666026522e-08, "logits/chosen": -2.171873092651367, "logits/rejected": -2.1716580390930176, "logps/chosen": -7.0503339767456055, "logps/rejected": -1.7665461301803589, "loss": 0.616, "rewards/accuracies": 0.0, "rewards/chosen": 0.8159140944480896, "rewards/margins": -0.041160762310028076, "rewards/rejected": 0.8570748567581177, "step": 524 }, { "epoch": 1.33, "learning_rate": 6.158251888772349e-08, "logits/chosen": -2.1655213832855225, "logits/rejected": -2.170290231704712, "logps/chosen": -2.1828508377075195, "logps/rejected": -8.628729820251465, "loss": 0.8084, "rewards/accuracies": 0.0, "rewards/chosen": 0.7480493783950806, "rewards/margins": -0.14196306467056274, "rewards/rejected": 0.8900124430656433, "step": 525 }, { "epoch": 1.33, "learning_rate": 6.144948452623949e-08, "logits/chosen": -2.1916167736053467, "logits/rejected": -2.194492816925049, "logps/chosen": -8.080187797546387, "logps/rejected": -3.0827836990356445, "loss": 0.6724, "rewards/accuracies": 0.0, "rewards/chosen": 0.6341820955276489, "rewards/margins": -0.13308066129684448, "rewards/rejected": 0.7672627568244934, "step": 526 }, { "epoch": 1.33, "learning_rate": 6.13163645703555e-08, "logits/chosen": -2.29048490524292, "logits/rejected": -2.303396701812744, "logps/chosen": -2.5529944896698, "logps/rejected": -2.8961405754089355, "loss": 0.6736, "rewards/accuracies": 1.0, "rewards/chosen": 0.72004634141922, "rewards/margins": 0.1787458062171936, "rewards/rejected": 0.5413005352020264, "step": 527 }, { "epoch": 1.34, "learning_rate": 6.118316001525367e-08, "logits/chosen": -2.214914321899414, "logits/rejected": -2.3643958568573, "logps/chosen": -3.4905519485473633, "logps/rejected": -14.587916374206543, "loss": 0.5994, "rewards/accuracies": 0.0, "rewards/chosen": 0.6587538719177246, "rewards/margins": -0.040286481380462646, "rewards/rejected": 0.6990403532981873, "step": 528 }, { "epoch": 1.34, "learning_rate": 6.104987185674863e-08, "logits/chosen": -2.184711456298828, "logits/rejected": -2.1817235946655273, "logps/chosen": -2.9247145652770996, "logps/rejected": -10.072103500366211, "loss": 0.6396, "rewards/accuracies": 0.0, "rewards/chosen": 0.5203578472137451, "rewards/margins": -0.02809053659439087, "rewards/rejected": 0.548448383808136, "step": 529 }, { "epoch": 1.34, "learning_rate": 6.091650109127994e-08, "logits/chosen": -2.201249122619629, "logits/rejected": -2.190155029296875, "logps/chosen": -1.8232146501541138, "logps/rejected": -6.259138107299805, "loss": 0.6905, "rewards/accuracies": 0.0, "rewards/chosen": 0.6254016160964966, "rewards/margins": -0.1436644196510315, "rewards/rejected": 0.7690660357475281, "step": 530 }, { "epoch": 1.34, "learning_rate": 6.078304871590483e-08, "logits/chosen": -2.3189334869384766, "logits/rejected": -2.325549364089966, "logps/chosen": -2.042750358581543, "logps/rejected": -1.831380009651184, "loss": 0.5052, "rewards/accuracies": 1.0, "rewards/chosen": 0.9243583679199219, "rewards/margins": 0.3591632843017578, "rewards/rejected": 0.5651950836181641, "step": 531 }, { "epoch": 1.35, "learning_rate": 6.064951572829055e-08, "logits/chosen": -2.2384262084960938, "logits/rejected": -2.2406065464019775, "logps/chosen": -4.973108291625977, "logps/rejected": -4.150612831115723, "loss": 0.5352, "rewards/accuracies": 1.0, "rewards/chosen": 0.8236209154129028, "rewards/margins": 0.31872081756591797, "rewards/rejected": 0.5049000978469849, "step": 532 }, { "epoch": 1.35, "learning_rate": 6.051590312670703e-08, "logits/chosen": -2.2330760955810547, "logits/rejected": -2.218400478363037, "logps/chosen": -4.724791526794434, "logps/rejected": -5.486517906188965, "loss": 0.6938, "rewards/accuracies": 0.0, "rewards/chosen": 0.5218074917793274, "rewards/margins": -0.4154103398323059, "rewards/rejected": 0.9372178316116333, "step": 533 }, { "epoch": 1.35, "learning_rate": 6.038221191001934e-08, "logits/chosen": -2.224928617477417, "logits/rejected": -2.231135368347168, "logps/chosen": -2.3639955520629883, "logps/rejected": -4.191851615905762, "loss": 0.7387, "rewards/accuracies": 0.0, "rewards/chosen": 0.7303715944290161, "rewards/margins": -0.11586970090866089, "rewards/rejected": 0.846241295337677, "step": 534 }, { "epoch": 1.35, "learning_rate": 6.024844307768031e-08, "logits/chosen": -2.3400862216949463, "logits/rejected": -2.3337013721466064, "logps/chosen": -1.1010397672653198, "logps/rejected": -5.238539218902588, "loss": 0.6338, "rewards/accuracies": 0.0, "rewards/chosen": 0.6685128211975098, "rewards/margins": -0.28459614515304565, "rewards/rejected": 0.9531089663505554, "step": 535 }, { "epoch": 1.36, "learning_rate": 6.011459762972299e-08, "logits/chosen": -2.1993985176086426, "logits/rejected": -2.1900675296783447, "logps/chosen": -1.8319181203842163, "logps/rejected": -5.664493083953857, "loss": 0.6069, "rewards/accuracies": 0.0, "rewards/chosen": 0.7085617780685425, "rewards/margins": -0.14777714014053345, "rewards/rejected": 0.8563389182090759, "step": 536 }, { "epoch": 1.36, "learning_rate": 5.998067656675318e-08, "logits/chosen": -2.1167638301849365, "logits/rejected": -2.1290595531463623, "logps/chosen": -2.4597253799438477, "logps/rejected": -4.129587173461914, "loss": 0.6809, "rewards/accuracies": 1.0, "rewards/chosen": 0.871159017086029, "rewards/margins": 0.2616819739341736, "rewards/rejected": 0.6094770431518555, "step": 537 }, { "epoch": 1.36, "learning_rate": 5.984668088994198e-08, "logits/chosen": -2.3217971324920654, "logits/rejected": -2.3172051906585693, "logps/chosen": -1.5682337284088135, "logps/rejected": -3.9668047428131104, "loss": 0.6456, "rewards/accuracies": 0.0, "rewards/chosen": 0.724389374256134, "rewards/margins": -0.07785356044769287, "rewards/rejected": 0.8022429347038269, "step": 538 }, { "epoch": 1.36, "learning_rate": 5.971261160101832e-08, "logits/chosen": -2.252687931060791, "logits/rejected": -2.2408597469329834, "logps/chosen": -3.834409713745117, "logps/rejected": -6.763755798339844, "loss": 0.7737, "rewards/accuracies": 0.0, "rewards/chosen": 0.6428789496421814, "rewards/margins": -0.28442782163619995, "rewards/rejected": 0.9273067712783813, "step": 539 }, { "epoch": 1.37, "learning_rate": 5.957846970226139e-08, "logits/chosen": -2.22450852394104, "logits/rejected": -2.2258644104003906, "logps/chosen": -1.0327675342559814, "logps/rejected": -10.386926651000977, "loss": 0.7157, "rewards/accuracies": 0.0, "rewards/chosen": 0.6344941258430481, "rewards/margins": -0.32082605361938477, "rewards/rejected": 0.9553201794624329, "step": 540 }, { "epoch": 1.37, "learning_rate": 5.944425619649322e-08, "logits/chosen": -2.2762744426727295, "logits/rejected": -2.2909433841705322, "logps/chosen": -9.188576698303223, "logps/rejected": -3.0212044715881348, "loss": 0.5487, "rewards/accuracies": 1.0, "rewards/chosen": 1.0926486253738403, "rewards/margins": 0.5288285613059998, "rewards/rejected": 0.5638200640678406, "step": 541 }, { "epoch": 1.37, "learning_rate": 5.930997208707119e-08, "logits/chosen": -2.238095760345459, "logits/rejected": -2.250952959060669, "logps/chosen": -2.501750946044922, "logps/rejected": -6.609541893005371, "loss": 0.6127, "rewards/accuracies": 1.0, "rewards/chosen": 0.8053849339485168, "rewards/margins": 0.18059271574020386, "rewards/rejected": 0.624792218208313, "step": 542 }, { "epoch": 1.37, "learning_rate": 5.9175618377880453e-08, "logits/chosen": -2.168123960494995, "logits/rejected": -2.1830434799194336, "logps/chosen": -9.499953269958496, "logps/rejected": -3.0830609798431396, "loss": 0.6084, "rewards/accuracies": 1.0, "rewards/chosen": 1.0199579000473022, "rewards/margins": 0.3971008062362671, "rewards/rejected": 0.6228570938110352, "step": 543 }, { "epoch": 1.38, "learning_rate": 5.9041196073326506e-08, "logits/chosen": -2.3709840774536133, "logits/rejected": -2.3709867000579834, "logps/chosen": -1.120492696762085, "logps/rejected": -8.52432632446289, "loss": 0.6973, "rewards/accuracies": 0.0, "rewards/chosen": 0.758424699306488, "rewards/margins": -0.084658682346344, "rewards/rejected": 0.843083381652832, "step": 544 }, { "epoch": 1.38, "learning_rate": 5.890670617832764e-08, "logits/chosen": -2.2693612575531006, "logits/rejected": -2.2725305557250977, "logps/chosen": -0.8977566957473755, "logps/rejected": -5.177832126617432, "loss": 0.6317, "rewards/accuracies": 1.0, "rewards/chosen": 0.7775722742080688, "rewards/margins": 0.4341036379337311, "rewards/rejected": 0.34346863627433777, "step": 545 }, { "epoch": 1.38, "learning_rate": 5.877214969830745e-08, "logits/chosen": -2.330418348312378, "logits/rejected": -2.3299758434295654, "logps/chosen": -2.9012837409973145, "logps/rejected": -10.172378540039062, "loss": 0.6034, "rewards/accuracies": 0.0, "rewards/chosen": 0.521855890750885, "rewards/margins": -0.2709961533546448, "rewards/rejected": 0.7928520441055298, "step": 546 }, { "epoch": 1.38, "learning_rate": 5.863752763918731e-08, "logits/chosen": -2.2449731826782227, "logits/rejected": -2.240194797515869, "logps/chosen": -1.820672869682312, "logps/rejected": -5.284181118011475, "loss": 0.6312, "rewards/accuracies": 0.0, "rewards/chosen": 0.7524957060813904, "rewards/margins": -0.10631436109542847, "rewards/rejected": 0.8588100671768188, "step": 547 }, { "epoch": 1.39, "learning_rate": 5.8502841007378865e-08, "logits/chosen": -2.330442428588867, "logits/rejected": -2.317666530609131, "logps/chosen": -1.5774492025375366, "logps/rejected": -2.7766988277435303, "loss": 0.7465, "rewards/accuracies": 0.0, "rewards/chosen": 0.6221394538879395, "rewards/margins": -0.1272488832473755, "rewards/rejected": 0.7493883371353149, "step": 548 }, { "epoch": 1.39, "learning_rate": 5.836809080977644e-08, "logits/chosen": -2.2323665618896484, "logits/rejected": -2.2234435081481934, "logps/chosen": -1.5142762660980225, "logps/rejected": -6.686519622802734, "loss": 0.7859, "rewards/accuracies": 0.0, "rewards/chosen": 0.623046875, "rewards/margins": -0.4689139127731323, "rewards/rejected": 1.0919607877731323, "step": 549 }, { "epoch": 1.39, "learning_rate": 5.8233278053749646e-08, "logits/chosen": -2.2340481281280518, "logits/rejected": -2.249342203140259, "logps/chosen": -2.4310731887817383, "logps/rejected": -2.9488279819488525, "loss": 0.6605, "rewards/accuracies": 1.0, "rewards/chosen": 0.7746878862380981, "rewards/margins": 0.26135820150375366, "rewards/rejected": 0.5133296847343445, "step": 550 }, { "epoch": 1.39, "learning_rate": 5.80984037471357e-08, "logits/chosen": -2.243039608001709, "logits/rejected": -2.2405922412872314, "logps/chosen": -5.956752777099609, "logps/rejected": -1.886809229850769, "loss": 0.7408, "rewards/accuracies": 0.0, "rewards/chosen": 0.6533142924308777, "rewards/margins": -0.13774776458740234, "rewards/rejected": 0.79106205701828, "step": 551 }, { "epoch": 1.4, "learning_rate": 5.796346889823202e-08, "logits/chosen": -2.3819499015808105, "logits/rejected": -2.384887456893921, "logps/chosen": -2.4660000801086426, "logps/rejected": -2.7274699211120605, "loss": 0.5225, "rewards/accuracies": 1.0, "rewards/chosen": 0.964054524898529, "rewards/margins": 0.4453045129776001, "rewards/rejected": 0.518750011920929, "step": 552 }, { "epoch": 1.4, "learning_rate": 5.782847451578857e-08, "logits/chosen": -2.175015687942505, "logits/rejected": -2.1691434383392334, "logps/chosen": -0.5909135937690735, "logps/rejected": -7.762685298919678, "loss": 0.6803, "rewards/accuracies": 0.0, "rewards/chosen": 0.607002854347229, "rewards/margins": -0.5669323205947876, "rewards/rejected": 1.1739351749420166, "step": 553 }, { "epoch": 1.4, "learning_rate": 5.769342160900043e-08, "logits/chosen": -2.2497732639312744, "logits/rejected": -2.258159875869751, "logps/chosen": -3.770343780517578, "logps/rejected": -2.7848618030548096, "loss": 0.5574, "rewards/accuracies": 1.0, "rewards/chosen": 0.7061451077461243, "rewards/margins": 0.2620810568332672, "rewards/rejected": 0.44406405091285706, "step": 554 }, { "epoch": 1.41, "learning_rate": 5.7558311187500154e-08, "logits/chosen": -2.280803680419922, "logits/rejected": -2.287670135498047, "logps/chosen": -1.3211404085159302, "logps/rejected": -2.169008255004883, "loss": 0.7326, "rewards/accuracies": 1.0, "rewards/chosen": 0.9482070803642273, "rewards/margins": 0.36917924880981445, "rewards/rejected": 0.5790278315544128, "step": 555 }, { "epoch": 1.41, "learning_rate": 5.7423144261350286e-08, "logits/chosen": -2.2410058975219727, "logits/rejected": -2.2380013465881348, "logps/chosen": -0.49872246384620667, "logps/rejected": -7.84991455078125, "loss": 0.735, "rewards/accuracies": 0.0, "rewards/chosen": 0.6422314643859863, "rewards/margins": -0.21527093648910522, "rewards/rejected": 0.8575024008750916, "step": 556 }, { "epoch": 1.41, "learning_rate": 5.728792184103579e-08, "logits/chosen": -2.1829659938812256, "logits/rejected": -2.2408628463745117, "logps/chosen": -1.0209599733352661, "logps/rejected": -7.763177871704102, "loss": 0.579, "rewards/accuracies": 1.0, "rewards/chosen": 0.7806575894355774, "rewards/margins": 0.17953431606292725, "rewards/rejected": 0.6011232733726501, "step": 557 }, { "epoch": 1.41, "learning_rate": 5.7152644937456515e-08, "logits/chosen": -2.1732113361358643, "logits/rejected": -2.1756021976470947, "logps/chosen": -3.7013165950775146, "logps/rejected": -2.9242045879364014, "loss": 0.7033, "rewards/accuracies": 1.0, "rewards/chosen": 0.9212247729301453, "rewards/margins": 0.3749960660934448, "rewards/rejected": 0.5462287068367004, "step": 558 }, { "epoch": 1.42, "learning_rate": 5.7017314561919574e-08, "logits/chosen": -2.238787889480591, "logits/rejected": -2.235203504562378, "logps/chosen": -2.3931076526641846, "logps/rejected": -7.433586120605469, "loss": 0.6406, "rewards/accuracies": 0.0, "rewards/chosen": 0.6636382937431335, "rewards/margins": -0.46150368452072144, "rewards/rejected": 1.125141978263855, "step": 559 }, { "epoch": 1.42, "learning_rate": 5.6881931726131846e-08, "logits/chosen": -2.336242914199829, "logits/rejected": -2.334716558456421, "logps/chosen": -2.197573661804199, "logps/rejected": -5.144317626953125, "loss": 0.6338, "rewards/accuracies": 0.0, "rewards/chosen": 0.63991779088974, "rewards/margins": -0.162489116191864, "rewards/rejected": 0.802406907081604, "step": 560 }, { "epoch": 1.42, "learning_rate": 5.674649744219242e-08, "logits/chosen": -2.260596990585327, "logits/rejected": -2.2504384517669678, "logps/chosen": -1.9318071603775024, "logps/rejected": -11.540029525756836, "loss": 0.6055, "rewards/accuracies": 0.0, "rewards/chosen": 0.6127088069915771, "rewards/margins": -0.12181556224822998, "rewards/rejected": 0.7345243692398071, "step": 561 }, { "epoch": 1.42, "learning_rate": 5.6611012722584974e-08, "logits/chosen": -2.2629966735839844, "logits/rejected": -2.26458477973938, "logps/chosen": -2.7617335319519043, "logps/rejected": -5.494078159332275, "loss": 0.5186, "rewards/accuracies": 1.0, "rewards/chosen": 0.7926439046859741, "rewards/margins": 0.375247597694397, "rewards/rejected": 0.41739630699157715, "step": 562 }, { "epoch": 1.43, "learning_rate": 5.647547858017021e-08, "logits/chosen": -2.3184053897857666, "logits/rejected": -2.3140170574188232, "logps/chosen": -1.4490995407104492, "logps/rejected": -2.059527635574341, "loss": 0.6262, "rewards/accuracies": 0.0, "rewards/chosen": 0.7017422914505005, "rewards/margins": -0.2889217138290405, "rewards/rejected": 0.990664005279541, "step": 563 }, { "epoch": 1.43, "learning_rate": 5.633989602817837e-08, "logits/chosen": -2.244335651397705, "logits/rejected": -2.244739532470703, "logps/chosen": -2.3566455841064453, "logps/rejected": -7.839695930480957, "loss": 0.7467, "rewards/accuracies": 0.0, "rewards/chosen": 0.4862414300441742, "rewards/margins": -0.4490552842617035, "rewards/rejected": 0.9352967143058777, "step": 564 }, { "epoch": 1.43, "learning_rate": 5.6204266080201556e-08, "logits/chosen": -2.2959606647491455, "logits/rejected": -2.345130205154419, "logps/chosen": -2.474231243133545, "logps/rejected": -8.305815696716309, "loss": 0.6191, "rewards/accuracies": 1.0, "rewards/chosen": 0.8424085974693298, "rewards/margins": 0.32273149490356445, "rewards/rejected": 0.5196771025657654, "step": 565 }, { "epoch": 1.43, "learning_rate": 5.60685897501862e-08, "logits/chosen": -2.3359861373901367, "logits/rejected": -2.344546318054199, "logps/chosen": -12.943925857543945, "logps/rejected": -1.749173879623413, "loss": 0.556, "rewards/accuracies": 1.0, "rewards/chosen": 0.8610103726387024, "rewards/margins": 0.12189626693725586, "rewards/rejected": 0.7391141057014465, "step": 566 }, { "epoch": 1.44, "learning_rate": 5.593286805242549e-08, "logits/chosen": -2.320543050765991, "logits/rejected": -2.315741539001465, "logps/chosen": -3.822888135910034, "logps/rejected": -2.90328311920166, "loss": 0.7444, "rewards/accuracies": 1.0, "rewards/chosen": 0.7432751059532166, "rewards/margins": 0.21777880191802979, "rewards/rejected": 0.5254963040351868, "step": 567 }, { "epoch": 1.44, "learning_rate": 5.5797102001551744e-08, "logits/chosen": -2.2635529041290283, "logits/rejected": -2.2497899532318115, "logps/chosen": -1.1436370611190796, "logps/rejected": -5.34222412109375, "loss": 0.6387, "rewards/accuracies": 0.0, "rewards/chosen": 0.5989008545875549, "rewards/margins": -0.12641167640686035, "rewards/rejected": 0.7253125309944153, "step": 568 }, { "epoch": 1.44, "learning_rate": 5.5661292612528896e-08, "logits/chosen": -2.1106913089752197, "logits/rejected": -2.13019061088562, "logps/chosen": -1.6664373874664307, "logps/rejected": -6.814711570739746, "loss": 0.6098, "rewards/accuracies": 1.0, "rewards/chosen": 0.8665334582328796, "rewards/margins": 0.36274009943008423, "rewards/rejected": 0.5037933588027954, "step": 569 }, { "epoch": 1.44, "learning_rate": 5.552544090064487e-08, "logits/chosen": -2.1902718544006348, "logits/rejected": -2.2030551433563232, "logps/chosen": -0.790667712688446, "logps/rejected": -17.842708587646484, "loss": 0.6298, "rewards/accuracies": 1.0, "rewards/chosen": 0.7861666083335876, "rewards/margins": 0.016671955585479736, "rewards/rejected": 0.7694946527481079, "step": 570 }, { "epoch": 1.45, "learning_rate": 5.538954788150394e-08, "logits/chosen": -2.2384068965911865, "logits/rejected": -2.2466464042663574, "logps/chosen": -2.1807613372802734, "logps/rejected": -6.81248664855957, "loss": 0.6994, "rewards/accuracies": 1.0, "rewards/chosen": 0.7573047876358032, "rewards/margins": 0.296542763710022, "rewards/rejected": 0.46076202392578125, "step": 571 }, { "epoch": 1.45, "learning_rate": 5.5253614571019224e-08, "logits/chosen": -2.2396399974823, "logits/rejected": -2.265101671218872, "logps/chosen": -0.8314979672431946, "logps/rejected": -17.252544403076172, "loss": 0.6693, "rewards/accuracies": 0.0, "rewards/chosen": 0.7203125953674316, "rewards/margins": -0.03793537616729736, "rewards/rejected": 0.758247971534729, "step": 572 }, { "epoch": 1.45, "learning_rate": 5.511764198540505e-08, "logits/chosen": -2.282503128051758, "logits/rejected": -2.2721493244171143, "logps/chosen": -1.0902388095855713, "logps/rejected": -4.039065837860107, "loss": 0.6736, "rewards/accuracies": 0.0, "rewards/chosen": 0.6032319068908691, "rewards/margins": -0.11483663320541382, "rewards/rejected": 0.718068540096283, "step": 573 }, { "epoch": 1.45, "learning_rate": 5.498163114116935e-08, "logits/chosen": -2.2482473850250244, "logits/rejected": -2.253175973892212, "logps/chosen": -1.5153155326843262, "logps/rejected": -0.992299497127533, "loss": 0.5945, "rewards/accuracies": 1.0, "rewards/chosen": 0.8647821545600891, "rewards/margins": 0.23634952306747437, "rewards/rejected": 0.6284326314926147, "step": 574 }, { "epoch": 1.46, "learning_rate": 5.484558305510608e-08, "logits/chosen": -2.1656641960144043, "logits/rejected": -2.165128707885742, "logps/chosen": -3.1872456073760986, "logps/rejected": -2.926471710205078, "loss": 0.5489, "rewards/accuracies": 1.0, "rewards/chosen": 0.9361162185668945, "rewards/margins": 0.452314168214798, "rewards/rejected": 0.48380205035209656, "step": 575 }, { "epoch": 1.46, "learning_rate": 5.4709498744287596e-08, "logits/chosen": -2.2452902793884277, "logits/rejected": -2.2462854385375977, "logps/chosen": -1.4347599744796753, "logps/rejected": -6.703640937805176, "loss": 0.6277, "rewards/accuracies": 1.0, "rewards/chosen": 0.7811260223388672, "rewards/margins": 0.26223647594451904, "rewards/rejected": 0.5188895463943481, "step": 576 }, { "epoch": 1.46, "learning_rate": 5.457337922605708e-08, "logits/chosen": -2.278371572494507, "logits/rejected": -2.280014991760254, "logps/chosen": -2.818272590637207, "logps/rejected": -6.000096797943115, "loss": 0.5961, "rewards/accuracies": 0.0, "rewards/chosen": 0.7719919085502625, "rewards/margins": -0.14013826847076416, "rewards/rejected": 0.9121301770210266, "step": 577 }, { "epoch": 1.46, "learning_rate": 5.44372255180209e-08, "logits/chosen": -2.25089430809021, "logits/rejected": -2.300205945968628, "logps/chosen": -0.9958873391151428, "logps/rejected": -9.35323715209961, "loss": 0.6387, "rewards/accuracies": 1.0, "rewards/chosen": 0.8103393912315369, "rewards/margins": 0.4616853892803192, "rewards/rejected": 0.34865400195121765, "step": 578 }, { "epoch": 1.47, "learning_rate": 5.4301038638041064e-08, "logits/chosen": -2.260033369064331, "logits/rejected": -2.269066333770752, "logps/chosen": -1.9342745542526245, "logps/rejected": -5.442183017730713, "loss": 0.6785, "rewards/accuracies": 1.0, "rewards/chosen": 0.7668630480766296, "rewards/margins": 0.3388236463069916, "rewards/rejected": 0.42803940176963806, "step": 579 }, { "epoch": 1.47, "learning_rate": 5.416481960422747e-08, "logits/chosen": -2.20043683052063, "logits/rejected": -2.259963274002075, "logps/chosen": -2.9809961318969727, "logps/rejected": -8.61999225616455, "loss": 0.6995, "rewards/accuracies": 1.0, "rewards/chosen": 0.9748644232749939, "rewards/margins": 0.16988444328308105, "rewards/rejected": 0.8049799799919128, "step": 580 }, { "epoch": 1.47, "learning_rate": 5.4028569434930526e-08, "logits/chosen": -2.27370023727417, "logits/rejected": -2.3263485431671143, "logps/chosen": -1.0471731424331665, "logps/rejected": -7.05737829208374, "loss": 0.6078, "rewards/accuracies": 1.0, "rewards/chosen": 0.8791790008544922, "rewards/margins": 0.23825162649154663, "rewards/rejected": 0.6409273743629456, "step": 581 }, { "epoch": 1.47, "learning_rate": 5.389228914873333e-08, "logits/chosen": -2.1871228218078613, "logits/rejected": -2.2067151069641113, "logps/chosen": -1.4410825967788696, "logps/rejected": -9.767035484313965, "loss": 0.6265, "rewards/accuracies": 1.0, "rewards/chosen": 0.8198006749153137, "rewards/margins": 0.4992988705635071, "rewards/rejected": 0.32050180435180664, "step": 582 }, { "epoch": 1.48, "learning_rate": 5.375597976444409e-08, "logits/chosen": -2.350390911102295, "logits/rejected": -2.3398354053497314, "logps/chosen": -1.6381845474243164, "logps/rejected": -6.145118713378906, "loss": 0.6288, "rewards/accuracies": 1.0, "rewards/chosen": 0.6638308763504028, "rewards/margins": 0.04539930820465088, "rewards/rejected": 0.618431568145752, "step": 583 }, { "epoch": 1.48, "learning_rate": 5.361964230108862e-08, "logits/chosen": -2.2490148544311523, "logits/rejected": -2.254380464553833, "logps/chosen": -3.3422608375549316, "logps/rejected": -10.890284538269043, "loss": 0.7328, "rewards/accuracies": 0.0, "rewards/chosen": 0.7614522576332092, "rewards/margins": -0.12033361196517944, "rewards/rejected": 0.8817858695983887, "step": 584 }, { "epoch": 1.48, "learning_rate": 5.3483277777902613e-08, "logits/chosen": -2.3022282123565674, "logits/rejected": -2.289189338684082, "logps/chosen": -1.1108834743499756, "logps/rejected": -8.040579795837402, "loss": 0.6041, "rewards/accuracies": 0.0, "rewards/chosen": 0.6858320236206055, "rewards/margins": -0.28896474838256836, "rewards/rejected": 0.9747967720031738, "step": 585 }, { "epoch": 1.48, "learning_rate": 5.334688721432408e-08, "logits/chosen": -2.348942279815674, "logits/rejected": -2.338711977005005, "logps/chosen": -1.3524112701416016, "logps/rejected": -5.840670108795166, "loss": 0.6999, "rewards/accuracies": 1.0, "rewards/chosen": 0.7908832430839539, "rewards/margins": 0.008172929286956787, "rewards/rejected": 0.7827103137969971, "step": 586 }, { "epoch": 1.49, "learning_rate": 5.321047162998568e-08, "logits/chosen": -2.250138759613037, "logits/rejected": -2.249035596847534, "logps/chosen": -4.627227783203125, "logps/rejected": -3.582585096359253, "loss": 0.6671, "rewards/accuracies": 1.0, "rewards/chosen": 0.7767885327339172, "rewards/margins": 0.3203277587890625, "rewards/rejected": 0.45646077394485474, "step": 587 }, { "epoch": 1.49, "learning_rate": 5.3074032044707107e-08, "logits/chosen": -2.155755043029785, "logits/rejected": -2.151623249053955, "logps/chosen": -2.403184175491333, "logps/rejected": -7.570554733276367, "loss": 0.5957, "rewards/accuracies": 1.0, "rewards/chosen": 0.7994750142097473, "rewards/margins": 0.3136289715766907, "rewards/rejected": 0.48584604263305664, "step": 588 }, { "epoch": 1.49, "learning_rate": 5.293756947848754e-08, "logits/chosen": -2.217984676361084, "logits/rejected": -2.2229859828948975, "logps/chosen": -2.1955599784851074, "logps/rejected": -6.834612846374512, "loss": 0.585, "rewards/accuracies": 0.0, "rewards/chosen": 0.6912041306495667, "rewards/margins": -0.2044910192489624, "rewards/rejected": 0.895695149898529, "step": 589 }, { "epoch": 1.49, "learning_rate": 5.280108495149792e-08, "logits/chosen": -2.2589242458343506, "logits/rejected": -2.2689948081970215, "logps/chosen": -10.028499603271484, "logps/rejected": -2.7168333530426025, "loss": 0.6279, "rewards/accuracies": 1.0, "rewards/chosen": 0.8011869788169861, "rewards/margins": 0.2044302225112915, "rewards/rejected": 0.5967567563056946, "step": 590 }, { "epoch": 1.5, "learning_rate": 5.266457948407336e-08, "logits/chosen": -2.257161855697632, "logits/rejected": -2.2469303607940674, "logps/chosen": -3.5314478874206543, "logps/rejected": -6.812479496002197, "loss": 0.714, "rewards/accuracies": 0.0, "rewards/chosen": 0.5225030779838562, "rewards/margins": -0.2986753582954407, "rewards/rejected": 0.8211784362792969, "step": 591 }, { "epoch": 1.5, "learning_rate": 5.252805409670553e-08, "logits/chosen": -2.2833938598632812, "logits/rejected": -2.2909512519836426, "logps/chosen": -4.275683879852295, "logps/rejected": -10.593108177185059, "loss": 0.5593, "rewards/accuracies": 1.0, "rewards/chosen": 0.9478815197944641, "rewards/margins": 0.47846660017967224, "rewards/rejected": 0.46941491961479187, "step": 592 }, { "epoch": 1.5, "learning_rate": 5.239150981003502e-08, "logits/chosen": -2.1979403495788574, "logits/rejected": -2.1929094791412354, "logps/chosen": -1.6922829151153564, "logps/rejected": -9.216059684753418, "loss": 0.5913, "rewards/accuracies": 0.0, "rewards/chosen": 0.5756505131721497, "rewards/margins": -0.11028802394866943, "rewards/rejected": 0.6859385371208191, "step": 593 }, { "epoch": 1.5, "learning_rate": 5.225494764484373e-08, "logits/chosen": -2.1243174076080322, "logits/rejected": -2.1168265342712402, "logps/chosen": -1.1902977228164673, "logps/rejected": -6.01706600189209, "loss": 0.8575, "rewards/accuracies": 0.0, "rewards/chosen": 0.598423182964325, "rewards/margins": -0.31458520889282227, "rewards/rejected": 0.9130083918571472, "step": 594 }, { "epoch": 1.51, "learning_rate": 5.211836862204715e-08, "logits/chosen": -2.177043914794922, "logits/rejected": -2.1793394088745117, "logps/chosen": -1.9561488628387451, "logps/rejected": -3.3270535469055176, "loss": 0.5678, "rewards/accuracies": 1.0, "rewards/chosen": 0.6992691159248352, "rewards/margins": 0.2004769742488861, "rewards/rejected": 0.4987921416759491, "step": 595 }, { "epoch": 1.51, "learning_rate": 5.1981773762686855e-08, "logits/chosen": -2.2720110416412354, "logits/rejected": -2.2738986015319824, "logps/chosen": -0.887351393699646, "logps/rejected": -3.8076229095458984, "loss": 0.6687, "rewards/accuracies": 1.0, "rewards/chosen": 0.7760934233665466, "rewards/margins": 0.36406201124191284, "rewards/rejected": 0.4120314121246338, "step": 596 }, { "epoch": 1.51, "learning_rate": 5.18451640879228e-08, "logits/chosen": -2.22946834564209, "logits/rejected": -2.227626085281372, "logps/chosen": -2.341085433959961, "logps/rejected": -3.683094024658203, "loss": 0.5983, "rewards/accuracies": 0.0, "rewards/chosen": 0.6564964652061462, "rewards/margins": -0.28313660621643066, "rewards/rejected": 0.9396330714225769, "step": 597 }, { "epoch": 1.51, "learning_rate": 5.170854061902569e-08, "logits/chosen": -2.179401159286499, "logits/rejected": -2.1995036602020264, "logps/chosen": -8.441075325012207, "logps/rejected": -4.327276706695557, "loss": 0.5026, "rewards/accuracies": 1.0, "rewards/chosen": 1.0676684379577637, "rewards/margins": 0.5129554867744446, "rewards/rejected": 0.5547129511833191, "step": 598 }, { "epoch": 1.52, "learning_rate": 5.157190437736935e-08, "logits/chosen": -2.2470619678497314, "logits/rejected": -2.246727705001831, "logps/chosen": -1.4223216772079468, "logps/rejected": -5.484673500061035, "loss": 0.6356, "rewards/accuracies": 1.0, "rewards/chosen": 0.7734154462814331, "rewards/margins": 0.3068093955516815, "rewards/rejected": 0.4666060507297516, "step": 599 }, { "epoch": 1.52, "learning_rate": 5.143525638442309e-08, "logits/chosen": -2.234792470932007, "logits/rejected": -2.24253511428833, "logps/chosen": -1.2659214735031128, "logps/rejected": -3.3906052112579346, "loss": 0.5374, "rewards/accuracies": 1.0, "rewards/chosen": 0.9727340936660767, "rewards/margins": 0.4891490042209625, "rewards/rejected": 0.48358508944511414, "step": 600 }, { "epoch": 1.52, "learning_rate": 5.129859766174409e-08, "logits/chosen": -2.2521121501922607, "logits/rejected": -2.262389659881592, "logps/chosen": -4.9758758544921875, "logps/rejected": -4.288949489593506, "loss": 0.7602, "rewards/accuracies": 1.0, "rewards/chosen": 0.8934826254844666, "rewards/margins": 0.501994252204895, "rewards/rejected": 0.3914884030818939, "step": 601 }, { "epoch": 1.52, "learning_rate": 5.1161929230969724e-08, "logits/chosen": -2.2073659896850586, "logits/rejected": -2.199570417404175, "logps/chosen": -2.634582996368408, "logps/rejected": -8.329639434814453, "loss": 0.714, "rewards/accuracies": 0.0, "rewards/chosen": 0.5729772448539734, "rewards/margins": -0.28859806060791016, "rewards/rejected": 0.8615753054618835, "step": 602 }, { "epoch": 1.53, "learning_rate": 5.102525211380994e-08, "logits/chosen": -2.26304030418396, "logits/rejected": -2.2554478645324707, "logps/chosen": -1.5547515153884888, "logps/rejected": -4.226092338562012, "loss": 0.7872, "rewards/accuracies": 0.0, "rewards/chosen": 0.7487710118293762, "rewards/margins": -0.29529768228530884, "rewards/rejected": 1.044068694114685, "step": 603 }, { "epoch": 1.53, "learning_rate": 5.088856733203963e-08, "logits/chosen": -2.1716792583465576, "logits/rejected": -2.1744868755340576, "logps/chosen": -0.3682188093662262, "logps/rejected": -8.616371154785156, "loss": 0.7842, "rewards/accuracies": 0.0, "rewards/chosen": 0.5855211615562439, "rewards/margins": -0.3542172908782959, "rewards/rejected": 0.9397384524345398, "step": 604 }, { "epoch": 1.53, "learning_rate": 5.0751875907491e-08, "logits/chosen": -2.304729700088501, "logits/rejected": -2.3289268016815186, "logps/chosen": -1.5023796558380127, "logps/rejected": -7.434080600738525, "loss": 0.5715, "rewards/accuracies": 1.0, "rewards/chosen": 0.9484255909919739, "rewards/margins": 0.47891756892204285, "rewards/rejected": 0.46950802206993103, "step": 605 }, { "epoch": 1.53, "learning_rate": 5.0615178862045915e-08, "logits/chosen": -2.311959743499756, "logits/rejected": -2.3156604766845703, "logps/chosen": -1.7057974338531494, "logps/rejected": -3.7333426475524902, "loss": 0.586, "rewards/accuracies": 1.0, "rewards/chosen": 0.7732904553413391, "rewards/margins": 0.30327001214027405, "rewards/rejected": 0.47002044320106506, "step": 606 }, { "epoch": 1.54, "learning_rate": 5.04784772176282e-08, "logits/chosen": -2.245584487915039, "logits/rejected": -2.252749443054199, "logps/chosen": -6.414439678192139, "logps/rejected": -4.157635688781738, "loss": 0.6397, "rewards/accuracies": 1.0, "rewards/chosen": 1.0488938093185425, "rewards/margins": 0.5006129145622253, "rewards/rejected": 0.5482808947563171, "step": 607 }, { "epoch": 1.54, "learning_rate": 5.034177199619617e-08, "logits/chosen": -2.0937724113464355, "logits/rejected": -2.0985701084136963, "logps/chosen": -2.7599034309387207, "logps/rejected": -2.593078374862671, "loss": 0.5503, "rewards/accuracies": 1.0, "rewards/chosen": 0.9703807830810547, "rewards/margins": 0.4452740550041199, "rewards/rejected": 0.5251067280769348, "step": 608 }, { "epoch": 1.54, "learning_rate": 5.020506421973479e-08, "logits/chosen": -2.2010462284088135, "logits/rejected": -2.204556703567505, "logps/chosen": -2.079317331314087, "logps/rejected": -5.646229267120361, "loss": 0.6708, "rewards/accuracies": 1.0, "rewards/chosen": 0.7444862723350525, "rewards/margins": 0.05932891368865967, "rewards/rejected": 0.6851573586463928, "step": 609 }, { "epoch": 1.54, "learning_rate": 5.006835491024816e-08, "logits/chosen": -2.1974117755889893, "logits/rejected": -2.193011999130249, "logps/chosen": -2.5218472480773926, "logps/rejected": -4.198387145996094, "loss": 0.627, "rewards/accuracies": 0.0, "rewards/chosen": 0.4930300712585449, "rewards/margins": -0.31454288959503174, "rewards/rejected": 0.8075729608535767, "step": 610 }, { "epoch": 1.55, "learning_rate": 4.993164508975184e-08, "logits/chosen": -2.2598822116851807, "logits/rejected": -2.291147232055664, "logps/chosen": -1.151219129562378, "logps/rejected": -6.401294231414795, "loss": 0.6639, "rewards/accuracies": 1.0, "rewards/chosen": 0.8070917129516602, "rewards/margins": 0.2615811824798584, "rewards/rejected": 0.5455105304718018, "step": 611 }, { "epoch": 1.55, "learning_rate": 4.979493578026522e-08, "logits/chosen": -2.2324681282043457, "logits/rejected": -2.2288804054260254, "logps/chosen": -4.812717437744141, "logps/rejected": -5.481943130493164, "loss": 0.6392, "rewards/accuracies": 1.0, "rewards/chosen": 0.8365947604179382, "rewards/margins": 0.2910289764404297, "rewards/rejected": 0.5455657839775085, "step": 612 }, { "epoch": 1.55, "learning_rate": 4.965822800380383e-08, "logits/chosen": -2.2273552417755127, "logits/rejected": -2.251175880432129, "logps/chosen": -2.2739222049713135, "logps/rejected": -8.695854187011719, "loss": 0.6866, "rewards/accuracies": 1.0, "rewards/chosen": 0.7556077837944031, "rewards/margins": 0.025131404399871826, "rewards/rejected": 0.7304763793945312, "step": 613 }, { "epoch": 1.55, "learning_rate": 4.9521522782371794e-08, "logits/chosen": -2.2657077312469482, "logits/rejected": -2.2450058460235596, "logps/chosen": -4.094313621520996, "logps/rejected": -4.975306510925293, "loss": 0.5775, "rewards/accuracies": 0.0, "rewards/chosen": 0.7015708088874817, "rewards/margins": -0.09870892763137817, "rewards/rejected": 0.8002797365188599, "step": 614 }, { "epoch": 1.56, "learning_rate": 4.93848211379541e-08, "logits/chosen": -2.272484540939331, "logits/rejected": -2.2753078937530518, "logps/chosen": -0.6502484083175659, "logps/rejected": -7.998456001281738, "loss": 0.5304, "rewards/accuracies": 0.0, "rewards/chosen": 0.7230857014656067, "rewards/margins": -0.09969902038574219, "rewards/rejected": 0.8227847218513489, "step": 615 }, { "epoch": 1.56, "learning_rate": 4.9248124092508986e-08, "logits/chosen": -2.342775821685791, "logits/rejected": -2.358232259750366, "logps/chosen": -7.476038455963135, "logps/rejected": -4.144865036010742, "loss": 0.7115, "rewards/accuracies": 0.0, "rewards/chosen": 0.6477977633476257, "rewards/margins": -0.04892045259475708, "rewards/rejected": 0.6967182159423828, "step": 616 }, { "epoch": 1.56, "learning_rate": 4.9111432667960376e-08, "logits/chosen": -2.3024544715881348, "logits/rejected": -2.304225206375122, "logps/chosen": -1.457822322845459, "logps/rejected": -3.1410017013549805, "loss": 0.6009, "rewards/accuracies": 1.0, "rewards/chosen": 0.8447713851928711, "rewards/margins": 0.3507594168186188, "rewards/rejected": 0.4940119683742523, "step": 617 }, { "epoch": 1.56, "learning_rate": 4.8974747886190065e-08, "logits/chosen": -2.3383171558380127, "logits/rejected": -2.3304126262664795, "logps/chosen": -3.1310036182403564, "logps/rejected": -8.710413932800293, "loss": 0.7254, "rewards/accuracies": 0.0, "rewards/chosen": 0.49202901124954224, "rewards/margins": -0.20610791444778442, "rewards/rejected": 0.6981369256973267, "step": 618 }, { "epoch": 1.57, "learning_rate": 4.8838070769030285e-08, "logits/chosen": -2.2639825344085693, "logits/rejected": -2.2752268314361572, "logps/chosen": -4.688891410827637, "logps/rejected": -4.1521477699279785, "loss": 0.5772, "rewards/accuracies": 1.0, "rewards/chosen": 1.0452862977981567, "rewards/margins": 0.5737987160682678, "rewards/rejected": 0.4714875817298889, "step": 619 }, { "epoch": 1.57, "learning_rate": 4.870140233825591e-08, "logits/chosen": -2.306670904159546, "logits/rejected": -2.29649019241333, "logps/chosen": -0.9499667882919312, "logps/rejected": -5.173892498016357, "loss": 0.7093, "rewards/accuracies": 0.0, "rewards/chosen": 0.5862790942192078, "rewards/margins": -0.24832385778427124, "rewards/rejected": 0.834602952003479, "step": 620 }, { "epoch": 1.57, "learning_rate": 4.856474361557691e-08, "logits/chosen": -2.274463176727295, "logits/rejected": -2.2890992164611816, "logps/chosen": -2.185833692550659, "logps/rejected": -20.575037002563477, "loss": 0.5829, "rewards/accuracies": 0.0, "rewards/chosen": 0.6818470358848572, "rewards/margins": -0.027339696884155273, "rewards/rejected": 0.7091867327690125, "step": 621 }, { "epoch": 1.57, "learning_rate": 4.8428095622630655e-08, "logits/chosen": -2.247779369354248, "logits/rejected": -2.252610683441162, "logps/chosen": -1.674074411392212, "logps/rejected": -7.051548957824707, "loss": 0.7554, "rewards/accuracies": 0.0, "rewards/chosen": 0.721851646900177, "rewards/margins": -0.039531052112579346, "rewards/rejected": 0.7613826990127563, "step": 622 }, { "epoch": 1.58, "learning_rate": 4.8291459380974306e-08, "logits/chosen": -2.1814544200897217, "logits/rejected": -2.1842708587646484, "logps/chosen": -4.146368980407715, "logps/rejected": -2.1944963932037354, "loss": 0.605, "rewards/accuracies": 1.0, "rewards/chosen": 0.8745499849319458, "rewards/margins": 0.28691625595092773, "rewards/rejected": 0.5876337289810181, "step": 623 }, { "epoch": 1.58, "learning_rate": 4.81548359120772e-08, "logits/chosen": -2.218573808670044, "logits/rejected": -2.2228682041168213, "logps/chosen": -2.000185489654541, "logps/rejected": -5.185663223266602, "loss": 0.6647, "rewards/accuracies": 1.0, "rewards/chosen": 0.9465015530586243, "rewards/margins": 0.5110371112823486, "rewards/rejected": 0.435464471578598, "step": 624 }, { "epoch": 1.58, "learning_rate": 4.801822623731316e-08, "logits/chosen": -2.2277679443359375, "logits/rejected": -2.2926878929138184, "logps/chosen": -6.4043779373168945, "logps/rejected": -14.34355354309082, "loss": 0.7566, "rewards/accuracies": 1.0, "rewards/chosen": 0.8007414937019348, "rewards/margins": 0.566681981086731, "rewards/rejected": 0.23405952751636505, "step": 625 }, { "epoch": 1.58, "learning_rate": 4.7881631377952854e-08, "logits/chosen": -2.284310817718506, "logits/rejected": -2.3554909229278564, "logps/chosen": -1.5243277549743652, "logps/rejected": -26.199363708496094, "loss": 0.6255, "rewards/accuracies": 1.0, "rewards/chosen": 0.7290665507316589, "rewards/margins": 0.13057029247283936, "rewards/rejected": 0.5984962582588196, "step": 626 }, { "epoch": 1.59, "learning_rate": 4.7745052355156274e-08, "logits/chosen": -2.1971960067749023, "logits/rejected": -2.1945619583129883, "logps/chosen": -1.9084453582763672, "logps/rejected": -5.870922088623047, "loss": 0.7106, "rewards/accuracies": 0.0, "rewards/chosen": 0.509326159954071, "rewards/margins": -0.4915580153465271, "rewards/rejected": 1.0008841753005981, "step": 627 }, { "epoch": 1.59, "learning_rate": 4.7608490189964975e-08, "logits/chosen": -2.121462106704712, "logits/rejected": -2.2600481510162354, "logps/chosen": -1.3628662824630737, "logps/rejected": -22.607507705688477, "loss": 0.6971, "rewards/accuracies": 1.0, "rewards/chosen": 0.7292923927307129, "rewards/margins": 0.17726927995681763, "rewards/rejected": 0.5520231127738953, "step": 628 }, { "epoch": 1.59, "learning_rate": 4.7471945903294486e-08, "logits/chosen": -2.2458300590515137, "logits/rejected": -2.24537992477417, "logps/chosen": -1.2029781341552734, "logps/rejected": -2.126154661178589, "loss": 0.712, "rewards/accuracies": 1.0, "rewards/chosen": 0.894443154335022, "rewards/margins": 0.23182451725006104, "rewards/rejected": 0.6626186370849609, "step": 629 }, { "epoch": 1.59, "learning_rate": 4.7335420515926645e-08, "logits/chosen": -2.1735496520996094, "logits/rejected": -2.174062490463257, "logps/chosen": -11.307557106018066, "logps/rejected": -4.321300029754639, "loss": 0.5977, "rewards/accuracies": 1.0, "rewards/chosen": 1.3236254453659058, "rewards/margins": 0.5754734873771667, "rewards/rejected": 0.748151957988739, "step": 630 }, { "epoch": 1.6, "learning_rate": 4.7198915048502087e-08, "logits/chosen": -2.200571060180664, "logits/rejected": -2.1902341842651367, "logps/chosen": -3.5868871212005615, "logps/rejected": -2.478477716445923, "loss": 0.5881, "rewards/accuracies": 1.0, "rewards/chosen": 0.8819932341575623, "rewards/margins": 0.26719725131988525, "rewards/rejected": 0.614795982837677, "step": 631 }, { "epoch": 1.6, "learning_rate": 4.706243052151247e-08, "logits/chosen": -2.2519547939300537, "logits/rejected": -2.2415859699249268, "logps/chosen": -0.9537286162376404, "logps/rejected": -5.3051371574401855, "loss": 0.8201, "rewards/accuracies": 0.0, "rewards/chosen": 0.7038772106170654, "rewards/margins": -0.4431644678115845, "rewards/rejected": 1.14704167842865, "step": 632 }, { "epoch": 1.6, "learning_rate": 4.692596795529289e-08, "logits/chosen": -2.270209312438965, "logits/rejected": -2.2719197273254395, "logps/chosen": -3.529463052749634, "logps/rejected": -7.147891521453857, "loss": 0.6577, "rewards/accuracies": 0.0, "rewards/chosen": 0.6481128931045532, "rewards/margins": -0.26290053129196167, "rewards/rejected": 0.9110134243965149, "step": 633 }, { "epoch": 1.61, "learning_rate": 4.6789528370014325e-08, "logits/chosen": -2.160151481628418, "logits/rejected": -2.1630167961120605, "logps/chosen": -1.4370038509368896, "logps/rejected": -8.150344848632812, "loss": 0.7146, "rewards/accuracies": 0.0, "rewards/chosen": 0.7551178932189941, "rewards/margins": -0.210873544216156, "rewards/rejected": 0.9659914374351501, "step": 634 }, { "epoch": 1.61, "learning_rate": 4.665311278567592e-08, "logits/chosen": -2.2272167205810547, "logits/rejected": -2.2239861488342285, "logps/chosen": -2.600299596786499, "logps/rejected": -4.037546157836914, "loss": 0.6641, "rewards/accuracies": 0.0, "rewards/chosen": 0.6309739351272583, "rewards/margins": -0.18041402101516724, "rewards/rejected": 0.8113879561424255, "step": 635 }, { "epoch": 1.61, "learning_rate": 4.6516722222097375e-08, "logits/chosen": -2.2296643257141113, "logits/rejected": -2.2127926349639893, "logps/chosen": -1.4523742198944092, "logps/rejected": -9.06702709197998, "loss": 0.5971, "rewards/accuracies": 0.0, "rewards/chosen": 0.6778548359870911, "rewards/margins": -0.0003503561019897461, "rewards/rejected": 0.6782051920890808, "step": 636 }, { "epoch": 1.61, "learning_rate": 4.638035769891138e-08, "logits/chosen": -2.293264150619507, "logits/rejected": -2.3039181232452393, "logps/chosen": -6.797041416168213, "logps/rejected": -0.8417494297027588, "loss": 0.6462, "rewards/accuracies": 1.0, "rewards/chosen": 0.913195788860321, "rewards/margins": 0.15535902976989746, "rewards/rejected": 0.7578367590904236, "step": 637 }, { "epoch": 1.62, "learning_rate": 4.624402023555591e-08, "logits/chosen": -2.1713085174560547, "logits/rejected": -2.176954507827759, "logps/chosen": -1.2268322706222534, "logps/rejected": -7.059919357299805, "loss": 0.6084, "rewards/accuracies": 1.0, "rewards/chosen": 0.8473292589187622, "rewards/margins": 0.4409445822238922, "rewards/rejected": 0.40638467669487, "step": 638 }, { "epoch": 1.62, "learning_rate": 4.610771085126669e-08, "logits/chosen": -2.1004674434661865, "logits/rejected": -2.091999053955078, "logps/chosen": -1.5612305402755737, "logps/rejected": -2.8839268684387207, "loss": 0.7741, "rewards/accuracies": 0.0, "rewards/chosen": 0.6450486183166504, "rewards/margins": -0.3964780569076538, "rewards/rejected": 1.0415266752243042, "step": 639 }, { "epoch": 1.62, "learning_rate": 4.5971430565069457e-08, "logits/chosen": -2.372856378555298, "logits/rejected": -2.5030295848846436, "logps/chosen": -1.892558217048645, "logps/rejected": -14.477828979492188, "loss": 0.6322, "rewards/accuracies": 1.0, "rewards/chosen": 0.7573800086975098, "rewards/margins": 0.05282086133956909, "rewards/rejected": 0.7045591473579407, "step": 640 }, { "epoch": 1.62, "learning_rate": 4.583518039577252e-08, "logits/chosen": -2.1173081398010254, "logits/rejected": -2.111046075820923, "logps/chosen": -1.9616144895553589, "logps/rejected": -4.4884748458862305, "loss": 0.704, "rewards/accuracies": 0.0, "rewards/chosen": 0.5410465598106384, "rewards/margins": -0.48893243074417114, "rewards/rejected": 1.0299789905548096, "step": 641 }, { "epoch": 1.63, "learning_rate": 4.569896136195895e-08, "logits/chosen": -2.125168800354004, "logits/rejected": -2.1367011070251465, "logps/chosen": -2.231745958328247, "logps/rejected": -7.651500701904297, "loss": 0.724, "rewards/accuracies": 1.0, "rewards/chosen": 0.746056079864502, "rewards/margins": 0.3209289610385895, "rewards/rejected": 0.4251271188259125, "step": 642 }, { "epoch": 1.63, "learning_rate": 4.556277448197908e-08, "logits/chosen": -2.2339062690734863, "logits/rejected": -2.2156989574432373, "logps/chosen": -1.4572975635528564, "logps/rejected": -3.4600369930267334, "loss": 0.6685, "rewards/accuracies": 0.0, "rewards/chosen": 0.6904759407043457, "rewards/margins": -0.2162289023399353, "rewards/rejected": 0.906704843044281, "step": 643 }, { "epoch": 1.63, "learning_rate": 4.542662077394292e-08, "logits/chosen": -2.329294443130493, "logits/rejected": -2.4282333850860596, "logps/chosen": -10.631451606750488, "logps/rejected": -25.83261489868164, "loss": 0.5836, "rewards/accuracies": 1.0, "rewards/chosen": 0.8698520064353943, "rewards/margins": 0.47774118185043335, "rewards/rejected": 0.39211082458496094, "step": 644 }, { "epoch": 1.63, "learning_rate": 4.529050125571241e-08, "logits/chosen": -2.1207566261291504, "logits/rejected": -2.128788709640503, "logps/chosen": -3.4658639430999756, "logps/rejected": -3.3107709884643555, "loss": 0.649, "rewards/accuracies": 1.0, "rewards/chosen": 0.8810550570487976, "rewards/margins": 0.31628215312957764, "rewards/rejected": 0.56477290391922, "step": 645 }, { "epoch": 1.64, "learning_rate": 4.515441694489393e-08, "logits/chosen": -2.1416962146759033, "logits/rejected": -2.157191514968872, "logps/chosen": -5.066739559173584, "logps/rejected": -14.033540725708008, "loss": 0.6509, "rewards/accuracies": 1.0, "rewards/chosen": 0.6893501877784729, "rewards/margins": 0.04064333438873291, "rewards/rejected": 0.64870685338974, "step": 646 }, { "epoch": 1.64, "learning_rate": 4.501836885883064e-08, "logits/chosen": -2.2177646160125732, "logits/rejected": -2.2196366786956787, "logps/chosen": -2.57896089553833, "logps/rejected": -5.638298034667969, "loss": 0.7522, "rewards/accuracies": 0.0, "rewards/chosen": 0.5942735075950623, "rewards/margins": -0.1828402280807495, "rewards/rejected": 0.7771137356758118, "step": 647 }, { "epoch": 1.64, "learning_rate": 4.488235801459495e-08, "logits/chosen": -2.2892820835113525, "logits/rejected": -2.3052327632904053, "logps/chosen": -2.6371986865997314, "logps/rejected": -12.952171325683594, "loss": 0.6355, "rewards/accuracies": 1.0, "rewards/chosen": 0.7452912330627441, "rewards/margins": 0.5187880396842957, "rewards/rejected": 0.2265031784772873, "step": 648 }, { "epoch": 1.64, "learning_rate": 4.474638542898078e-08, "logits/chosen": -2.2374255657196045, "logits/rejected": -2.2465403079986572, "logps/chosen": -5.258137226104736, "logps/rejected": -1.964174509048462, "loss": 0.5761, "rewards/accuracies": 0.0, "rewards/chosen": 0.6018245220184326, "rewards/margins": -0.0656731128692627, "rewards/rejected": 0.6674976348876953, "step": 649 }, { "epoch": 1.65, "learning_rate": 4.461045211849605e-08, "logits/chosen": -2.1592140197753906, "logits/rejected": -2.1423041820526123, "logps/chosen": -9.49968147277832, "logps/rejected": -2.651745080947876, "loss": 0.7221, "rewards/accuracies": 0.0, "rewards/chosen": 0.513927161693573, "rewards/margins": -0.3640943765640259, "rewards/rejected": 0.8780215382575989, "step": 650 }, { "epoch": 1.65, "learning_rate": 4.447455909935513e-08, "logits/chosen": -2.251946449279785, "logits/rejected": -2.256498098373413, "logps/chosen": -18.571998596191406, "logps/rejected": -11.95301342010498, "loss": 0.7047, "rewards/accuracies": 0.0, "rewards/chosen": 0.6227094531059265, "rewards/margins": -0.39316290616989136, "rewards/rejected": 1.0158723592758179, "step": 651 }, { "epoch": 1.65, "learning_rate": 4.4338707387471106e-08, "logits/chosen": -2.352497100830078, "logits/rejected": -2.362208366394043, "logps/chosen": -2.4317665100097656, "logps/rejected": -4.347914695739746, "loss": 0.616, "rewards/accuracies": 1.0, "rewards/chosen": 0.7799474596977234, "rewards/margins": 0.3259752094745636, "rewards/rejected": 0.4539722502231598, "step": 652 }, { "epoch": 1.65, "learning_rate": 4.420289799844825e-08, "logits/chosen": -2.237452745437622, "logits/rejected": -2.236259698867798, "logps/chosen": -2.8723292350769043, "logps/rejected": -7.786721229553223, "loss": 0.7964, "rewards/accuracies": 0.0, "rewards/chosen": 0.5716705918312073, "rewards/margins": -0.2577711343765259, "rewards/rejected": 0.8294417262077332, "step": 653 }, { "epoch": 1.66, "learning_rate": 4.4067131947574506e-08, "logits/chosen": -2.156238317489624, "logits/rejected": -2.197721481323242, "logps/chosen": -1.0008310079574585, "logps/rejected": -6.417344093322754, "loss": 0.6045, "rewards/accuracies": 0.0, "rewards/chosen": 0.6220870018005371, "rewards/margins": -0.19753646850585938, "rewards/rejected": 0.8196234703063965, "step": 654 }, { "epoch": 1.66, "learning_rate": 4.39314102498138e-08, "logits/chosen": -2.2098968029022217, "logits/rejected": -2.222322463989258, "logps/chosen": -4.849008083343506, "logps/rejected": -3.741032123565674, "loss": 0.5523, "rewards/accuracies": 1.0, "rewards/chosen": 1.1409025192260742, "rewards/margins": 0.5459978580474854, "rewards/rejected": 0.5949046611785889, "step": 655 }, { "epoch": 1.66, "learning_rate": 4.379573391979845e-08, "logits/chosen": -2.3241872787475586, "logits/rejected": -2.32600474357605, "logps/chosen": -1.3722200393676758, "logps/rejected": -4.489802360534668, "loss": 0.7193, "rewards/accuracies": 0.0, "rewards/chosen": 0.8029049038887024, "rewards/margins": -0.002040386199951172, "rewards/rejected": 0.8049452900886536, "step": 656 }, { "epoch": 1.66, "learning_rate": 4.3660103971821625e-08, "logits/chosen": -2.0659408569335938, "logits/rejected": -2.078007936477661, "logps/chosen": -19.296981811523438, "logps/rejected": -11.485359191894531, "loss": 0.6757, "rewards/accuracies": 0.0, "rewards/chosen": 0.6667301058769226, "rewards/margins": -0.14216136932373047, "rewards/rejected": 0.8088914752006531, "step": 657 }, { "epoch": 1.67, "learning_rate": 4.3524521419829786e-08, "logits/chosen": -2.216618537902832, "logits/rejected": -2.218275785446167, "logps/chosen": -1.710777997970581, "logps/rejected": -3.8822269439697266, "loss": 0.5876, "rewards/accuracies": 1.0, "rewards/chosen": 1.0298179388046265, "rewards/margins": 0.5088471174240112, "rewards/rejected": 0.5209708213806152, "step": 658 }, { "epoch": 1.67, "learning_rate": 4.338898727741504e-08, "logits/chosen": -2.2613003253936768, "logits/rejected": -2.2580668926239014, "logps/chosen": -1.446802020072937, "logps/rejected": -7.955979824066162, "loss": 0.5899, "rewards/accuracies": 0.0, "rewards/chosen": 0.5714051127433777, "rewards/margins": -0.24874258041381836, "rewards/rejected": 0.820147693157196, "step": 659 }, { "epoch": 1.67, "learning_rate": 4.325350255780757e-08, "logits/chosen": -2.214165449142456, "logits/rejected": -2.222931146621704, "logps/chosen": -5.613105297088623, "logps/rejected": -5.287190914154053, "loss": 0.5746, "rewards/accuracies": 1.0, "rewards/chosen": 1.1625064611434937, "rewards/margins": 0.6981580257415771, "rewards/rejected": 0.4643484652042389, "step": 660 }, { "epoch": 1.67, "learning_rate": 4.311806827386815e-08, "logits/chosen": -2.3536972999572754, "logits/rejected": -2.352720022201538, "logps/chosen": -0.6815727353096008, "logps/rejected": -9.03874397277832, "loss": 0.5998, "rewards/accuracies": 0.0, "rewards/chosen": 0.6992121934890747, "rewards/margins": -0.1459670066833496, "rewards/rejected": 0.8451792001724243, "step": 661 }, { "epoch": 1.68, "learning_rate": 4.298268543808042e-08, "logits/chosen": -2.22646164894104, "logits/rejected": -2.233038902282715, "logps/chosen": -1.9587892293930054, "logps/rejected": -5.297250747680664, "loss": 0.6971, "rewards/accuracies": 1.0, "rewards/chosen": 0.7508276700973511, "rewards/margins": 0.38593700528144836, "rewards/rejected": 0.3648906648159027, "step": 662 }, { "epoch": 1.68, "learning_rate": 4.2847355062543494e-08, "logits/chosen": -2.3127012252807617, "logits/rejected": -2.3352863788604736, "logps/chosen": -5.9994072914123535, "logps/rejected": -7.460290431976318, "loss": 0.8612, "rewards/accuracies": 0.0, "rewards/chosen": 0.644493818283081, "rewards/margins": -0.2884909510612488, "rewards/rejected": 0.9329847693443298, "step": 663 }, { "epoch": 1.68, "learning_rate": 4.27120781589642e-08, "logits/chosen": -2.1993746757507324, "logits/rejected": -2.202763795852661, "logps/chosen": -1.2179145812988281, "logps/rejected": -2.7684178352355957, "loss": 0.5927, "rewards/accuracies": 1.0, "rewards/chosen": 0.7952796816825867, "rewards/margins": 0.2999069392681122, "rewards/rejected": 0.4953727424144745, "step": 664 }, { "epoch": 1.68, "learning_rate": 4.257685573864971e-08, "logits/chosen": -2.258711338043213, "logits/rejected": -2.2629261016845703, "logps/chosen": -1.6524860858917236, "logps/rejected": -6.115100860595703, "loss": 0.5191, "rewards/accuracies": 1.0, "rewards/chosen": 0.8809008598327637, "rewards/margins": 0.367977499961853, "rewards/rejected": 0.5129233598709106, "step": 665 }, { "epoch": 1.69, "learning_rate": 4.2441688812499854e-08, "logits/chosen": -2.3620622158050537, "logits/rejected": -2.465083599090576, "logps/chosen": -0.588459849357605, "logps/rejected": -20.52385711669922, "loss": 0.6669, "rewards/accuracies": 1.0, "rewards/chosen": 0.7510198950767517, "rewards/margins": 0.6318420767784119, "rewards/rejected": 0.11917781829833984, "step": 666 }, { "epoch": 1.69, "learning_rate": 4.230657839099957e-08, "logits/chosen": -2.0920393466949463, "logits/rejected": -2.1022422313690186, "logps/chosen": -2.82631254196167, "logps/rejected": -6.914973735809326, "loss": 0.6942, "rewards/accuracies": 0.0, "rewards/chosen": 0.4391288757324219, "rewards/margins": -0.43604224920272827, "rewards/rejected": 0.8751711249351501, "step": 667 }, { "epoch": 1.69, "learning_rate": 4.2171525484211426e-08, "logits/chosen": -2.2965476512908936, "logits/rejected": -2.302499294281006, "logps/chosen": -1.8131451606750488, "logps/rejected": -3.5382063388824463, "loss": 0.5404, "rewards/accuracies": 1.0, "rewards/chosen": 0.9446922540664673, "rewards/margins": 0.3800815939903259, "rewards/rejected": 0.5646106600761414, "step": 668 }, { "epoch": 1.69, "learning_rate": 4.203653110176798e-08, "logits/chosen": -2.2403550148010254, "logits/rejected": -2.2459588050842285, "logps/chosen": -1.8402973413467407, "logps/rejected": -3.635242462158203, "loss": 0.5879, "rewards/accuracies": 1.0, "rewards/chosen": 0.8928884863853455, "rewards/margins": 0.2042222023010254, "rewards/rejected": 0.6886662840843201, "step": 669 }, { "epoch": 1.7, "learning_rate": 4.190159625286428e-08, "logits/chosen": -2.1736559867858887, "logits/rejected": -2.174734354019165, "logps/chosen": -5.173153400421143, "logps/rejected": -1.0836812257766724, "loss": 0.6453, "rewards/accuracies": 0.0, "rewards/chosen": 0.6572477221488953, "rewards/margins": -0.1384427547454834, "rewards/rejected": 0.7956904768943787, "step": 670 }, { "epoch": 1.7, "learning_rate": 4.176672194625035e-08, "logits/chosen": -2.3030221462249756, "logits/rejected": -2.3083202838897705, "logps/chosen": -2.6602649688720703, "logps/rejected": -1.360945701599121, "loss": 0.7164, "rewards/accuracies": 0.0, "rewards/chosen": 0.5533637404441833, "rewards/margins": -0.22525358200073242, "rewards/rejected": 0.7786173224449158, "step": 671 }, { "epoch": 1.7, "learning_rate": 4.163190919022356e-08, "logits/chosen": -2.2801566123962402, "logits/rejected": -2.2855725288391113, "logps/chosen": -2.0582454204559326, "logps/rejected": -5.441844940185547, "loss": 0.5506, "rewards/accuracies": 1.0, "rewards/chosen": 0.7903360724449158, "rewards/margins": 0.4532739818096161, "rewards/rejected": 0.3370620906352997, "step": 672 }, { "epoch": 1.7, "learning_rate": 4.149715899262115e-08, "logits/chosen": -2.3385210037231445, "logits/rejected": -2.349263906478882, "logps/chosen": -10.293669700622559, "logps/rejected": -1.242784857749939, "loss": 0.5884, "rewards/accuracies": 1.0, "rewards/chosen": 0.7502263188362122, "rewards/margins": 0.10015761852264404, "rewards/rejected": 0.6500687003135681, "step": 673 }, { "epoch": 1.71, "learning_rate": 4.136247236081268e-08, "logits/chosen": -2.2912399768829346, "logits/rejected": -2.2943613529205322, "logps/chosen": -1.6936521530151367, "logps/rejected": -3.8422272205352783, "loss": 0.6202, "rewards/accuracies": 1.0, "rewards/chosen": 0.8952227830886841, "rewards/margins": 0.4222681224346161, "rewards/rejected": 0.472954660654068, "step": 674 }, { "epoch": 1.71, "learning_rate": 4.122785030169255e-08, "logits/chosen": -2.255397081375122, "logits/rejected": -2.2586376667022705, "logps/chosen": -2.4434332847595215, "logps/rejected": -4.045342922210693, "loss": 0.7212, "rewards/accuracies": 0.0, "rewards/chosen": 0.6493885517120361, "rewards/margins": -0.15187793970108032, "rewards/rejected": 0.8012664914131165, "step": 675 }, { "epoch": 1.71, "learning_rate": 4.1093293821672364e-08, "logits/chosen": -2.3219165802001953, "logits/rejected": -2.3268580436706543, "logps/chosen": -2.786233901977539, "logps/rejected": -8.520256996154785, "loss": 0.722, "rewards/accuracies": 0.0, "rewards/chosen": 0.6436484456062317, "rewards/margins": -0.099021315574646, "rewards/rejected": 0.7426697611808777, "step": 676 }, { "epoch": 1.71, "learning_rate": 4.095880392667348e-08, "logits/chosen": -2.277390956878662, "logits/rejected": -2.2831218242645264, "logps/chosen": -6.829802513122559, "logps/rejected": -4.010682106018066, "loss": 0.5939, "rewards/accuracies": 1.0, "rewards/chosen": 1.080366611480713, "rewards/margins": 0.5811817049980164, "rewards/rejected": 0.49918490648269653, "step": 677 }, { "epoch": 1.72, "learning_rate": 4.082438162211954e-08, "logits/chosen": -2.2135097980499268, "logits/rejected": -2.2029199600219727, "logps/chosen": -1.7394216060638428, "logps/rejected": -6.766752243041992, "loss": 0.7975, "rewards/accuracies": 0.0, "rewards/chosen": 0.7639983296394348, "rewards/margins": -0.06293946504592896, "rewards/rejected": 0.8269377946853638, "step": 678 }, { "epoch": 1.72, "learning_rate": 4.0690027912928816e-08, "logits/chosen": -2.271045684814453, "logits/rejected": -2.258331060409546, "logps/chosen": -1.605610728263855, "logps/rejected": -6.116396903991699, "loss": 0.7624, "rewards/accuracies": 0.0, "rewards/chosen": 0.7317267656326294, "rewards/margins": -0.02314549684524536, "rewards/rejected": 0.7548722624778748, "step": 679 }, { "epoch": 1.72, "learning_rate": 4.055574380350677e-08, "logits/chosen": -2.2217040061950684, "logits/rejected": -2.2088119983673096, "logps/chosen": -1.1397144794464111, "logps/rejected": -6.568449974060059, "loss": 0.6592, "rewards/accuracies": 0.0, "rewards/chosen": 0.7009389996528625, "rewards/margins": -0.17027634382247925, "rewards/rejected": 0.8712153434753418, "step": 680 }, { "epoch": 1.72, "learning_rate": 4.0421530297738604e-08, "logits/chosen": -2.200624465942383, "logits/rejected": -2.2060914039611816, "logps/chosen": -4.606695175170898, "logps/rejected": -2.9495272636413574, "loss": 0.7945, "rewards/accuracies": 0.0, "rewards/chosen": 0.4586862623691559, "rewards/margins": -0.3437057435512543, "rewards/rejected": 0.8023920059204102, "step": 681 }, { "epoch": 1.73, "learning_rate": 4.028738839898168e-08, "logits/chosen": -2.349238634109497, "logits/rejected": -2.344846248626709, "logps/chosen": -5.673091888427734, "logps/rejected": -5.916996002197266, "loss": 0.7098, "rewards/accuracies": 0.0, "rewards/chosen": 0.6450086832046509, "rewards/margins": -0.15317046642303467, "rewards/rejected": 0.7981791496276855, "step": 682 }, { "epoch": 1.73, "learning_rate": 4.015331911005802e-08, "logits/chosen": -2.2681400775909424, "logits/rejected": -2.2703475952148438, "logps/chosen": -1.1157965660095215, "logps/rejected": -6.615094184875488, "loss": 0.5716, "rewards/accuracies": 1.0, "rewards/chosen": 0.906607449054718, "rewards/margins": 0.13366669416427612, "rewards/rejected": 0.7729407548904419, "step": 683 }, { "epoch": 1.73, "learning_rate": 4.001932343324683e-08, "logits/chosen": -2.2589337825775146, "logits/rejected": -2.2584187984466553, "logps/chosen": -2.435539722442627, "logps/rejected": -3.491102695465088, "loss": 0.5522, "rewards/accuracies": 1.0, "rewards/chosen": 0.8650665283203125, "rewards/margins": 0.2053198218345642, "rewards/rejected": 0.6597467064857483, "step": 684 }, { "epoch": 1.73, "learning_rate": 3.988540237027702e-08, "logits/chosen": -2.3565568923950195, "logits/rejected": -2.3555846214294434, "logps/chosen": -0.37212276458740234, "logps/rejected": -6.525988578796387, "loss": 0.7188, "rewards/accuracies": 0.0, "rewards/chosen": 0.5926082730293274, "rewards/margins": -0.2842484712600708, "rewards/rejected": 0.8768567442893982, "step": 685 }, { "epoch": 1.74, "learning_rate": 3.97515569223197e-08, "logits/chosen": -2.287966251373291, "logits/rejected": -2.3236591815948486, "logps/chosen": -0.719757080078125, "logps/rejected": -16.325952529907227, "loss": 0.5694, "rewards/accuracies": 0.0, "rewards/chosen": 0.6651017069816589, "rewards/margins": -0.019394397735595703, "rewards/rejected": 0.6844961047172546, "step": 686 }, { "epoch": 1.74, "learning_rate": 3.961778808998065e-08, "logits/chosen": -2.3617494106292725, "logits/rejected": -2.350682020187378, "logps/chosen": -7.21851110458374, "logps/rejected": -7.9231061935424805, "loss": 0.6432, "rewards/accuracies": 1.0, "rewards/chosen": 0.9423160552978516, "rewards/margins": 0.3195856809616089, "rewards/rejected": 0.6227303743362427, "step": 687 }, { "epoch": 1.74, "learning_rate": 3.948409687329297e-08, "logits/chosen": -2.22383189201355, "logits/rejected": -2.2355263233184814, "logps/chosen": -2.142350196838379, "logps/rejected": -4.241612434387207, "loss": 0.5294, "rewards/accuracies": 1.0, "rewards/chosen": 0.7540459632873535, "rewards/margins": 0.3778601586818695, "rewards/rejected": 0.376185804605484, "step": 688 }, { "epoch": 1.74, "learning_rate": 3.9350484271709436e-08, "logits/chosen": -2.2361624240875244, "logits/rejected": -2.2298951148986816, "logps/chosen": -2.8579342365264893, "logps/rejected": -2.585890054702759, "loss": 0.6925, "rewards/accuracies": 0.0, "rewards/chosen": 0.6452094316482544, "rewards/margins": -0.2395041584968567, "rewards/rejected": 0.8847135901451111, "step": 689 }, { "epoch": 1.75, "learning_rate": 3.921695128409517e-08, "logits/chosen": -2.2603776454925537, "logits/rejected": -2.2659358978271484, "logps/chosen": -0.9376634359359741, "logps/rejected": -1.942694902420044, "loss": 0.6065, "rewards/accuracies": 1.0, "rewards/chosen": 0.8977047801017761, "rewards/margins": 0.38175684213638306, "rewards/rejected": 0.5159479379653931, "step": 690 }, { "epoch": 1.75, "learning_rate": 3.908349890872005e-08, "logits/chosen": -2.180094003677368, "logits/rejected": -2.1738240718841553, "logps/chosen": -2.1797473430633545, "logps/rejected": -7.605336666107178, "loss": 0.6352, "rewards/accuracies": 0.0, "rewards/chosen": 0.5641927719116211, "rewards/margins": -0.22528594732284546, "rewards/rejected": 0.7894787192344666, "step": 691 }, { "epoch": 1.75, "learning_rate": 3.895012814325138e-08, "logits/chosen": -2.236074447631836, "logits/rejected": -2.239887237548828, "logps/chosen": -2.084770917892456, "logps/rejected": -2.221055269241333, "loss": 0.7038, "rewards/accuracies": 1.0, "rewards/chosen": 0.8794999122619629, "rewards/margins": 0.30932003259658813, "rewards/rejected": 0.5701798796653748, "step": 692 }, { "epoch": 1.75, "learning_rate": 3.881683998474633e-08, "logits/chosen": -2.3501174449920654, "logits/rejected": -2.347633123397827, "logps/chosen": -1.527370572090149, "logps/rejected": -15.08746337890625, "loss": 0.6844, "rewards/accuracies": 1.0, "rewards/chosen": 0.823964536190033, "rewards/margins": 0.0713033676147461, "rewards/rejected": 0.7526611685752869, "step": 693 }, { "epoch": 1.76, "learning_rate": 3.868363542964449e-08, "logits/chosen": -2.295727252960205, "logits/rejected": -2.304751396179199, "logps/chosen": -7.777189254760742, "logps/rejected": -2.4378035068511963, "loss": 0.5853, "rewards/accuracies": 1.0, "rewards/chosen": 0.9434648752212524, "rewards/margins": 0.18969076871871948, "rewards/rejected": 0.753774106502533, "step": 694 }, { "epoch": 1.76, "learning_rate": 3.855051547376051e-08, "logits/chosen": -2.3203649520874023, "logits/rejected": -2.3153066635131836, "logps/chosen": -2.0287070274353027, "logps/rejected": -7.924285411834717, "loss": 0.647, "rewards/accuracies": 1.0, "rewards/chosen": 0.9087425470352173, "rewards/margins": 0.2515680193901062, "rewards/rejected": 0.6571745276451111, "step": 695 }, { "epoch": 1.76, "learning_rate": 3.841748111227651e-08, "logits/chosen": -2.2939798831939697, "logits/rejected": -2.28953218460083, "logps/chosen": -2.4446473121643066, "logps/rejected": -5.201775074005127, "loss": 0.6192, "rewards/accuracies": 1.0, "rewards/chosen": 0.7729484438896179, "rewards/margins": 0.046493351459503174, "rewards/rejected": 0.7264550924301147, "step": 696 }, { "epoch": 1.76, "learning_rate": 3.82845333397348e-08, "logits/chosen": -2.2686917781829834, "logits/rejected": -2.260571002960205, "logps/chosen": -2.6063482761383057, "logps/rejected": -2.3857500553131104, "loss": 0.7635, "rewards/accuracies": 0.0, "rewards/chosen": 0.6773138046264648, "rewards/margins": -0.3266582489013672, "rewards/rejected": 1.003972053527832, "step": 697 }, { "epoch": 1.77, "learning_rate": 3.8151673150030275e-08, "logits/chosen": -2.2861385345458984, "logits/rejected": -2.2848427295684814, "logps/chosen": -1.048140525817871, "logps/rejected": -7.657500267028809, "loss": 0.5818, "rewards/accuracies": 1.0, "rewards/chosen": 0.8448584675788879, "rewards/margins": 0.1333954930305481, "rewards/rejected": 0.7114629745483398, "step": 698 }, { "epoch": 1.77, "learning_rate": 3.801890153640319e-08, "logits/chosen": -2.1561648845672607, "logits/rejected": -2.157977342605591, "logps/chosen": -4.3138861656188965, "logps/rejected": -1.865870475769043, "loss": 0.6691, "rewards/accuracies": 1.0, "rewards/chosen": 0.8873829245567322, "rewards/margins": 0.269550621509552, "rewards/rejected": 0.6178323030471802, "step": 699 }, { "epoch": 1.77, "learning_rate": 3.7886219491431515e-08, "logits/chosen": -2.235790491104126, "logits/rejected": -2.235405445098877, "logps/chosen": -1.473474383354187, "logps/rejected": -9.597271919250488, "loss": 0.6522, "rewards/accuracies": 0.0, "rewards/chosen": 0.6953245401382446, "rewards/margins": -0.133761465549469, "rewards/rejected": 0.8290860056877136, "step": 700 }, { "epoch": 1.77, "learning_rate": 3.7753628007023666e-08, "logits/chosen": -2.2086989879608154, "logits/rejected": -2.206300973892212, "logps/chosen": -2.9698774814605713, "logps/rejected": -7.752259731292725, "loss": 0.6412, "rewards/accuracies": 0.0, "rewards/chosen": 0.5142492651939392, "rewards/margins": -0.09351617097854614, "rewards/rejected": 0.6077654361724854, "step": 701 }, { "epoch": 1.78, "learning_rate": 3.7621128074411074e-08, "logits/chosen": -2.24367094039917, "logits/rejected": -2.241811990737915, "logps/chosen": -3.4664225578308105, "logps/rejected": -2.7362418174743652, "loss": 0.6836, "rewards/accuracies": 0.0, "rewards/chosen": 0.7343796491622925, "rewards/margins": -0.1606183648109436, "rewards/rejected": 0.8949980139732361, "step": 702 }, { "epoch": 1.78, "learning_rate": 3.748872068414068e-08, "logits/chosen": -2.233846426010132, "logits/rejected": -2.23519229888916, "logps/chosen": -1.4365267753601074, "logps/rejected": -4.3279218673706055, "loss": 0.6886, "rewards/accuracies": 1.0, "rewards/chosen": 0.8540849685668945, "rewards/margins": 0.3986474871635437, "rewards/rejected": 0.45543748140335083, "step": 703 }, { "epoch": 1.78, "learning_rate": 3.7356406826067635e-08, "logits/chosen": -2.314692258834839, "logits/rejected": -2.3255090713500977, "logps/chosen": -12.084449768066406, "logps/rejected": -2.8118081092834473, "loss": 0.6973, "rewards/accuracies": 1.0, "rewards/chosen": 0.7898516058921814, "rewards/margins": 0.17387330532073975, "rewards/rejected": 0.6159783005714417, "step": 704 }, { "epoch": 1.78, "learning_rate": 3.722418748934784e-08, "logits/chosen": -2.342528820037842, "logits/rejected": -2.3437461853027344, "logps/chosen": -2.752605676651001, "logps/rejected": -4.174252510070801, "loss": 0.6961, "rewards/accuracies": 0.0, "rewards/chosen": 0.5882887840270996, "rewards/margins": -0.28852999210357666, "rewards/rejected": 0.8768187761306763, "step": 705 }, { "epoch": 1.79, "learning_rate": 3.709206366243061e-08, "logits/chosen": -2.2073419094085693, "logits/rejected": -2.212672233581543, "logps/chosen": -1.7472513914108276, "logps/rejected": -4.076907157897949, "loss": 0.5944, "rewards/accuracies": 1.0, "rewards/chosen": 0.7451313138008118, "rewards/margins": 0.33973047137260437, "rewards/rejected": 0.4054008424282074, "step": 706 }, { "epoch": 1.79, "learning_rate": 3.6960036333051184e-08, "logits/chosen": -2.2737505435943604, "logits/rejected": -2.281548500061035, "logps/chosen": -2.3949544429779053, "logps/rejected": -3.8520803451538086, "loss": 0.6176, "rewards/accuracies": 1.0, "rewards/chosen": 1.1856269836425781, "rewards/margins": 0.4540623426437378, "rewards/rejected": 0.7315646409988403, "step": 707 }, { "epoch": 1.79, "learning_rate": 3.682810648822342e-08, "logits/chosen": -2.1437761783599854, "logits/rejected": -2.1248042583465576, "logps/chosen": -4.681770324707031, "logps/rejected": -2.4497151374816895, "loss": 0.635, "rewards/accuracies": 1.0, "rewards/chosen": 0.8545028567314148, "rewards/margins": 0.19056248664855957, "rewards/rejected": 0.6639403700828552, "step": 708 }, { "epoch": 1.79, "learning_rate": 3.669627511423247e-08, "logits/chosen": -2.279451370239258, "logits/rejected": -2.2814648151397705, "logps/chosen": -3.5287065505981445, "logps/rejected": -3.5165019035339355, "loss": 0.6059, "rewards/accuracies": 1.0, "rewards/chosen": 0.9269917607307434, "rewards/margins": 0.39315295219421387, "rewards/rejected": 0.5338388085365295, "step": 709 }, { "epoch": 1.8, "learning_rate": 3.656454319662724e-08, "logits/chosen": -2.1816556453704834, "logits/rejected": -2.1745269298553467, "logps/chosen": -2.408224582672119, "logps/rejected": -3.9457809925079346, "loss": 0.6243, "rewards/accuracies": 0.0, "rewards/chosen": 0.7718287110328674, "rewards/margins": -0.1615014672279358, "rewards/rejected": 0.9333301782608032, "step": 710 }, { "epoch": 1.8, "learning_rate": 3.6432911720213124e-08, "logits/chosen": -2.1404130458831787, "logits/rejected": -2.148480176925659, "logps/chosen": -7.230446815490723, "logps/rejected": -4.41757869720459, "loss": 0.5586, "rewards/accuracies": 1.0, "rewards/chosen": 1.071311116218567, "rewards/margins": 0.5462648272514343, "rewards/rejected": 0.5250462889671326, "step": 711 }, { "epoch": 1.8, "learning_rate": 3.6301381669044704e-08, "logits/chosen": -2.183063507080078, "logits/rejected": -2.185628890991211, "logps/chosen": -4.638848781585693, "logps/rejected": -7.108486652374268, "loss": 0.634, "rewards/accuracies": 0.0, "rewards/chosen": 0.700901448726654, "rewards/margins": -0.049539387226104736, "rewards/rejected": 0.7504408359527588, "step": 712 }, { "epoch": 1.81, "learning_rate": 3.616995402641828e-08, "logits/chosen": -2.240652084350586, "logits/rejected": -2.2512447834014893, "logps/chosen": -4.027718544006348, "logps/rejected": -3.590097427368164, "loss": 0.4929, "rewards/accuracies": 1.0, "rewards/chosen": 1.0197710990905762, "rewards/margins": 0.5139434933662415, "rewards/rejected": 0.5058276057243347, "step": 713 }, { "epoch": 1.81, "learning_rate": 3.603862977486456e-08, "logits/chosen": -2.209620237350464, "logits/rejected": -2.20334529876709, "logps/chosen": -1.9111919403076172, "logps/rejected": -5.014249324798584, "loss": 0.5732, "rewards/accuracies": 0.0, "rewards/chosen": 0.71041339635849, "rewards/margins": -0.04040956497192383, "rewards/rejected": 0.7508229613304138, "step": 714 }, { "epoch": 1.81, "learning_rate": 3.5907409896141304e-08, "logits/chosen": -2.2257235050201416, "logits/rejected": -2.2236812114715576, "logps/chosen": -1.5078842639923096, "logps/rejected": -7.188425064086914, "loss": 0.6112, "rewards/accuracies": 1.0, "rewards/chosen": 0.7521106600761414, "rewards/margins": 0.14740008115768433, "rewards/rejected": 0.604710578918457, "step": 715 }, { "epoch": 1.81, "learning_rate": 3.577629537122605e-08, "logits/chosen": -2.2901787757873535, "logits/rejected": -2.289010763168335, "logps/chosen": -3.9799981117248535, "logps/rejected": -5.446813106536865, "loss": 0.6148, "rewards/accuracies": 1.0, "rewards/chosen": 0.8076738715171814, "rewards/margins": 0.3353879749774933, "rewards/rejected": 0.4722858965396881, "step": 716 }, { "epoch": 1.82, "learning_rate": 3.564528718030869e-08, "logits/chosen": -2.2357850074768066, "logits/rejected": -2.3104355335235596, "logps/chosen": -3.3493573665618896, "logps/rejected": -20.41698455810547, "loss": 0.6745, "rewards/accuracies": 1.0, "rewards/chosen": 0.6709755063056946, "rewards/margins": 0.09153807163238525, "rewards/rejected": 0.5794374346733093, "step": 717 }, { "epoch": 1.82, "learning_rate": 3.551438630278416e-08, "logits/chosen": -2.233393430709839, "logits/rejected": -2.2289392948150635, "logps/chosen": -10.397439956665039, "logps/rejected": -6.801542282104492, "loss": 0.7499, "rewards/accuracies": 0.0, "rewards/chosen": 0.48024293780326843, "rewards/margins": -0.28033027052879333, "rewards/rejected": 0.7605732083320618, "step": 718 }, { "epoch": 1.82, "learning_rate": 3.538359371724522e-08, "logits/chosen": -2.221781015396118, "logits/rejected": -2.2104151248931885, "logps/chosen": -20.257965087890625, "logps/rejected": -4.363966941833496, "loss": 0.6757, "rewards/accuracies": 0.0, "rewards/chosen": 0.7771305441856384, "rewards/margins": -0.16388416290283203, "rewards/rejected": 0.9410147070884705, "step": 719 }, { "epoch": 1.82, "learning_rate": 3.525291040147498e-08, "logits/chosen": -2.3757567405700684, "logits/rejected": -2.365691661834717, "logps/chosen": -1.0486098527908325, "logps/rejected": -7.739170551300049, "loss": 0.8394, "rewards/accuracies": 0.0, "rewards/chosen": 0.6478528380393982, "rewards/margins": -0.3178221583366394, "rewards/rejected": 0.9656749963760376, "step": 720 }, { "epoch": 1.83, "learning_rate": 3.5122337332439666e-08, "logits/chosen": -2.3199541568756104, "logits/rejected": -2.3281610012054443, "logps/chosen": -10.674887657165527, "logps/rejected": -2.0623600482940674, "loss": 0.5742, "rewards/accuracies": 1.0, "rewards/chosen": 0.8103873133659363, "rewards/margins": 0.09496933221817017, "rewards/rejected": 0.7154179811477661, "step": 721 }, { "epoch": 1.83, "learning_rate": 3.49918754862814e-08, "logits/chosen": -2.2189950942993164, "logits/rejected": -2.2570791244506836, "logps/chosen": -11.575894355773926, "logps/rejected": -3.2054708003997803, "loss": 0.5826, "rewards/accuracies": 1.0, "rewards/chosen": 0.9321881532669067, "rewards/margins": 0.2302646040916443, "rewards/rejected": 0.7019235491752625, "step": 722 }, { "epoch": 1.83, "learning_rate": 3.486152583831072e-08, "logits/chosen": -2.1494531631469727, "logits/rejected": -2.159998655319214, "logps/chosen": -2.8297064304351807, "logps/rejected": -5.406404972076416, "loss": 0.6579, "rewards/accuracies": 1.0, "rewards/chosen": 0.7333630919456482, "rewards/margins": 0.27495285868644714, "rewards/rejected": 0.45841023325920105, "step": 723 }, { "epoch": 1.83, "learning_rate": 3.473128936299947e-08, "logits/chosen": -2.2371461391448975, "logits/rejected": -2.240975856781006, "logps/chosen": -2.1349103450775146, "logps/rejected": -4.363243579864502, "loss": 0.6207, "rewards/accuracies": 1.0, "rewards/chosen": 0.8935075998306274, "rewards/margins": 0.31775879859924316, "rewards/rejected": 0.5757488012313843, "step": 724 }, { "epoch": 1.84, "learning_rate": 3.460116703397336e-08, "logits/chosen": -2.341092824935913, "logits/rejected": -2.3528308868408203, "logps/chosen": -0.8757227659225464, "logps/rejected": -7.9958391189575195, "loss": 0.6584, "rewards/accuracies": 1.0, "rewards/chosen": 0.7542924284934998, "rewards/margins": 0.34519875049591064, "rewards/rejected": 0.4090936779975891, "step": 725 }, { "epoch": 1.84, "learning_rate": 3.447115982400485e-08, "logits/chosen": -2.215747594833374, "logits/rejected": -2.2367992401123047, "logps/chosen": -4.838686466217041, "logps/rejected": -10.3369140625, "loss": 0.6217, "rewards/accuracies": 1.0, "rewards/chosen": 1.041264533996582, "rewards/margins": 0.6488234400749207, "rewards/rejected": 0.3924410939216614, "step": 726 }, { "epoch": 1.84, "learning_rate": 3.434126870500571e-08, "logits/chosen": -2.264599084854126, "logits/rejected": -2.2581069469451904, "logps/chosen": -6.189286708831787, "logps/rejected": -2.93165922164917, "loss": 0.6773, "rewards/accuracies": 1.0, "rewards/chosen": 0.8033294677734375, "rewards/margins": 0.3214486539363861, "rewards/rejected": 0.4818808138370514, "step": 727 }, { "epoch": 1.84, "learning_rate": 3.4211494648019856e-08, "logits/chosen": -1.9898854494094849, "logits/rejected": -2.0086207389831543, "logps/chosen": -3.820197343826294, "logps/rejected": -5.772658824920654, "loss": 0.6673, "rewards/accuracies": 1.0, "rewards/chosen": 0.6719996333122253, "rewards/margins": 0.11750423908233643, "rewards/rejected": 0.5544953942298889, "step": 728 }, { "epoch": 1.85, "learning_rate": 3.408183862321612e-08, "logits/chosen": -2.206439733505249, "logits/rejected": -2.2133212089538574, "logps/chosen": -4.058230400085449, "logps/rejected": -3.159231662750244, "loss": 0.5083, "rewards/accuracies": 1.0, "rewards/chosen": 1.0085033178329468, "rewards/margins": 0.5059195756912231, "rewards/rejected": 0.5025837421417236, "step": 729 }, { "epoch": 1.85, "learning_rate": 3.3952301599880875e-08, "logits/chosen": -2.130361318588257, "logits/rejected": -2.1206588745117188, "logps/chosen": -5.290835380554199, "logps/rejected": -8.20477294921875, "loss": 0.6814, "rewards/accuracies": 0.0, "rewards/chosen": 0.6633160710334778, "rewards/margins": -0.229394793510437, "rewards/rejected": 0.8927108645439148, "step": 730 }, { "epoch": 1.85, "learning_rate": 3.3822884546410877e-08, "logits/chosen": -2.1314878463745117, "logits/rejected": -2.1464786529541016, "logps/chosen": -2.964548349380493, "logps/rejected": -5.995027542114258, "loss": 0.6286, "rewards/accuracies": 1.0, "rewards/chosen": 0.9887226223945618, "rewards/margins": 0.4547927975654602, "rewards/rejected": 0.5339298248291016, "step": 731 }, { "epoch": 1.85, "learning_rate": 3.369358843030603e-08, "logits/chosen": -2.26990008354187, "logits/rejected": -2.282994031906128, "logps/chosen": -6.155820846557617, "logps/rejected": -3.8027284145355225, "loss": 0.7077, "rewards/accuracies": 1.0, "rewards/chosen": 0.9708315134048462, "rewards/margins": 0.27134889364242554, "rewards/rejected": 0.6994826197624207, "step": 732 }, { "epoch": 1.86, "learning_rate": 3.356441421816213e-08, "logits/chosen": -2.3116328716278076, "logits/rejected": -2.304584264755249, "logps/chosen": -4.605449199676514, "logps/rejected": -2.7221481800079346, "loss": 0.6415, "rewards/accuracies": 0.0, "rewards/chosen": 0.3539047837257385, "rewards/margins": -0.40150320529937744, "rewards/rejected": 0.755407989025116, "step": 733 }, { "epoch": 1.86, "learning_rate": 3.343536287566362e-08, "logits/chosen": -2.2455081939697266, "logits/rejected": -2.2530252933502197, "logps/chosen": -5.271970748901367, "logps/rejected": -5.1884589195251465, "loss": 0.5572, "rewards/accuracies": 1.0, "rewards/chosen": 1.0144484043121338, "rewards/margins": 0.5751256942749023, "rewards/rejected": 0.43932271003723145, "step": 734 }, { "epoch": 1.86, "learning_rate": 3.3306435367576377e-08, "logits/chosen": -2.315073013305664, "logits/rejected": -2.3234925270080566, "logps/chosen": -0.8758537769317627, "logps/rejected": -4.041670799255371, "loss": 0.4823, "rewards/accuracies": 1.0, "rewards/chosen": 0.881370484828949, "rewards/margins": 0.28517746925354004, "rewards/rejected": 0.5961930155754089, "step": 735 }, { "epoch": 1.86, "learning_rate": 3.3177632657740575e-08, "logits/chosen": -2.2325735092163086, "logits/rejected": -2.2971150875091553, "logps/chosen": -3.13209867477417, "logps/rejected": -23.14654541015625, "loss": 0.4672, "rewards/accuracies": 1.0, "rewards/chosen": 0.7963573932647705, "rewards/margins": 0.8796745538711548, "rewards/rejected": -0.08331718295812607, "step": 736 }, { "epoch": 1.87, "learning_rate": 3.304895570906336e-08, "logits/chosen": -2.1369314193725586, "logits/rejected": -2.1411447525024414, "logps/chosen": -1.6337209939956665, "logps/rejected": -3.6445765495300293, "loss": 0.6149, "rewards/accuracies": 1.0, "rewards/chosen": 0.9279923439025879, "rewards/margins": 0.4276251196861267, "rewards/rejected": 0.5003672242164612, "step": 737 }, { "epoch": 1.87, "learning_rate": 3.29204054835117e-08, "logits/chosen": -2.2631430625915527, "logits/rejected": -2.269801378250122, "logps/chosen": -3.414278507232666, "logps/rejected": -2.5563971996307373, "loss": 0.5309, "rewards/accuracies": 1.0, "rewards/chosen": 0.8584285974502563, "rewards/margins": 0.19600838422775269, "rewards/rejected": 0.6624202132225037, "step": 738 }, { "epoch": 1.87, "learning_rate": 3.279198294210526e-08, "logits/chosen": -2.3136837482452393, "logits/rejected": -2.3085834980010986, "logps/chosen": -0.9070597290992737, "logps/rejected": -6.715824604034424, "loss": 0.666, "rewards/accuracies": 0.0, "rewards/chosen": 0.6299343109130859, "rewards/margins": -0.25293636322021484, "rewards/rejected": 0.8828706741333008, "step": 739 }, { "epoch": 1.87, "learning_rate": 3.2663689044909135e-08, "logits/chosen": -2.0992910861968994, "logits/rejected": -2.1027989387512207, "logps/chosen": -4.485334396362305, "logps/rejected": -5.392510414123535, "loss": 0.623, "rewards/accuracies": 1.0, "rewards/chosen": 0.7742684483528137, "rewards/margins": 0.20910513401031494, "rewards/rejected": 0.5651633143424988, "step": 740 }, { "epoch": 1.88, "learning_rate": 3.2535524751026674e-08, "logits/chosen": -2.2939562797546387, "logits/rejected": -2.2855074405670166, "logps/chosen": -1.5698537826538086, "logps/rejected": -4.978455066680908, "loss": 0.635, "rewards/accuracies": 0.0, "rewards/chosen": 0.8078581690788269, "rewards/margins": -0.1393546462059021, "rewards/rejected": 0.947212815284729, "step": 741 }, { "epoch": 1.88, "learning_rate": 3.2407491018592335e-08, "logits/chosen": -2.2475640773773193, "logits/rejected": -2.2504773139953613, "logps/chosen": -1.6289336681365967, "logps/rejected": -4.183829307556152, "loss": 0.545, "rewards/accuracies": 1.0, "rewards/chosen": 0.8670781254768372, "rewards/margins": 0.395181268453598, "rewards/rejected": 0.47189685702323914, "step": 742 }, { "epoch": 1.88, "learning_rate": 3.227958880476457e-08, "logits/chosen": -2.28678035736084, "logits/rejected": -2.3044497966766357, "logps/chosen": -0.9665138721466064, "logps/rejected": -4.860077857971191, "loss": 0.6221, "rewards/accuracies": 1.0, "rewards/chosen": 0.8319957852363586, "rewards/margins": 0.2734958529472351, "rewards/rejected": 0.5584999322891235, "step": 743 }, { "epoch": 1.88, "learning_rate": 3.2151819065718574e-08, "logits/chosen": -2.2066121101379395, "logits/rejected": -2.201092004776001, "logps/chosen": -1.595523476600647, "logps/rejected": -3.2968196868896484, "loss": 0.5582, "rewards/accuracies": 1.0, "rewards/chosen": 0.905303418636322, "rewards/margins": 0.441943496465683, "rewards/rejected": 0.46335992217063904, "step": 744 }, { "epoch": 1.89, "learning_rate": 3.202418275663919e-08, "logits/chosen": -2.1267306804656982, "logits/rejected": -2.1204679012298584, "logps/chosen": -2.489988327026367, "logps/rejected": -3.779097318649292, "loss": 0.5889, "rewards/accuracies": 1.0, "rewards/chosen": 0.8853586316108704, "rewards/margins": 0.32189130783081055, "rewards/rejected": 0.5634673237800598, "step": 745 }, { "epoch": 1.89, "learning_rate": 3.189668083171378e-08, "logits/chosen": -2.084322452545166, "logits/rejected": -2.0858707427978516, "logps/chosen": -3.282804250717163, "logps/rejected": -4.396401882171631, "loss": 0.6974, "rewards/accuracies": 1.0, "rewards/chosen": 0.7604904174804688, "rewards/margins": 0.3020819127559662, "rewards/rejected": 0.45840850472450256, "step": 746 }, { "epoch": 1.89, "learning_rate": 3.176931424412505e-08, "logits/chosen": -2.325763463973999, "logits/rejected": -2.325481653213501, "logps/chosen": -2.8575243949890137, "logps/rejected": -5.175843715667725, "loss": 0.6196, "rewards/accuracies": 0.0, "rewards/chosen": 0.8099125027656555, "rewards/margins": -0.10475099086761475, "rewards/rejected": 0.9146634936332703, "step": 747 }, { "epoch": 1.89, "learning_rate": 3.1642083946043975e-08, "logits/chosen": -2.2748093605041504, "logits/rejected": -2.2823097705841064, "logps/chosen": -1.4778803586959839, "logps/rejected": -3.180819034576416, "loss": 0.6385, "rewards/accuracies": 1.0, "rewards/chosen": 0.9574149250984192, "rewards/margins": 0.43625444173812866, "rewards/rejected": 0.5211604833602905, "step": 748 }, { "epoch": 1.9, "learning_rate": 3.1514990888622594e-08, "logits/chosen": -2.244344472885132, "logits/rejected": -2.314997911453247, "logps/chosen": -0.7067841291427612, "logps/rejected": -29.227237701416016, "loss": 0.6353, "rewards/accuracies": 1.0, "rewards/chosen": 0.7414822578430176, "rewards/margins": 0.49029606580734253, "rewards/rejected": 0.25118619203567505, "step": 749 }, { "epoch": 1.9, "learning_rate": 3.1388036021987044e-08, "logits/chosen": -2.1401660442352295, "logits/rejected": -2.1523866653442383, "logps/chosen": -5.8981146812438965, "logps/rejected": -4.232548713684082, "loss": 0.5373, "rewards/accuracies": 1.0, "rewards/chosen": 1.0395959615707397, "rewards/margins": 0.4091792702674866, "rewards/rejected": 0.6304166913032532, "step": 750 }, { "epoch": 1.9, "learning_rate": 3.1261220295230305e-08, "logits/chosen": -2.228969097137451, "logits/rejected": -2.22878360748291, "logps/chosen": -1.2045924663543701, "logps/rejected": -4.199066638946533, "loss": 0.7669, "rewards/accuracies": 1.0, "rewards/chosen": 0.8363267183303833, "rewards/margins": 0.005830705165863037, "rewards/rejected": 0.8304960131645203, "step": 751 }, { "epoch": 1.9, "learning_rate": 3.113454465640519e-08, "logits/chosen": -2.322854995727539, "logits/rejected": -2.3343098163604736, "logps/chosen": -0.45167022943496704, "logps/rejected": -11.691852569580078, "loss": 0.7322, "rewards/accuracies": 0.0, "rewards/chosen": 0.7430366277694702, "rewards/margins": -0.05899697542190552, "rewards/rejected": 0.8020336031913757, "step": 752 }, { "epoch": 1.91, "learning_rate": 3.1008010052517266e-08, "logits/chosen": -2.2681491374969482, "logits/rejected": -2.2632083892822266, "logps/chosen": -2.9880778789520264, "logps/rejected": -4.156134605407715, "loss": 0.6084, "rewards/accuracies": 1.0, "rewards/chosen": 0.7966834306716919, "rewards/margins": 0.31560155749320984, "rewards/rejected": 0.48108187317848206, "step": 753 }, { "epoch": 1.91, "learning_rate": 3.0881617429517694e-08, "logits/chosen": -2.235739231109619, "logits/rejected": -2.2429697513580322, "logps/chosen": -3.2361693382263184, "logps/rejected": -3.8699402809143066, "loss": 0.6416, "rewards/accuracies": 1.0, "rewards/chosen": 0.8115646243095398, "rewards/margins": 0.3125217854976654, "rewards/rejected": 0.4990428388118744, "step": 754 }, { "epoch": 1.91, "learning_rate": 3.075536773229624e-08, "logits/chosen": -2.16912841796875, "logits/rejected": -2.1670827865600586, "logps/chosen": -2.5271100997924805, "logps/rejected": -7.709492206573486, "loss": 0.5191, "rewards/accuracies": 1.0, "rewards/chosen": 0.8185027241706848, "rewards/margins": 0.3067595958709717, "rewards/rejected": 0.5117431282997131, "step": 755 }, { "epoch": 1.91, "learning_rate": 3.0629261904674204e-08, "logits/chosen": -2.2659947872161865, "logits/rejected": -2.2736494541168213, "logps/chosen": -4.961239814758301, "logps/rejected": -2.868194818496704, "loss": 0.6403, "rewards/accuracies": 1.0, "rewards/chosen": 1.0048412084579468, "rewards/margins": 0.47232526540756226, "rewards/rejected": 0.5325159430503845, "step": 756 }, { "epoch": 1.92, "learning_rate": 3.05033008893973e-08, "logits/chosen": -2.2545599937438965, "logits/rejected": -2.2576498985290527, "logps/chosen": -2.758918046951294, "logps/rejected": -4.857627868652344, "loss": 0.6171, "rewards/accuracies": 0.0, "rewards/chosen": 0.6916977763175964, "rewards/margins": -0.25942182540893555, "rewards/rejected": 0.951119601726532, "step": 757 }, { "epoch": 1.92, "learning_rate": 3.037748562812865e-08, "logits/chosen": -2.2499077320098877, "logits/rejected": -2.2497434616088867, "logps/chosen": -1.1060141324996948, "logps/rejected": -4.785794734954834, "loss": 0.6067, "rewards/accuracies": 1.0, "rewards/chosen": 0.7592204213142395, "rewards/margins": 0.35290780663490295, "rewards/rejected": 0.40631261467933655, "step": 758 }, { "epoch": 1.92, "learning_rate": 3.0251817061441775e-08, "logits/chosen": -2.221808910369873, "logits/rejected": -2.268419027328491, "logps/chosen": -2.136017322540283, "logps/rejected": -7.434420108795166, "loss": 0.6363, "rewards/accuracies": 1.0, "rewards/chosen": 0.9135521054267883, "rewards/margins": 0.3420923948287964, "rewards/rejected": 0.5714597105979919, "step": 759 }, { "epoch": 1.92, "learning_rate": 3.012629612881353e-08, "logits/chosen": -2.226318359375, "logits/rejected": -2.2103164196014404, "logps/chosen": -1.7662160396575928, "logps/rejected": -6.866567611694336, "loss": 0.6684, "rewards/accuracies": 0.0, "rewards/chosen": 0.6705598831176758, "rewards/margins": -0.2177022099494934, "rewards/rejected": 0.8882620930671692, "step": 760 }, { "epoch": 1.93, "learning_rate": 3.000092376861705e-08, "logits/chosen": -2.2441649436950684, "logits/rejected": -2.261190176010132, "logps/chosen": -4.471224308013916, "logps/rejected": -10.972794532775879, "loss": 0.566, "rewards/accuracies": 1.0, "rewards/chosen": 0.7117627859115601, "rewards/margins": 0.5129140615463257, "rewards/rejected": 0.19884872436523438, "step": 761 }, { "epoch": 1.93, "learning_rate": 2.987570091811479e-08, "logits/chosen": -2.2821927070617676, "logits/rejected": -2.281599998474121, "logps/chosen": -2.1222214698791504, "logps/rejected": -4.085506916046143, "loss": 0.62, "rewards/accuracies": 1.0, "rewards/chosen": 1.005204439163208, "rewards/margins": 0.32521140575408936, "rewards/rejected": 0.6799930334091187, "step": 762 }, { "epoch": 1.93, "learning_rate": 2.9750628513451497e-08, "logits/chosen": -2.198410749435425, "logits/rejected": -2.204650640487671, "logps/chosen": -2.498741865158081, "logps/rejected": -3.992767810821533, "loss": 0.6257, "rewards/accuracies": 0.0, "rewards/chosen": 0.7091107368469238, "rewards/margins": -0.14863216876983643, "rewards/rejected": 0.8577429056167603, "step": 763 }, { "epoch": 1.93, "learning_rate": 2.9625707489647224e-08, "logits/chosen": -2.276371479034424, "logits/rejected": -2.2947194576263428, "logps/chosen": -4.45821475982666, "logps/rejected": -3.6464128494262695, "loss": 0.6457, "rewards/accuracies": 1.0, "rewards/chosen": 1.1725013256072998, "rewards/margins": 0.6421894431114197, "rewards/rejected": 0.5303118824958801, "step": 764 }, { "epoch": 1.94, "learning_rate": 2.9500938780590274e-08, "logits/chosen": -2.231837272644043, "logits/rejected": -2.228851556777954, "logps/chosen": -1.783366084098816, "logps/rejected": -6.572787284851074, "loss": 0.6349, "rewards/accuracies": 0.0, "rewards/chosen": 0.6101114153862, "rewards/margins": -0.20253419876098633, "rewards/rejected": 0.8126456141471863, "step": 765 }, { "epoch": 1.94, "learning_rate": 2.9376323319030316e-08, "logits/chosen": -2.2368342876434326, "logits/rejected": -2.379758834838867, "logps/chosen": -5.300089359283447, "logps/rejected": -21.801267623901367, "loss": 0.6537, "rewards/accuracies": 0.0, "rewards/chosen": 0.5923991799354553, "rewards/margins": -0.06860029697418213, "rewards/rejected": 0.6609994769096375, "step": 766 }, { "epoch": 1.94, "learning_rate": 2.9251862036571394e-08, "logits/chosen": -2.2071871757507324, "logits/rejected": -2.225426197052002, "logps/chosen": -10.26707935333252, "logps/rejected": -1.3872851133346558, "loss": 0.6214, "rewards/accuracies": 1.0, "rewards/chosen": 0.9262294173240662, "rewards/margins": 0.2665247321128845, "rewards/rejected": 0.6597046852111816, "step": 767 }, { "epoch": 1.94, "learning_rate": 2.9127555863664854e-08, "logits/chosen": -2.2155168056488037, "logits/rejected": -2.2257721424102783, "logps/chosen": -13.050539016723633, "logps/rejected": -1.8557543754577637, "loss": 0.617, "rewards/accuracies": 1.0, "rewards/chosen": 0.7490150332450867, "rewards/margins": 0.13026106357574463, "rewards/rejected": 0.618753969669342, "step": 768 }, { "epoch": 1.95, "learning_rate": 2.9003405729602525e-08, "logits/chosen": -2.156581401824951, "logits/rejected": -2.185133457183838, "logps/chosen": -6.1632819175720215, "logps/rejected": -26.4027099609375, "loss": 0.8267, "rewards/accuracies": 1.0, "rewards/chosen": 0.9446130990982056, "rewards/margins": 0.35590457916259766, "rewards/rejected": 0.5887085199356079, "step": 769 }, { "epoch": 1.95, "learning_rate": 2.887941256250972e-08, "logits/chosen": -2.110316276550293, "logits/rejected": -2.130206823348999, "logps/chosen": -2.6284756660461426, "logps/rejected": -4.755442142486572, "loss": 0.6548, "rewards/accuracies": 1.0, "rewards/chosen": 0.898593544960022, "rewards/margins": 0.3261365294456482, "rewards/rejected": 0.5724570155143738, "step": 770 }, { "epoch": 1.95, "learning_rate": 2.8755577289338263e-08, "logits/chosen": -2.2000885009765625, "logits/rejected": -2.2131059169769287, "logps/chosen": -6.410940170288086, "logps/rejected": -10.378484725952148, "loss": 0.5333, "rewards/accuracies": 1.0, "rewards/chosen": 0.9968166351318359, "rewards/margins": 0.5924121737480164, "rewards/rejected": 0.4044044613838196, "step": 771 }, { "epoch": 1.95, "learning_rate": 2.8631900835859612e-08, "logits/chosen": -2.199308395385742, "logits/rejected": -2.200092315673828, "logps/chosen": -2.2083253860473633, "logps/rejected": -13.320623397827148, "loss": 0.7051, "rewards/accuracies": 1.0, "rewards/chosen": 0.5838338136672974, "rewards/margins": 0.06433075666427612, "rewards/rejected": 0.5195030570030212, "step": 772 }, { "epoch": 1.96, "learning_rate": 2.85083841266579e-08, "logits/chosen": -2.3771722316741943, "logits/rejected": -2.37353253364563, "logps/chosen": -2.761908769607544, "logps/rejected": -4.355303764343262, "loss": 0.6874, "rewards/accuracies": 0.0, "rewards/chosen": 0.5841423869132996, "rewards/margins": -0.4381428360939026, "rewards/rejected": 1.0222852230072021, "step": 773 }, { "epoch": 1.96, "learning_rate": 2.8385028085123087e-08, "logits/chosen": -2.1917552947998047, "logits/rejected": -2.1968448162078857, "logps/chosen": -4.881549835205078, "logps/rejected": -4.309107303619385, "loss": 0.7207, "rewards/accuracies": 1.0, "rewards/chosen": 1.0021275281906128, "rewards/margins": 0.5178471803665161, "rewards/rejected": 0.4842803478240967, "step": 774 }, { "epoch": 1.96, "learning_rate": 2.826183363344391e-08, "logits/chosen": -2.304553747177124, "logits/rejected": -2.342301845550537, "logps/chosen": -0.7611356973648071, "logps/rejected": -7.910653591156006, "loss": 0.6042, "rewards/accuracies": 1.0, "rewards/chosen": 0.8329266905784607, "rewards/margins": 0.022646844387054443, "rewards/rejected": 0.8102798461914062, "step": 775 }, { "epoch": 1.96, "learning_rate": 2.8138801692601167e-08, "logits/chosen": -2.3548247814178467, "logits/rejected": -2.3813953399658203, "logps/chosen": -5.973544597625732, "logps/rejected": -5.512679100036621, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": 0.840060830116272, "rewards/margins": 0.18948853015899658, "rewards/rejected": 0.6505722999572754, "step": 776 }, { "epoch": 1.97, "learning_rate": 2.8015933182360773e-08, "logits/chosen": -2.2941665649414062, "logits/rejected": -2.287627696990967, "logps/chosen": -1.4478116035461426, "logps/rejected": -4.565077304840088, "loss": 0.7181, "rewards/accuracies": 0.0, "rewards/chosen": 0.7220481634140015, "rewards/margins": -0.08248960971832275, "rewards/rejected": 0.8045377731323242, "step": 777 }, { "epoch": 1.97, "learning_rate": 2.7893229021266774e-08, "logits/chosen": -2.2336814403533936, "logits/rejected": -2.3029069900512695, "logps/chosen": -0.7703055143356323, "logps/rejected": -26.4411563873291, "loss": 0.6754, "rewards/accuracies": 1.0, "rewards/chosen": 0.6891176104545593, "rewards/margins": 0.3293224275112152, "rewards/rejected": 0.3597951829433441, "step": 778 }, { "epoch": 1.97, "learning_rate": 2.777069012663464e-08, "logits/chosen": -2.221282958984375, "logits/rejected": -2.2791988849639893, "logps/chosen": -4.217074871063232, "logps/rejected": -18.546722412109375, "loss": 0.5609, "rewards/accuracies": 1.0, "rewards/chosen": 0.8822826743125916, "rewards/margins": 0.9568037390708923, "rewards/rejected": -0.07452106475830078, "step": 779 }, { "epoch": 1.97, "learning_rate": 2.7648317414544316e-08, "logits/chosen": -2.272331714630127, "logits/rejected": -2.2649803161621094, "logps/chosen": -2.2471795082092285, "logps/rejected": -8.693902015686035, "loss": 0.6524, "rewards/accuracies": 0.0, "rewards/chosen": 0.604270339012146, "rewards/margins": -0.10145455598831177, "rewards/rejected": 0.7057248950004578, "step": 780 }, { "epoch": 1.98, "learning_rate": 2.7526111799833396e-08, "logits/chosen": -1.9855387210845947, "logits/rejected": -2.0284979343414307, "logps/chosen": -0.8017539381980896, "logps/rejected": -9.845340728759766, "loss": 0.6314, "rewards/accuracies": 1.0, "rewards/chosen": 0.8091878890991211, "rewards/margins": 0.23956042528152466, "rewards/rejected": 0.5696274638175964, "step": 781 }, { "epoch": 1.98, "learning_rate": 2.7404074196090277e-08, "logits/chosen": -2.317291498184204, "logits/rejected": -2.323960304260254, "logps/chosen": -3.7597460746765137, "logps/rejected": -3.8159894943237305, "loss": 0.6121, "rewards/accuracies": 1.0, "rewards/chosen": 0.9050564169883728, "rewards/margins": 0.32929039001464844, "rewards/rejected": 0.5757660269737244, "step": 782 }, { "epoch": 1.98, "learning_rate": 2.7282205515647346e-08, "logits/chosen": -2.1977765560150146, "logits/rejected": -2.2043087482452393, "logps/chosen": -2.255721092224121, "logps/rejected": -5.375226020812988, "loss": 0.6618, "rewards/accuracies": 1.0, "rewards/chosen": 0.8963858485221863, "rewards/margins": 0.3679072856903076, "rewards/rejected": 0.5284785628318787, "step": 783 }, { "epoch": 1.98, "learning_rate": 2.7160506669574134e-08, "logits/chosen": -2.2354531288146973, "logits/rejected": -2.2141900062561035, "logps/chosen": -2.242654800415039, "logps/rejected": -8.944450378417969, "loss": 0.604, "rewards/accuracies": 0.0, "rewards/chosen": 0.8279885649681091, "rewards/margins": -0.021388530731201172, "rewards/rejected": 0.8493770956993103, "step": 784 }, { "epoch": 1.99, "learning_rate": 2.7038978567670557e-08, "logits/chosen": -2.2354965209960938, "logits/rejected": -2.2291979789733887, "logps/chosen": -0.5380697846412659, "logps/rejected": -9.465645790100098, "loss": 0.7583, "rewards/accuracies": 0.0, "rewards/chosen": 0.7339670062065125, "rewards/margins": -0.4778168797492981, "rewards/rejected": 1.2117838859558105, "step": 785 }, { "epoch": 1.99, "learning_rate": 2.691762211845997e-08, "logits/chosen": -2.2600839138031006, "logits/rejected": -2.2658917903900146, "logps/chosen": -1.6309239864349365, "logps/rejected": -5.470459938049316, "loss": 0.4892, "rewards/accuracies": 1.0, "rewards/chosen": 0.8774981498718262, "rewards/margins": 0.4480815827846527, "rewards/rejected": 0.42941656708717346, "step": 786 }, { "epoch": 1.99, "learning_rate": 2.679643822918264e-08, "logits/chosen": -2.256901264190674, "logits/rejected": -2.253387928009033, "logps/chosen": -1.4415857791900635, "logps/rejected": -2.932349920272827, "loss": 0.629, "rewards/accuracies": 1.0, "rewards/chosen": 0.7303915023803711, "rewards/margins": 0.19469356536865234, "rewards/rejected": 0.5356979370117188, "step": 787 }, { "epoch": 1.99, "learning_rate": 2.6675427805788696e-08, "logits/chosen": -2.1453089714050293, "logits/rejected": -2.1440446376800537, "logps/chosen": -1.3901548385620117, "logps/rejected": -7.740085124969482, "loss": 0.711, "rewards/accuracies": 0.0, "rewards/chosen": 0.7128090262413025, "rewards/margins": -0.08959770202636719, "rewards/rejected": 0.8024067282676697, "step": 788 }, { "epoch": 2.0, "learning_rate": 2.6554591752931455e-08, "logits/chosen": -2.138627529144287, "logits/rejected": -2.1435089111328125, "logps/chosen": -2.71941876411438, "logps/rejected": -3.220506429672241, "loss": 0.4575, "rewards/accuracies": 1.0, "rewards/chosen": 0.9766806960105896, "rewards/margins": 0.39558345079421997, "rewards/rejected": 0.5810972452163696, "step": 789 }, { "epoch": 2.0, "learning_rate": 2.6433930973960773e-08, "logits/chosen": -2.2181930541992188, "logits/rejected": -2.215005397796631, "logps/chosen": -2.5194177627563477, "logps/rejected": -7.623414993286133, "loss": 0.6804, "rewards/accuracies": 1.0, "rewards/chosen": 0.9640231132507324, "rewards/margins": 0.11277663707733154, "rewards/rejected": 0.8512464761734009, "step": 790 }, { "epoch": 2.0, "learning_rate": 2.631344637091607e-08, "logits/chosen": -2.3009073734283447, "logits/rejected": -2.2934091091156006, "logps/chosen": -2.7408082485198975, "logps/rejected": -4.589150905609131, "loss": 0.7836, "rewards/accuracies": 0.0, "rewards/chosen": 0.6484659314155579, "rewards/margins": -0.21174412965774536, "rewards/rejected": 0.8602100610733032, "step": 791 }, { "epoch": 2.01, "learning_rate": 2.619313884451978e-08, "logits/chosen": -2.33793044090271, "logits/rejected": -2.3324222564697266, "logps/chosen": -1.078521728515625, "logps/rejected": -5.101180076599121, "loss": 0.785, "rewards/accuracies": 0.0, "rewards/chosen": 0.6707646250724792, "rewards/margins": -0.2960803508758545, "rewards/rejected": 0.9668449759483337, "step": 792 }, { "epoch": 2.01, "learning_rate": 2.607300929417051e-08, "logits/chosen": -2.0668108463287354, "logits/rejected": -2.0781915187835693, "logps/chosen": -19.28837776184082, "logps/rejected": -11.66228199005127, "loss": 0.555, "rewards/accuracies": 0.0, "rewards/chosen": 0.6675905585289001, "rewards/margins": -0.12360864877700806, "rewards/rejected": 0.7911992073059082, "step": 793 }, { "epoch": 2.01, "learning_rate": 2.5953058617936362e-08, "logits/chosen": -2.2079153060913086, "logits/rejected": -2.213778018951416, "logps/chosen": -1.8035250902175903, "logps/rejected": -4.165160179138184, "loss": 0.6625, "rewards/accuracies": 1.0, "rewards/chosen": 0.7395039796829224, "rewards/margins": 0.3429284393787384, "rewards/rejected": 0.39657554030418396, "step": 794 }, { "epoch": 2.01, "learning_rate": 2.5833287712548195e-08, "logits/chosen": -2.355311155319214, "logits/rejected": -2.381391763687134, "logps/chosen": -5.980546474456787, "logps/rejected": -5.5952935218811035, "loss": 0.602, "rewards/accuracies": 1.0, "rewards/chosen": 0.8393606543540955, "rewards/margins": 0.1970497965812683, "rewards/rejected": 0.6423108577728271, "step": 795 }, { "epoch": 2.02, "learning_rate": 2.5713697473392947e-08, "logits/chosen": -2.1583211421966553, "logits/rejected": -2.159334421157837, "logps/chosen": -2.0484726428985596, "logps/rejected": -5.640482425689697, "loss": 0.6862, "rewards/accuracies": 1.0, "rewards/chosen": 0.8073550462722778, "rewards/margins": 0.27534741163253784, "rewards/rejected": 0.53200763463974, "step": 796 }, { "epoch": 2.02, "learning_rate": 2.5594288794506912e-08, "logits/chosen": -2.17409086227417, "logits/rejected": -2.1804492473602295, "logps/chosen": -1.5767301321029663, "logps/rejected": -3.5436742305755615, "loss": 0.6599, "rewards/accuracies": 1.0, "rewards/chosen": 0.9514724016189575, "rewards/margins": 0.44117194414138794, "rewards/rejected": 0.5103004574775696, "step": 797 }, { "epoch": 2.02, "learning_rate": 2.5475062568569074e-08, "logits/chosen": -2.348343849182129, "logits/rejected": -2.3410210609436035, "logps/chosen": -6.531855583190918, "logps/rejected": -9.068181991577148, "loss": 0.6511, "rewards/accuracies": 0.0, "rewards/chosen": 0.7387613654136658, "rewards/margins": -0.139265239238739, "rewards/rejected": 0.8780266046524048, "step": 798 }, { "epoch": 2.02, "learning_rate": 2.5356019686894452e-08, "logits/chosen": -2.2204995155334473, "logits/rejected": -2.2134315967559814, "logps/chosen": -1.711686134338379, "logps/rejected": -3.112307548522949, "loss": 0.5292, "rewards/accuracies": 1.0, "rewards/chosen": 1.0576149225234985, "rewards/margins": 0.4151528477668762, "rewards/rejected": 0.6424620747566223, "step": 799 }, { "epoch": 2.03, "learning_rate": 2.5237161039427334e-08, "logits/chosen": -2.2873687744140625, "logits/rejected": -2.2839457988739014, "logps/chosen": -6.008584976196289, "logps/rejected": -1.2394466400146484, "loss": 0.6556, "rewards/accuracies": 1.0, "rewards/chosen": 0.964641273021698, "rewards/margins": 0.37817418575286865, "rewards/rejected": 0.5864670872688293, "step": 800 }, { "epoch": 2.03, "learning_rate": 2.511848751473484e-08, "logits/chosen": -2.308196544647217, "logits/rejected": -2.2971901893615723, "logps/chosen": -2.7193691730499268, "logps/rejected": -4.448293685913086, "loss": 0.7871, "rewards/accuracies": 0.0, "rewards/chosen": 0.6495023965835571, "rewards/margins": -0.35479605197906494, "rewards/rejected": 1.004298448562622, "step": 801 }, { "epoch": 2.03, "learning_rate": 2.500000000000001e-08, "logits/chosen": -2.2415242195129395, "logits/rejected": -2.2399258613586426, "logps/chosen": -4.211150646209717, "logps/rejected": -3.8794777393341064, "loss": 0.6095, "rewards/accuracies": 1.0, "rewards/chosen": 0.7262703776359558, "rewards/margins": 0.289564311504364, "rewards/rejected": 0.4367060661315918, "step": 802 }, { "epoch": 2.03, "learning_rate": 2.488169938101536e-08, "logits/chosen": -2.109499454498291, "logits/rejected": -2.1303601264953613, "logps/chosen": -1.5701885223388672, "logps/rejected": -6.917586326599121, "loss": 0.7098, "rewards/accuracies": 1.0, "rewards/chosen": 0.8761583566665649, "rewards/margins": 0.38265249133110046, "rewards/rejected": 0.4935058653354645, "step": 803 }, { "epoch": 2.04, "learning_rate": 2.4763586542176267e-08, "logits/chosen": -2.1803553104400635, "logits/rejected": -2.2391884326934814, "logps/chosen": -0.9186714887619019, "logps/rejected": -7.657916069030762, "loss": 0.5437, "rewards/accuracies": 1.0, "rewards/chosen": 0.790886402130127, "rewards/margins": 0.17923694849014282, "rewards/rejected": 0.6116494536399841, "step": 804 }, { "epoch": 2.04, "learning_rate": 2.4645662366474184e-08, "logits/chosen": -2.259692907333374, "logits/rejected": -2.2483468055725098, "logps/chosen": -1.8958966732025146, "logps/rejected": -11.237587928771973, "loss": 0.6095, "rewards/accuracies": 0.0, "rewards/chosen": 0.6162998080253601, "rewards/margins": -0.1484687328338623, "rewards/rejected": 0.7647685408592224, "step": 805 }, { "epoch": 2.04, "learning_rate": 2.4527927735490212e-08, "logits/chosen": -2.257702350616455, "logits/rejected": -2.2448227405548096, "logps/chosen": -1.8904049396514893, "logps/rejected": -6.832267761230469, "loss": 0.6496, "rewards/accuracies": 0.0, "rewards/chosen": 0.6632966995239258, "rewards/margins": -0.42559266090393066, "rewards/rejected": 1.0888893604278564, "step": 806 }, { "epoch": 2.04, "learning_rate": 2.4410383529388446e-08, "logits/chosen": -2.255046844482422, "logits/rejected": -2.2441964149475098, "logps/chosen": -3.679828643798828, "logps/rejected": -6.4622297286987305, "loss": 0.7523, "rewards/accuracies": 0.0, "rewards/chosen": 0.5076649785041809, "rewards/margins": -0.3485383987426758, "rewards/rejected": 0.8562033772468567, "step": 807 }, { "epoch": 2.05, "learning_rate": 2.4293030626909378e-08, "logits/chosen": -2.1313376426696777, "logits/rejected": -2.1462132930755615, "logps/chosen": -2.8832359313964844, "logps/rejected": -6.06404972076416, "loss": 0.5571, "rewards/accuracies": 1.0, "rewards/chosen": 0.9968538284301758, "rewards/margins": 0.46982622146606445, "rewards/rejected": 0.5270276069641113, "step": 808 }, { "epoch": 2.05, "learning_rate": 2.4175869905363387e-08, "logits/chosen": -2.174726724624634, "logits/rejected": -2.1791865825653076, "logps/chosen": -2.8049089908599854, "logps/rejected": -4.53420877456665, "loss": 0.7454, "rewards/accuracies": 1.0, "rewards/chosen": 0.9864894151687622, "rewards/margins": 0.4462737441062927, "rewards/rejected": 0.5402156710624695, "step": 809 }, { "epoch": 2.05, "learning_rate": 2.4058902240624057e-08, "logits/chosen": -2.2154908180236816, "logits/rejected": -2.225609540939331, "logps/chosen": -12.969490051269531, "logps/rejected": -1.8545222282409668, "loss": 0.6046, "rewards/accuracies": 1.0, "rewards/chosen": 0.7571199536323547, "rewards/margins": 0.13824278116226196, "rewards/rejected": 0.6188771724700928, "step": 810 }, { "epoch": 2.05, "learning_rate": 2.3942128507121813e-08, "logits/chosen": -2.306614637374878, "logits/rejected": -2.326748847961426, "logps/chosen": -6.745361804962158, "logps/rejected": -7.178494453430176, "loss": 0.5793, "rewards/accuracies": 1.0, "rewards/chosen": 0.8326131701469421, "rewards/margins": 0.22724223136901855, "rewards/rejected": 0.6053709387779236, "step": 811 }, { "epoch": 2.06, "learning_rate": 2.382554957783724e-08, "logits/chosen": -2.2423343658447266, "logits/rejected": -2.2601611614227295, "logps/chosen": -4.307753562927246, "logps/rejected": -11.071605682373047, "loss": 0.6246, "rewards/accuracies": 1.0, "rewards/chosen": 0.7281098365783691, "rewards/margins": 0.5391422510147095, "rewards/rejected": 0.18896761536598206, "step": 812 }, { "epoch": 2.06, "learning_rate": 2.3709166324294545e-08, "logits/chosen": -2.320725202560425, "logits/rejected": -2.3307201862335205, "logps/chosen": -5.479713439941406, "logps/rejected": -1.1867561340332031, "loss": 0.7125, "rewards/accuracies": 1.0, "rewards/chosen": 0.8545460104942322, "rewards/margins": 0.2747666835784912, "rewards/rejected": 0.579779326915741, "step": 813 }, { "epoch": 2.06, "learning_rate": 2.359297961655519e-08, "logits/chosen": -2.304886817932129, "logits/rejected": -2.3035542964935303, "logps/chosen": -0.6071083545684814, "logps/rejected": -2.6585757732391357, "loss": 0.6146, "rewards/accuracies": 1.0, "rewards/chosen": 0.6711128354072571, "rewards/margins": 0.03612107038497925, "rewards/rejected": 0.6349917650222778, "step": 814 }, { "epoch": 2.06, "learning_rate": 2.3476990323211265e-08, "logits/chosen": -2.3434953689575195, "logits/rejected": -2.3465495109558105, "logps/chosen": -1.3334156274795532, "logps/rejected": -1.579906940460205, "loss": 0.5157, "rewards/accuracies": 1.0, "rewards/chosen": 1.0006707906723022, "rewards/margins": 0.392955482006073, "rewards/rejected": 0.6077153086662292, "step": 815 }, { "epoch": 2.07, "learning_rate": 2.3361199311378967e-08, "logits/chosen": -2.2172811031341553, "logits/rejected": -2.2239010334014893, "logps/chosen": -2.922459840774536, "logps/rejected": -5.845641613006592, "loss": 0.6406, "rewards/accuracies": 1.0, "rewards/chosen": 0.7930615544319153, "rewards/margins": 0.3265821933746338, "rewards/rejected": 0.4664793610572815, "step": 816 }, { "epoch": 2.07, "learning_rate": 2.3245607446692234e-08, "logits/chosen": -2.2605183124542236, "logits/rejected": -2.2822234630584717, "logps/chosen": -1.246404767036438, "logps/rejected": -10.413504600524902, "loss": 0.6197, "rewards/accuracies": 1.0, "rewards/chosen": 0.8817111253738403, "rewards/margins": 0.40427523851394653, "rewards/rejected": 0.4774358868598938, "step": 817 }, { "epoch": 2.07, "learning_rate": 2.31302155932962e-08, "logits/chosen": -2.1182191371917725, "logits/rejected": -2.1105904579162598, "logps/chosen": -2.6385021209716797, "logps/rejected": -5.167537212371826, "loss": 0.6689, "rewards/accuracies": 0.0, "rewards/chosen": 0.6766209006309509, "rewards/margins": -0.05778390169143677, "rewards/rejected": 0.7344048023223877, "step": 818 }, { "epoch": 2.07, "learning_rate": 2.301502461384074e-08, "logits/chosen": -2.3132333755493164, "logits/rejected": -2.3157730102539062, "logps/chosen": -0.6137293577194214, "logps/rejected": -9.847687721252441, "loss": 0.6295, "rewards/accuracies": 0.0, "rewards/chosen": 0.6518762111663818, "rewards/margins": -0.3375852108001709, "rewards/rejected": 0.9894614219665527, "step": 819 }, { "epoch": 2.08, "learning_rate": 2.2900035369474042e-08, "logits/chosen": -2.228241205215454, "logits/rejected": -2.2275335788726807, "logps/chosen": -1.225363850593567, "logps/rejected": -4.2266435623168945, "loss": 0.5865, "rewards/accuracies": 1.0, "rewards/chosen": 0.8342496156692505, "rewards/margins": 0.006511330604553223, "rewards/rejected": 0.8277382850646973, "step": 820 }, { "epoch": 2.08, "learning_rate": 2.2785248719836142e-08, "logits/chosen": -2.2740089893341064, "logits/rejected": -2.285421848297119, "logps/chosen": -2.913968801498413, "logps/rejected": -4.551944732666016, "loss": 0.6206, "rewards/accuracies": 1.0, "rewards/chosen": 0.8102653622627258, "rewards/margins": 0.26651060581207275, "rewards/rejected": 0.5437547564506531, "step": 821 }, { "epoch": 2.08, "learning_rate": 2.267066552305253e-08, "logits/chosen": -2.331465482711792, "logits/rejected": -2.325913667678833, "logps/chosen": -3.339667320251465, "logps/rejected": -4.6725568771362305, "loss": 0.6679, "rewards/accuracies": 1.0, "rewards/chosen": 0.7576238512992859, "rewards/margins": 0.2136315107345581, "rewards/rejected": 0.5439923405647278, "step": 822 }, { "epoch": 2.08, "learning_rate": 2.2556286635727697e-08, "logits/chosen": -2.3373684883117676, "logits/rejected": -2.3583734035491943, "logps/chosen": -3.2860774993896484, "logps/rejected": -4.6891093254089355, "loss": 0.6062, "rewards/accuracies": 1.0, "rewards/chosen": 1.0121128559112549, "rewards/margins": 0.4143807291984558, "rewards/rejected": 0.5977321267127991, "step": 823 }, { "epoch": 2.09, "learning_rate": 2.2442112912938767e-08, "logits/chosen": -2.1694743633270264, "logits/rejected": -2.1673707962036133, "logps/chosen": -2.518317699432373, "logps/rejected": -7.794379234313965, "loss": 0.8068, "rewards/accuracies": 1.0, "rewards/chosen": 0.8193818926811218, "rewards/margins": 0.3161274790763855, "rewards/rejected": 0.5032544136047363, "step": 824 }, { "epoch": 2.09, "learning_rate": 2.2328145208229094e-08, "logits/chosen": -2.222790002822876, "logits/rejected": -2.2566487789154053, "logps/chosen": -0.752973198890686, "logps/rejected": -8.526945114135742, "loss": 0.6567, "rewards/accuracies": 1.0, "rewards/chosen": 0.8326080441474915, "rewards/margins": 0.4920278489589691, "rewards/rejected": 0.34058019518852234, "step": 825 }, { "epoch": 2.09, "learning_rate": 2.221438437360184e-08, "logits/chosen": -2.2393875122070312, "logits/rejected": -2.263658046722412, "logps/chosen": -0.8051429986953735, "logps/rejected": -16.886472702026367, "loss": 0.731, "rewards/accuracies": 0.0, "rewards/chosen": 0.7229481339454651, "rewards/margins": -0.07190698385238647, "rewards/rejected": 0.7948551177978516, "step": 826 }, { "epoch": 2.09, "learning_rate": 2.210083125951366e-08, "logits/chosen": -2.232320785522461, "logits/rejected": -2.2409353256225586, "logps/chosen": -1.217534065246582, "logps/rejected": -3.3248708248138428, "loss": 0.6268, "rewards/accuracies": 1.0, "rewards/chosen": 0.9775728583335876, "rewards/margins": 0.4874143898487091, "rewards/rejected": 0.49015846848487854, "step": 827 }, { "epoch": 2.1, "learning_rate": 2.198748671486838e-08, "logits/chosen": -2.2262094020843506, "logits/rejected": -2.2241334915161133, "logps/chosen": -1.46514892578125, "logps/rejected": -6.894195079803467, "loss": 0.5666, "rewards/accuracies": 1.0, "rewards/chosen": 0.7563841938972473, "rewards/margins": 0.12225061655044556, "rewards/rejected": 0.6341335773468018, "step": 828 }, { "epoch": 2.1, "learning_rate": 2.1874351587010502e-08, "logits/chosen": -2.170888900756836, "logits/rejected": -2.170041561126709, "logps/chosen": -0.7924299836158752, "logps/rejected": -8.935154914855957, "loss": 0.6087, "rewards/accuracies": 0.0, "rewards/chosen": 0.6058552861213684, "rewards/margins": -0.39408934116363525, "rewards/rejected": 0.9999446272850037, "step": 829 }, { "epoch": 2.1, "learning_rate": 2.176142672171901e-08, "logits/chosen": -2.08430814743042, "logits/rejected": -2.0867490768432617, "logps/chosen": -3.2557930946350098, "logps/rejected": -4.343945503234863, "loss": 0.6016, "rewards/accuracies": 1.0, "rewards/chosen": 0.7631915807723999, "rewards/margins": 0.29953745007514954, "rewards/rejected": 0.46365413069725037, "step": 830 }, { "epoch": 2.1, "learning_rate": 2.1648712963201056e-08, "logits/chosen": -2.1964409351348877, "logits/rejected": -2.191485643386841, "logps/chosen": -1.5368067026138306, "logps/rejected": -9.119022369384766, "loss": 0.6557, "rewards/accuracies": 0.0, "rewards/chosen": 0.5911981463432312, "rewards/margins": -0.10444414615631104, "rewards/rejected": 0.6956422924995422, "step": 831 }, { "epoch": 2.11, "learning_rate": 2.153621115408547e-08, "logits/chosen": -2.182774782180786, "logits/rejected": -2.1899256706237793, "logps/chosen": -1.459625482559204, "logps/rejected": -6.050059795379639, "loss": 0.7402, "rewards/accuracies": 0.0, "rewards/chosen": 0.5270213484764099, "rewards/margins": -0.30240750312805176, "rewards/rejected": 0.8294288516044617, "step": 832 }, { "epoch": 2.11, "learning_rate": 2.1423922135416688e-08, "logits/chosen": -2.242265462875366, "logits/rejected": -2.2417945861816406, "logps/chosen": -1.4504339694976807, "logps/rejected": -3.762554883956909, "loss": 0.5722, "rewards/accuracies": 0.0, "rewards/chosen": 0.7279707193374634, "rewards/margins": -0.18585634231567383, "rewards/rejected": 0.9138270616531372, "step": 833 }, { "epoch": 2.11, "learning_rate": 2.131184674664832e-08, "logits/chosen": -2.2007670402526855, "logits/rejected": -2.1847658157348633, "logps/chosen": -5.8630852699279785, "logps/rejected": -2.2403573989868164, "loss": 0.6703, "rewards/accuracies": 1.0, "rewards/chosen": 0.8575422167778015, "rewards/margins": 0.08822524547576904, "rewards/rejected": 0.7693169713020325, "step": 834 }, { "epoch": 2.11, "learning_rate": 2.1199985825636918e-08, "logits/chosen": -2.326988935470581, "logits/rejected": -2.3269271850585938, "logps/chosen": -1.3354519605636597, "logps/rejected": -3.6941964626312256, "loss": 0.724, "rewards/accuracies": 0.0, "rewards/chosen": 0.6949187517166138, "rewards/margins": -0.21524590253829956, "rewards/rejected": 0.9101646542549133, "step": 835 }, { "epoch": 2.12, "learning_rate": 2.108834020863573e-08, "logits/chosen": -2.2629027366638184, "logits/rejected": -2.2725534439086914, "logps/chosen": -4.319255828857422, "logps/rejected": -4.094490051269531, "loss": 0.575, "rewards/accuracies": 1.0, "rewards/chosen": 1.0822498798370361, "rewards/margins": 0.6049965620040894, "rewards/rejected": 0.47725334763526917, "step": 836 }, { "epoch": 2.12, "learning_rate": 2.0976910730288356e-08, "logits/chosen": -2.2085232734680176, "logits/rejected": -2.2026164531707764, "logps/chosen": -1.883774757385254, "logps/rejected": -4.975846290588379, "loss": 0.6032, "rewards/accuracies": 0.0, "rewards/chosen": 0.7131550908088684, "rewards/margins": -0.04150819778442383, "rewards/rejected": 0.7546632885932922, "step": 837 }, { "epoch": 2.12, "learning_rate": 2.086569822362269e-08, "logits/chosen": -2.2435195446014404, "logits/rejected": -2.2460014820098877, "logps/chosen": -4.407044410705566, "logps/rejected": -5.142470359802246, "loss": 0.611, "rewards/accuracies": 1.0, "rewards/chosen": 0.7896103858947754, "rewards/margins": 0.3361387252807617, "rewards/rejected": 0.45347166061401367, "step": 838 }, { "epoch": 2.12, "learning_rate": 2.075470352004453e-08, "logits/chosen": -2.1552624702453613, "logits/rejected": -2.195423126220703, "logps/chosen": -0.9715601801872253, "logps/rejected": -6.479698181152344, "loss": 0.5994, "rewards/accuracies": 0.0, "rewards/chosen": 0.6250141263008118, "rewards/margins": -0.18837392330169678, "rewards/rejected": 0.8133880496025085, "step": 839 }, { "epoch": 2.13, "learning_rate": 2.064392744933135e-08, "logits/chosen": -2.188764810562134, "logits/rejected": -2.1929707527160645, "logps/chosen": -0.8278723955154419, "logps/rejected": -5.140645980834961, "loss": 0.7814, "rewards/accuracies": 0.0, "rewards/chosen": 0.5817882418632507, "rewards/margins": -0.3839229941368103, "rewards/rejected": 0.965711236000061, "step": 840 }, { "epoch": 2.13, "learning_rate": 2.0533370839626297e-08, "logits/chosen": -2.183806896209717, "logits/rejected": -2.258409023284912, "logps/chosen": -2.8205814361572266, "logps/rejected": -17.788101196289062, "loss": 0.627, "rewards/accuracies": 1.0, "rewards/chosen": 0.7115821242332458, "rewards/margins": 0.05458712577819824, "rewards/rejected": 0.6569949984550476, "step": 841 }, { "epoch": 2.13, "learning_rate": 2.0423034517431736e-08, "logits/chosen": -2.173196792602539, "logits/rejected": -2.172013759613037, "logps/chosen": -10.876415252685547, "logps/rejected": -4.258007526397705, "loss": 0.5759, "rewards/accuracies": 1.0, "rewards/chosen": 1.3667396306991577, "rewards/margins": 0.6122584342956543, "rewards/rejected": 0.7544811964035034, "step": 842 }, { "epoch": 2.13, "learning_rate": 2.0312919307603282e-08, "logits/chosen": -2.0923569202423096, "logits/rejected": -2.088329315185547, "logps/chosen": -3.09517502784729, "logps/rejected": -5.973957538604736, "loss": 0.7154, "rewards/accuracies": 0.0, "rewards/chosen": 0.5400549173355103, "rewards/margins": -0.27538394927978516, "rewards/rejected": 0.8154388666152954, "step": 843 }, { "epoch": 2.14, "learning_rate": 2.0203026033343522e-08, "logits/chosen": -2.353963851928711, "logits/rejected": -2.3629343509674072, "logps/chosen": -2.3112945556640625, "logps/rejected": -4.426202774047852, "loss": 0.6255, "rewards/accuracies": 1.0, "rewards/chosen": 0.7919946908950806, "rewards/margins": 0.34585124254226685, "rewards/rejected": 0.4461434483528137, "step": 844 }, { "epoch": 2.14, "learning_rate": 2.0093355516195888e-08, "logits/chosen": -2.259040355682373, "logits/rejected": -2.2601583003997803, "logps/chosen": -0.9622423052787781, "logps/rejected": -10.40167236328125, "loss": 0.5904, "rewards/accuracies": 0.0, "rewards/chosen": 0.6059249043464661, "rewards/margins": -0.2939692735671997, "rewards/rejected": 0.8998941779136658, "step": 845 }, { "epoch": 2.14, "learning_rate": 1.9983908576038526e-08, "logits/chosen": -2.2193961143493652, "logits/rejected": -2.2048180103302, "logps/chosen": -2.461965799331665, "logps/rejected": -7.641199588775635, "loss": 0.7084, "rewards/accuracies": 0.0, "rewards/chosen": 0.6306514739990234, "rewards/margins": -0.1814987063407898, "rewards/rejected": 0.8121501803398132, "step": 846 }, { "epoch": 2.14, "learning_rate": 1.9874686031078152e-08, "logits/chosen": -2.242027521133423, "logits/rejected": -2.2421023845672607, "logps/chosen": -2.0667388439178467, "logps/rejected": -8.19521713256836, "loss": 0.6316, "rewards/accuracies": 1.0, "rewards/chosen": 0.8076626062393188, "rewards/margins": 0.03254967927932739, "rewards/rejected": 0.7751129269599915, "step": 847 }, { "epoch": 2.15, "learning_rate": 1.976568869784396e-08, "logits/chosen": -2.232903003692627, "logits/rejected": -2.223093032836914, "logps/chosen": -1.6609243154525757, "logps/rejected": -6.927599906921387, "loss": 0.5867, "rewards/accuracies": 0.0, "rewards/chosen": 0.6083820462226868, "rewards/margins": -0.4594706892967224, "rewards/rejected": 1.0678527355194092, "step": 848 }, { "epoch": 2.15, "learning_rate": 1.965691739118146e-08, "logits/chosen": -2.0774025917053223, "logits/rejected": -2.101062536239624, "logps/chosen": -4.524970054626465, "logps/rejected": -9.273195266723633, "loss": 0.6032, "rewards/accuracies": 1.0, "rewards/chosen": 1.105729103088379, "rewards/margins": 0.8140354156494141, "rewards/rejected": 0.29169368743896484, "step": 849 }, { "epoch": 2.15, "learning_rate": 1.9548372924246492e-08, "logits/chosen": -2.2083637714385986, "logits/rejected": -2.2064263820648193, "logps/chosen": -1.309746265411377, "logps/rejected": -7.2131266593933105, "loss": 0.612, "rewards/accuracies": 0.0, "rewards/chosen": 0.7650231719017029, "rewards/margins": -0.12792497873306274, "rewards/rejected": 0.8929481506347656, "step": 850 }, { "epoch": 2.15, "learning_rate": 1.944005610849897e-08, "logits/chosen": -2.2129147052764893, "logits/rejected": -2.3634467124938965, "logps/chosen": -3.203824520111084, "logps/rejected": -13.885028839111328, "loss": 0.693, "rewards/accuracies": 0.0, "rewards/chosen": 0.6874266266822815, "rewards/margins": -0.08190244436264038, "rewards/rejected": 0.7693290710449219, "step": 851 }, { "epoch": 2.16, "learning_rate": 1.9331967753697077e-08, "logits/chosen": -2.3403594493865967, "logits/rejected": -2.3347175121307373, "logps/chosen": -1.554057240486145, "logps/rejected": -1.5510377883911133, "loss": 0.6475, "rewards/accuracies": 0.0, "rewards/chosen": 0.6747183799743652, "rewards/margins": -0.22345876693725586, "rewards/rejected": 0.8981771469116211, "step": 852 }, { "epoch": 2.16, "learning_rate": 1.9224108667890914e-08, "logits/chosen": -2.2705016136169434, "logits/rejected": -2.272259473800659, "logps/chosen": -0.6311978101730347, "logps/rejected": -8.475776672363281, "loss": 0.6273, "rewards/accuracies": 0.0, "rewards/chosen": 0.7249907851219177, "rewards/margins": -0.05006188154220581, "rewards/rejected": 0.7750526666641235, "step": 853 }, { "epoch": 2.16, "learning_rate": 1.9116479657416685e-08, "logits/chosen": -2.124187707901001, "logits/rejected": -2.134735345840454, "logps/chosen": -2.0407662391662598, "logps/rejected": -7.763862133026123, "loss": 0.6855, "rewards/accuracies": 1.0, "rewards/chosen": 0.7651540637016296, "rewards/margins": 0.3512630760669708, "rewards/rejected": 0.4138909876346588, "step": 854 }, { "epoch": 2.16, "learning_rate": 1.900908152689062e-08, "logits/chosen": -2.207782506942749, "logits/rejected": -2.217595338821411, "logps/chosen": -2.448106050491333, "logps/rejected": -4.573004722595215, "loss": 0.5755, "rewards/accuracies": 1.0, "rewards/chosen": 0.7916227579116821, "rewards/margins": 0.3720039427280426, "rewards/rejected": 0.4196188151836395, "step": 855 }, { "epoch": 2.17, "learning_rate": 1.8901915079202834e-08, "logits/chosen": -2.247873306274414, "logits/rejected": -2.2511589527130127, "logps/chosen": -1.259149432182312, "logps/rejected": -2.901094675064087, "loss": 0.6785, "rewards/accuracies": 1.0, "rewards/chosen": 0.8274517059326172, "rewards/margins": 0.2776050567626953, "rewards/rejected": 0.5498466491699219, "step": 856 }, { "epoch": 2.17, "learning_rate": 1.8794981115511476e-08, "logits/chosen": -2.202770471572876, "logits/rejected": -2.2006542682647705, "logps/chosen": -7.620458602905273, "logps/rejected": -5.265259265899658, "loss": 0.7691, "rewards/accuracies": 1.0, "rewards/chosen": 0.7336870431900024, "rewards/margins": 0.013545870780944824, "rewards/rejected": 0.7201411724090576, "step": 857 }, { "epoch": 2.17, "learning_rate": 1.868828043523673e-08, "logits/chosen": -2.3390579223632812, "logits/rejected": -2.3457679748535156, "logps/chosen": -4.5643815994262695, "logps/rejected": -4.067355632781982, "loss": 0.597, "rewards/accuracies": 1.0, "rewards/chosen": 1.318654179573059, "rewards/margins": 0.8601444959640503, "rewards/rejected": 0.4585096836090088, "step": 858 }, { "epoch": 2.17, "learning_rate": 1.8581813836054693e-08, "logits/chosen": -2.3076484203338623, "logits/rejected": -2.303591251373291, "logps/chosen": -4.254903793334961, "logps/rejected": -2.3015401363372803, "loss": 0.5807, "rewards/accuracies": 1.0, "rewards/chosen": 0.9165266156196594, "rewards/margins": 0.40669286251068115, "rewards/rejected": 0.5098337531089783, "step": 859 }, { "epoch": 2.18, "learning_rate": 1.8475582113891585e-08, "logits/chosen": -2.243854522705078, "logits/rejected": -2.2413418292999268, "logps/chosen": -3.256068706512451, "logps/rejected": -2.8173656463623047, "loss": 0.7349, "rewards/accuracies": 0.0, "rewards/chosen": 0.7554150223731995, "rewards/margins": -0.13147062063217163, "rewards/rejected": 0.8868856430053711, "step": 860 }, { "epoch": 2.18, "learning_rate": 1.8369586062917692e-08, "logits/chosen": -2.2452735900878906, "logits/rejected": -2.253751277923584, "logps/chosen": -1.998369812965393, "logps/rejected": -4.32602596282959, "loss": 0.6518, "rewards/accuracies": 1.0, "rewards/chosen": 0.9137699007987976, "rewards/margins": 0.40318363904953003, "rewards/rejected": 0.5105862617492676, "step": 861 }, { "epoch": 2.18, "learning_rate": 1.8263826475541477e-08, "logits/chosen": -2.253115653991699, "logits/rejected": -2.2514326572418213, "logps/chosen": -3.9416322708129883, "logps/rejected": -6.973329544067383, "loss": 0.5965, "rewards/accuracies": 0.0, "rewards/chosen": 0.6724317669868469, "rewards/margins": -0.041686058044433594, "rewards/rejected": 0.7141178250312805, "step": 862 }, { "epoch": 2.18, "learning_rate": 1.8158304142403653e-08, "logits/chosen": -2.1482789516448975, "logits/rejected": -2.1559550762176514, "logps/chosen": -3.1016407012939453, "logps/rejected": -4.354651927947998, "loss": 0.617, "rewards/accuracies": 1.0, "rewards/chosen": 0.7412990927696228, "rewards/margins": 0.28326842188835144, "rewards/rejected": 0.45803067088127136, "step": 863 }, { "epoch": 2.19, "learning_rate": 1.8053019852371193e-08, "logits/chosen": -2.264228582382202, "logits/rejected": -2.274174928665161, "logps/chosen": -4.795304775238037, "logps/rejected": -2.9447429180145264, "loss": 0.5181, "rewards/accuracies": 1.0, "rewards/chosen": 1.0214347839355469, "rewards/margins": 0.4965737462043762, "rewards/rejected": 0.5248610377311707, "step": 864 }, { "epoch": 2.19, "learning_rate": 1.7947974392531613e-08, "logits/chosen": -2.250384569168091, "logits/rejected": -2.300196886062622, "logps/chosen": -0.8373137712478638, "logps/rejected": -9.533153533935547, "loss": 0.6413, "rewards/accuracies": 1.0, "rewards/chosen": 0.8261967897415161, "rewards/margins": 0.4955344498157501, "rewards/rejected": 0.330662339925766, "step": 865 }, { "epoch": 2.19, "learning_rate": 1.7843168548186893e-08, "logits/chosen": -2.139174699783325, "logits/rejected": -2.147599935531616, "logps/chosen": -7.056827545166016, "logps/rejected": -4.647380828857422, "loss": 0.6968, "rewards/accuracies": 1.0, "rewards/chosen": 1.0886729955673218, "rewards/margins": 0.5866069197654724, "rewards/rejected": 0.5020660758018494, "step": 866 }, { "epoch": 2.19, "learning_rate": 1.7738603102847693e-08, "logits/chosen": -2.2585432529449463, "logits/rejected": -2.291477918624878, "logps/chosen": -1.0547775030136108, "logps/rejected": -6.7501115798950195, "loss": 0.6814, "rewards/accuracies": 1.0, "rewards/chosen": 0.8167359232902527, "rewards/margins": 0.3061071038246155, "rewards/rejected": 0.5106288194656372, "step": 867 }, { "epoch": 2.2, "learning_rate": 1.7634278838227524e-08, "logits/chosen": -2.279017210006714, "logits/rejected": -2.2810475826263428, "logps/chosen": -3.46079158782959, "logps/rejected": -3.600064754486084, "loss": 0.5221, "rewards/accuracies": 1.0, "rewards/chosen": 0.933783233165741, "rewards/margins": 0.4083006978034973, "rewards/rejected": 0.5254825353622437, "step": 868 }, { "epoch": 2.2, "learning_rate": 1.753019653423684e-08, "logits/chosen": -2.1793789863586426, "logits/rejected": -2.2693800926208496, "logps/chosen": -17.311180114746094, "logps/rejected": -21.253700256347656, "loss": 0.5413, "rewards/accuracies": 1.0, "rewards/chosen": 1.2695515155792236, "rewards/margins": 0.9655376672744751, "rewards/rejected": 0.30401381850242615, "step": 869 }, { "epoch": 2.2, "learning_rate": 1.7426356968977264e-08, "logits/chosen": -2.3250412940979004, "logits/rejected": -2.3498427867889404, "logps/chosen": -2.7654716968536377, "logps/rejected": -11.33669376373291, "loss": 0.6824, "rewards/accuracies": 1.0, "rewards/chosen": 0.6654912829399109, "rewards/margins": 0.12461495399475098, "rewards/rejected": 0.5408763289451599, "step": 870 }, { "epoch": 2.21, "learning_rate": 1.7322760918735734e-08, "logits/chosen": -2.2003002166748047, "logits/rejected": -2.190816879272461, "logps/chosen": -3.5732593536376953, "logps/rejected": -2.655888319015503, "loss": 0.6752, "rewards/accuracies": 1.0, "rewards/chosen": 0.8833560347557068, "rewards/margins": 0.2863011360168457, "rewards/rejected": 0.5970548987388611, "step": 871 }, { "epoch": 2.21, "learning_rate": 1.7219409157978704e-08, "logits/chosen": -2.206742763519287, "logits/rejected": -2.2173421382904053, "logps/chosen": -2.915738105773926, "logps/rejected": -3.147935628890991, "loss": 0.6075, "rewards/accuracies": 1.0, "rewards/chosen": 0.8394387364387512, "rewards/margins": 0.294803261756897, "rewards/rejected": 0.5446354746818542, "step": 872 }, { "epoch": 2.21, "learning_rate": 1.7116302459346377e-08, "logits/chosen": -2.220898151397705, "logits/rejected": -2.2232391834259033, "logps/chosen": -3.0816564559936523, "logps/rejected": -3.2437572479248047, "loss": 0.5399, "rewards/accuracies": 1.0, "rewards/chosen": 0.8245970606803894, "rewards/margins": 0.3586216866970062, "rewards/rejected": 0.4659753739833832, "step": 873 }, { "epoch": 2.21, "learning_rate": 1.7013441593646892e-08, "logits/chosen": -2.301642656326294, "logits/rejected": -2.3013081550598145, "logps/chosen": -0.8842742443084717, "logps/rejected": -6.366711616516113, "loss": 0.6, "rewards/accuracies": 1.0, "rewards/chosen": 0.9336602091789246, "rewards/margins": 0.5275521278381348, "rewards/rejected": 0.4061081111431122, "step": 874 }, { "epoch": 2.22, "learning_rate": 1.6910827329850614e-08, "logits/chosen": -2.1134629249572754, "logits/rejected": -2.1167571544647217, "logps/chosen": -1.5189411640167236, "logps/rejected": -3.5253806114196777, "loss": 0.5489, "rewards/accuracies": 1.0, "rewards/chosen": 0.8057234883308411, "rewards/margins": 0.4519289433956146, "rewards/rejected": 0.35379454493522644, "step": 875 }, { "epoch": 2.22, "learning_rate": 1.6808460435084315e-08, "logits/chosen": -2.2627437114715576, "logits/rejected": -2.2566137313842773, "logps/chosen": -1.468152642250061, "logps/rejected": -8.004912376403809, "loss": 0.8394, "rewards/accuracies": 0.0, "rewards/chosen": 0.5692700743675232, "rewards/margins": -0.24598443508148193, "rewards/rejected": 0.8152545094490051, "step": 876 }, { "epoch": 2.22, "learning_rate": 1.6706341674625535e-08, "logits/chosen": -2.1999802589416504, "logits/rejected": -2.2008605003356934, "logps/chosen": -2.2594945430755615, "logps/rejected": -13.098752975463867, "loss": 0.64, "rewards/accuracies": 1.0, "rewards/chosen": 0.5787168741226196, "rewards/margins": 0.037026822566986084, "rewards/rejected": 0.5416900515556335, "step": 877 }, { "epoch": 2.22, "learning_rate": 1.6604471811896703e-08, "logits/chosen": -2.3135502338409424, "logits/rejected": -2.3189375400543213, "logps/chosen": -4.004006862640381, "logps/rejected": -3.6212992668151855, "loss": 0.5252, "rewards/accuracies": 1.0, "rewards/chosen": 0.979483425617218, "rewards/margins": 0.39256012439727783, "rewards/rejected": 0.5869233012199402, "step": 878 }, { "epoch": 2.23, "learning_rate": 1.6502851608459667e-08, "logits/chosen": -2.3733973503112793, "logits/rejected": -2.5028326511383057, "logps/chosen": -1.7342042922973633, "logps/rejected": -14.232706069946289, "loss": 0.6637, "rewards/accuracies": 1.0, "rewards/chosen": 0.7732154130935669, "rewards/margins": 0.04414397478103638, "rewards/rejected": 0.7290714383125305, "step": 879 }, { "epoch": 2.23, "learning_rate": 1.6401481824009748e-08, "logits/chosen": -2.2027580738067627, "logits/rejected": -2.2073287963867188, "logps/chosen": -1.520096778869629, "logps/rejected": -2.3762171268463135, "loss": 0.7173, "rewards/accuracies": 1.0, "rewards/chosen": 0.9001032114028931, "rewards/margins": 0.3773021101951599, "rewards/rejected": 0.5228011012077332, "step": 880 }, { "epoch": 2.23, "learning_rate": 1.6300363216370216e-08, "logits/chosen": -2.248133420944214, "logits/rejected": -2.2472634315490723, "logps/chosen": -4.535856246948242, "logps/rejected": -3.4483590126037598, "loss": 0.667, "rewards/accuracies": 1.0, "rewards/chosen": 0.7859256863594055, "rewards/margins": 0.3160422742366791, "rewards/rejected": 0.46988341212272644, "step": 881 }, { "epoch": 2.23, "learning_rate": 1.6199496541486644e-08, "logits/chosen": -2.354675531387329, "logits/rejected": -2.3500850200653076, "logps/chosen": -1.3134756088256836, "logps/rejected": -5.055531978607178, "loss": 0.6561, "rewards/accuracies": 1.0, "rewards/chosen": 0.8682442903518677, "rewards/margins": 0.45214593410491943, "rewards/rejected": 0.41609835624694824, "step": 882 }, { "epoch": 2.24, "learning_rate": 1.60988825534211e-08, "logits/chosen": -2.1707489490509033, "logits/rejected": -2.157108783721924, "logps/chosen": -8.465485572814941, "logps/rejected": -4.454567909240723, "loss": 0.4764, "rewards/accuracies": 1.0, "rewards/chosen": 0.7659069895744324, "rewards/margins": 0.4370672106742859, "rewards/rejected": 0.3288397789001465, "step": 883 }, { "epoch": 2.24, "learning_rate": 1.5998522004346672e-08, "logits/chosen": -2.2635111808776855, "logits/rejected": -2.254889965057373, "logps/chosen": -2.032914400100708, "logps/rejected": -2.9262259006500244, "loss": 0.7841, "rewards/accuracies": 0.0, "rewards/chosen": 0.7068687677383423, "rewards/margins": -0.3056502342224121, "rewards/rejected": 1.0125190019607544, "step": 884 }, { "epoch": 2.24, "learning_rate": 1.5898415644541758e-08, "logits/chosen": -2.1445953845977783, "logits/rejected": -2.143240213394165, "logps/chosen": -1.383901834487915, "logps/rejected": -7.665195465087891, "loss": 0.7457, "rewards/accuracies": 0.0, "rewards/chosen": 0.7134343385696411, "rewards/margins": -0.09646129608154297, "rewards/rejected": 0.8098956346511841, "step": 885 }, { "epoch": 2.24, "learning_rate": 1.579856422238449e-08, "logits/chosen": -2.169966697692871, "logits/rejected": -2.1684086322784424, "logps/chosen": -0.8724854588508606, "logps/rejected": -8.231964111328125, "loss": 0.8017, "rewards/accuracies": 0.0, "rewards/chosen": 0.7287499904632568, "rewards/margins": -0.13715577125549316, "rewards/rejected": 0.86590576171875, "step": 886 }, { "epoch": 2.25, "learning_rate": 1.569896848434713e-08, "logits/chosen": -2.161933660507202, "logits/rejected": -2.1619009971618652, "logps/chosen": -3.581547260284424, "logps/rejected": -6.516843795776367, "loss": 0.5941, "rewards/accuracies": 1.0, "rewards/chosen": 0.7956085801124573, "rewards/margins": 0.457217276096344, "rewards/rejected": 0.3383913040161133, "step": 887 }, { "epoch": 2.25, "learning_rate": 1.559962917499048e-08, "logits/chosen": -2.2773213386535645, "logits/rejected": -2.281897783279419, "logps/chosen": -6.423957824707031, "logps/rejected": -4.016806125640869, "loss": 0.6061, "rewards/accuracies": 1.0, "rewards/chosen": 1.1209510564804077, "rewards/margins": 0.6223785877227783, "rewards/rejected": 0.4985724985599518, "step": 888 }, { "epoch": 2.25, "learning_rate": 1.5500547036958333e-08, "logits/chosen": -2.2739157676696777, "logits/rejected": -2.2614879608154297, "logps/chosen": -3.913752794265747, "logps/rejected": -3.4674222469329834, "loss": 0.6169, "rewards/accuracies": 1.0, "rewards/chosen": 0.9748760461807251, "rewards/margins": 0.5486369132995605, "rewards/rejected": 0.42623910307884216, "step": 889 }, { "epoch": 2.25, "learning_rate": 1.5401722810971924e-08, "logits/chosen": -2.287966251373291, "logits/rejected": -2.3232502937316895, "logps/chosen": -0.788067638874054, "logps/rejected": -16.240957260131836, "loss": 0.6704, "rewards/accuracies": 0.0, "rewards/chosen": 0.6582706570625305, "rewards/margins": -0.03472501039505005, "rewards/rejected": 0.6929956674575806, "step": 890 }, { "epoch": 2.26, "learning_rate": 1.530315723582432e-08, "logits/chosen": -2.201991558074951, "logits/rejected": -2.2409298419952393, "logps/chosen": -1.4843473434448242, "logps/rejected": -4.927816390991211, "loss": 0.5602, "rewards/accuracies": 1.0, "rewards/chosen": 0.8858675360679626, "rewards/margins": 0.22452354431152344, "rewards/rejected": 0.6613439917564392, "step": 891 }, { "epoch": 2.26, "learning_rate": 1.520485104837505e-08, "logits/chosen": -2.2894043922424316, "logits/rejected": -2.2943477630615234, "logps/chosen": -1.3633010387420654, "logps/rejected": -5.581693172454834, "loss": 0.5945, "rewards/accuracies": 1.0, "rewards/chosen": 0.860843300819397, "rewards/margins": 0.5062706470489502, "rewards/rejected": 0.3545726239681244, "step": 892 }, { "epoch": 2.26, "learning_rate": 1.5106804983544468e-08, "logits/chosen": -2.228593111038208, "logits/rejected": -2.226346015930176, "logps/chosen": -2.088008165359497, "logps/rejected": -3.6787514686584473, "loss": 0.6499, "rewards/accuracies": 0.0, "rewards/chosen": 0.6818041801452637, "rewards/margins": -0.25826311111450195, "rewards/rejected": 0.9400672912597656, "step": 893 }, { "epoch": 2.26, "learning_rate": 1.5009019774308245e-08, "logits/chosen": -2.3233702182769775, "logits/rejected": -2.332578420639038, "logps/chosen": -4.444520473480225, "logps/rejected": -5.461182117462158, "loss": 0.5808, "rewards/accuracies": 1.0, "rewards/chosen": 1.000352144241333, "rewards/margins": 0.4262579679489136, "rewards/rejected": 0.5740941762924194, "step": 894 }, { "epoch": 2.27, "learning_rate": 1.491149615169201e-08, "logits/chosen": -2.283240556716919, "logits/rejected": -2.3547682762145996, "logps/chosen": -1.5167555809020996, "logps/rejected": -26.185028076171875, "loss": 0.6662, "rewards/accuracies": 1.0, "rewards/chosen": 0.7298237681388855, "rewards/margins": 0.129893958568573, "rewards/rejected": 0.5999298095703125, "step": 895 }, { "epoch": 2.27, "learning_rate": 1.4814234844765782e-08, "logits/chosen": -2.2368812561035156, "logits/rejected": -2.232170343399048, "logps/chosen": -3.369561195373535, "logps/rejected": -2.808187246322632, "loss": 0.7195, "rewards/accuracies": 0.0, "rewards/chosen": 0.49950656294822693, "rewards/margins": -0.39136871695518494, "rewards/rejected": 0.8908752799034119, "step": 896 }, { "epoch": 2.27, "learning_rate": 1.4717236580638559e-08, "logits/chosen": -2.1399624347686768, "logits/rejected": -2.1372978687286377, "logps/chosen": -1.354925274848938, "logps/rejected": -10.137401580810547, "loss": 0.5779, "rewards/accuracies": 0.0, "rewards/chosen": 0.6888433694839478, "rewards/margins": -0.08254295587539673, "rewards/rejected": 0.7713863253593445, "step": 897 }, { "epoch": 2.27, "learning_rate": 1.4620502084452868e-08, "logits/chosen": -2.116283893585205, "logits/rejected": -2.1099648475646973, "logps/chosen": -1.8361122608184814, "logps/rejected": -4.532260417938232, "loss": 0.6547, "rewards/accuracies": 0.0, "rewards/chosen": 0.5535967946052551, "rewards/margins": -0.47200363874435425, "rewards/rejected": 1.0256004333496094, "step": 898 }, { "epoch": 2.28, "learning_rate": 1.4524032079379367e-08, "logits/chosen": -2.255532741546631, "logits/rejected": -2.2540509700775146, "logps/chosen": -3.461760997772217, "logps/rejected": -6.415596008300781, "loss": 0.634, "rewards/accuracies": 0.0, "rewards/chosen": 0.704675555229187, "rewards/margins": -0.11392003297805786, "rewards/rejected": 0.8185955882072449, "step": 899 }, { "epoch": 2.28, "learning_rate": 1.442782728661141e-08, "logits/chosen": -2.3500025272369385, "logits/rejected": -2.3470942974090576, "logps/chosen": -1.509056806564331, "logps/rejected": -14.9579496383667, "loss": 0.6849, "rewards/accuracies": 1.0, "rewards/chosen": 0.8257958292961121, "rewards/margins": 0.06018328666687012, "rewards/rejected": 0.7656125426292419, "step": 900 }, { "epoch": 2.28, "learning_rate": 1.4331888425359695e-08, "logits/chosen": -2.2217442989349365, "logits/rejected": -2.2223801612854004, "logps/chosen": -2.13547682762146, "logps/rejected": -4.820616722106934, "loss": 0.6363, "rewards/accuracies": 1.0, "rewards/chosen": 0.767331600189209, "rewards/margins": 0.412068635225296, "rewards/rejected": 0.35526296496391296, "step": 901 }, { "epoch": 2.28, "learning_rate": 1.4236216212846785e-08, "logits/chosen": -2.2880473136901855, "logits/rejected": -2.2898495197296143, "logps/chosen": -1.9943162202835083, "logps/rejected": -6.321636199951172, "loss": 0.696, "rewards/accuracies": 1.0, "rewards/chosen": 0.905535876750946, "rewards/margins": 0.15500235557556152, "rewards/rejected": 0.7505335211753845, "step": 902 }, { "epoch": 2.29, "learning_rate": 1.414081136430193e-08, "logits/chosen": -2.215686321258545, "logits/rejected": -2.207066297531128, "logps/chosen": -0.8807255029678345, "logps/rejected": -7.002937316894531, "loss": 0.667, "rewards/accuracies": 0.0, "rewards/chosen": 0.5821854472160339, "rewards/margins": -0.364943265914917, "rewards/rejected": 0.9471287131309509, "step": 903 }, { "epoch": 2.29, "learning_rate": 1.4045674592955559e-08, "logits/chosen": -2.077514171600342, "logits/rejected": -2.0708391666412354, "logps/chosen": -2.323815107345581, "logps/rejected": -8.145730018615723, "loss": 0.7227, "rewards/accuracies": 0.0, "rewards/chosen": 0.7451524138450623, "rewards/margins": -0.06117284297943115, "rewards/rejected": 0.8063252568244934, "step": 904 }, { "epoch": 2.29, "learning_rate": 1.3950806610033955e-08, "logits/chosen": -2.205144166946411, "logits/rejected": -2.2792000770568848, "logps/chosen": -3.610917091369629, "logps/rejected": -18.67818832397461, "loss": 0.5813, "rewards/accuracies": 1.0, "rewards/chosen": 0.6898493766784668, "rewards/margins": 0.13424211740493774, "rewards/rejected": 0.555607259273529, "step": 905 }, { "epoch": 2.29, "learning_rate": 1.3856208124754088e-08, "logits/chosen": -2.3123581409454346, "logits/rejected": -2.379383087158203, "logps/chosen": -3.908886432647705, "logps/rejected": -53.49169921875, "loss": 0.6722, "rewards/accuracies": 1.0, "rewards/chosen": 0.8226181268692017, "rewards/margins": 0.13792574405670166, "rewards/rejected": 0.6846923828125, "step": 906 }, { "epoch": 2.3, "learning_rate": 1.3761879844318115e-08, "logits/chosen": -2.31227707862854, "logits/rejected": -2.33821177482605, "logps/chosen": -1.5205243825912476, "logps/rejected": -9.093635559082031, "loss": 0.5778, "rewards/accuracies": 1.0, "rewards/chosen": 0.6790781617164612, "rewards/margins": 0.07676255702972412, "rewards/rejected": 0.6023156046867371, "step": 907 }, { "epoch": 2.3, "learning_rate": 1.3667822473908219e-08, "logits/chosen": -2.3162434101104736, "logits/rejected": -2.328253984451294, "logps/chosen": -6.342138767242432, "logps/rejected": -3.3075621128082275, "loss": 0.5535, "rewards/accuracies": 1.0, "rewards/chosen": 1.0405601263046265, "rewards/margins": 0.43031078577041626, "rewards/rejected": 0.6102493405342102, "step": 908 }, { "epoch": 2.3, "learning_rate": 1.3574036716681364e-08, "logits/chosen": -2.121204137802124, "logits/rejected": -2.112372875213623, "logps/chosen": -1.8329658508300781, "logps/rejected": -10.88890266418457, "loss": 0.7377, "rewards/accuracies": 0.0, "rewards/chosen": 0.8277974128723145, "rewards/margins": -0.0868382453918457, "rewards/rejected": 0.9146356582641602, "step": 909 }, { "epoch": 2.3, "learning_rate": 1.3480523273763878e-08, "logits/chosen": -2.2321932315826416, "logits/rejected": -2.233717441558838, "logps/chosen": -6.9341206550598145, "logps/rejected": -6.0720534324646, "loss": 0.6866, "rewards/accuracies": 1.0, "rewards/chosen": 1.0793060064315796, "rewards/margins": 0.6967609524726868, "rewards/rejected": 0.3825450539588928, "step": 910 }, { "epoch": 2.31, "learning_rate": 1.3387282844246384e-08, "logits/chosen": -2.1860294342041016, "logits/rejected": -2.2054824829101562, "logps/chosen": -1.4500443935394287, "logps/rejected": -9.84774398803711, "loss": 0.6559, "rewards/accuracies": 1.0, "rewards/chosen": 0.8189045190811157, "rewards/margins": 0.5064735412597656, "rewards/rejected": 0.3124309480190277, "step": 911 }, { "epoch": 2.31, "learning_rate": 1.3294316125178473e-08, "logits/chosen": -2.2364158630371094, "logits/rejected": -2.231795310974121, "logps/chosen": -0.749165415763855, "logps/rejected": -2.7349796295166016, "loss": 0.6672, "rewards/accuracies": 0.0, "rewards/chosen": 0.6936880946159363, "rewards/margins": -0.13158857822418213, "rewards/rejected": 0.8252766728401184, "step": 912 }, { "epoch": 2.31, "learning_rate": 1.3201623811563545e-08, "logits/chosen": -2.267801284790039, "logits/rejected": -2.2696638107299805, "logps/chosen": -2.060250997543335, "logps/rejected": -6.416688442230225, "loss": 0.6414, "rewards/accuracies": 1.0, "rewards/chosen": 0.8627414703369141, "rewards/margins": 0.5132512450218201, "rewards/rejected": 0.349490225315094, "step": 913 }, { "epoch": 2.31, "learning_rate": 1.3109206596353573e-08, "logits/chosen": -2.2678918838500977, "logits/rejected": -2.262022018432617, "logps/chosen": -1.0563160181045532, "logps/rejected": -3.149948835372925, "loss": 0.649, "rewards/accuracies": 0.0, "rewards/chosen": 0.6364864706993103, "rewards/margins": -0.38092654943466187, "rewards/rejected": 1.0174130201339722, "step": 914 }, { "epoch": 2.32, "learning_rate": 1.3017065170443948e-08, "logits/chosen": -2.2583465576171875, "logits/rejected": -2.2577123641967773, "logps/chosen": -2.3044357299804688, "logps/rejected": -3.6294004917144775, "loss": 0.5793, "rewards/accuracies": 1.0, "rewards/chosen": 0.8781768679618835, "rewards/margins": 0.23225992918014526, "rewards/rejected": 0.6459169387817383, "step": 915 }, { "epoch": 2.32, "learning_rate": 1.2925200222668308e-08, "logits/chosen": -2.1989381313323975, "logits/rejected": -2.1943893432617188, "logps/chosen": -1.2322336435317993, "logps/rejected": -9.418883323669434, "loss": 0.6063, "rewards/accuracies": 1.0, "rewards/chosen": 0.8913663029670715, "rewards/margins": 0.12987881898880005, "rewards/rejected": 0.7614874839782715, "step": 916 }, { "epoch": 2.32, "learning_rate": 1.2833612439793401e-08, "logits/chosen": -2.143282413482666, "logits/rejected": -2.151627540588379, "logps/chosen": -3.2065985202789307, "logps/rejected": -6.397167682647705, "loss": 0.6055, "rewards/accuracies": 1.0, "rewards/chosen": 0.8335172533988953, "rewards/margins": 0.4576137959957123, "rewards/rejected": 0.375903457403183, "step": 917 }, { "epoch": 2.32, "learning_rate": 1.2742302506513892e-08, "logits/chosen": -2.2572860717773438, "logits/rejected": -2.255614995956421, "logps/chosen": -0.471101850271225, "logps/rejected": -7.315985679626465, "loss": 0.7997, "rewards/accuracies": 0.0, "rewards/chosen": 0.6021925806999207, "rewards/margins": -0.19952452182769775, "rewards/rejected": 0.8017171025276184, "step": 918 }, { "epoch": 2.33, "learning_rate": 1.2651271105447319e-08, "logits/chosen": -2.1985716819763184, "logits/rejected": -2.2568819522857666, "logps/chosen": -2.7871904373168945, "logps/rejected": -8.415656089782715, "loss": 0.6024, "rewards/accuracies": 1.0, "rewards/chosen": 0.9942449927330017, "rewards/margins": 0.16883134841918945, "rewards/rejected": 0.8254136443138123, "step": 919 }, { "epoch": 2.33, "learning_rate": 1.2560518917129014e-08, "logits/chosen": -2.2499005794525146, "logits/rejected": -2.241495370864868, "logps/chosen": -0.8307152390480042, "logps/rejected": -5.521650791168213, "loss": 0.6151, "rewards/accuracies": 0.0, "rewards/chosen": 0.7161785960197449, "rewards/margins": -0.40921181440353394, "rewards/rejected": 1.1253904104232788, "step": 920 }, { "epoch": 2.33, "learning_rate": 1.2470046620006857e-08, "logits/chosen": -2.2327234745025635, "logits/rejected": -2.2456119060516357, "logps/chosen": -2.2952568531036377, "logps/rejected": -2.933218002319336, "loss": 0.5421, "rewards/accuracies": 1.0, "rewards/chosen": 0.7882695198059082, "rewards/margins": 0.273378849029541, "rewards/rejected": 0.5148906707763672, "step": 921 }, { "epoch": 2.33, "learning_rate": 1.2379854890436375e-08, "logits/chosen": -2.29103684425354, "logits/rejected": -2.2941086292266846, "logps/chosen": -1.6399881839752197, "logps/rejected": -3.925229549407959, "loss": 0.5779, "rewards/accuracies": 1.0, "rewards/chosen": 0.9005891680717468, "rewards/margins": 0.43593475222587585, "rewards/rejected": 0.46465441584587097, "step": 922 }, { "epoch": 2.34, "learning_rate": 1.2289944402675617e-08, "logits/chosen": -2.3438429832458496, "logits/rejected": -2.350741386413574, "logps/chosen": -4.5634636878967285, "logps/rejected": -3.6870064735412598, "loss": 0.6852, "rewards/accuracies": 1.0, "rewards/chosen": 0.9736696481704712, "rewards/margins": 0.5589509606361389, "rewards/rejected": 0.4147186875343323, "step": 923 }, { "epoch": 2.34, "learning_rate": 1.2200315828880093e-08, "logits/chosen": -2.220486879348755, "logits/rejected": -2.2086193561553955, "logps/chosen": -20.213897705078125, "logps/rejected": -4.2740983963012695, "loss": 0.6848, "rewards/accuracies": 0.0, "rewards/chosen": 0.7815372347831726, "rewards/margins": -0.1684643030166626, "rewards/rejected": 0.9500015377998352, "step": 924 }, { "epoch": 2.34, "learning_rate": 1.2110969839097795e-08, "logits/chosen": -2.3385965824127197, "logits/rejected": -2.3497958183288574, "logps/chosen": -9.543272018432617, "logps/rejected": -1.196047306060791, "loss": 0.6261, "rewards/accuracies": 1.0, "rewards/chosen": 0.8252660632133484, "rewards/margins": 0.17052364349365234, "rewards/rejected": 0.654742419719696, "step": 925 }, { "epoch": 2.34, "learning_rate": 1.2021907101264146e-08, "logits/chosen": -2.232293128967285, "logits/rejected": -2.2319083213806152, "logps/chosen": -1.5372729301452637, "logps/rejected": -9.29092025756836, "loss": 0.5983, "rewards/accuracies": 0.0, "rewards/chosen": 0.6889446973800659, "rewards/margins": -0.17077648639678955, "rewards/rejected": 0.8597211837768555, "step": 926 }, { "epoch": 2.35, "learning_rate": 1.1933128281197041e-08, "logits/chosen": -2.1964023113250732, "logits/rejected": -2.2025105953216553, "logps/chosen": -2.565891742706299, "logps/rejected": -4.011312961578369, "loss": 0.7452, "rewards/accuracies": 0.0, "rewards/chosen": 0.7023957967758179, "rewards/margins": -0.15349256992340088, "rewards/rejected": 0.8558883666992188, "step": 927 }, { "epoch": 2.35, "learning_rate": 1.1844634042591856e-08, "logits/chosen": -2.3332502841949463, "logits/rejected": -2.3656795024871826, "logps/chosen": -3.23720121383667, "logps/rejected": -7.031388282775879, "loss": 0.5571, "rewards/accuracies": 0.0, "rewards/chosen": 0.7252292037010193, "rewards/margins": -0.0890960693359375, "rewards/rejected": 0.8143252730369568, "step": 928 }, { "epoch": 2.35, "learning_rate": 1.1756425047016439e-08, "logits/chosen": -2.1486661434173584, "logits/rejected": -2.1411752700805664, "logps/chosen": -1.2693583965301514, "logps/rejected": -2.4945621490478516, "loss": 0.5955, "rewards/accuracies": 0.0, "rewards/chosen": 0.7643740773200989, "rewards/margins": -0.2126094102859497, "rewards/rejected": 0.9769834876060486, "step": 929 }, { "epoch": 2.35, "learning_rate": 1.1668501953906279e-08, "logits/chosen": -2.137880563735962, "logits/rejected": -2.144683599472046, "logps/chosen": -2.7900474071502686, "logps/rejected": -3.1311635971069336, "loss": 0.7115, "rewards/accuracies": 1.0, "rewards/chosen": 0.9696177840232849, "rewards/margins": 0.37958621978759766, "rewards/rejected": 0.5900315642356873, "step": 930 }, { "epoch": 2.36, "learning_rate": 1.1580865420559488e-08, "logits/chosen": -2.2976300716400146, "logits/rejected": -2.304739475250244, "logps/chosen": -1.5659983158111572, "logps/rejected": -3.9842641353607178, "loss": 0.6495, "rewards/accuracies": 1.0, "rewards/chosen": 0.8851074576377869, "rewards/margins": 0.4107828438282013, "rewards/rejected": 0.47432461380958557, "step": 931 }, { "epoch": 2.36, "learning_rate": 1.1493516102131835e-08, "logits/chosen": -2.197406768798828, "logits/rejected": -2.194589614868164, "logps/chosen": -1.7929883003234863, "logps/rejected": -6.218681335449219, "loss": 0.7364, "rewards/accuracies": 0.0, "rewards/chosen": 0.5208718776702881, "rewards/margins": -0.4452363848686218, "rewards/rejected": 0.9661082625389099, "step": 932 }, { "epoch": 2.36, "learning_rate": 1.1406454651632042e-08, "logits/chosen": -2.1959216594696045, "logits/rejected": -2.1974236965179443, "logps/chosen": -6.119021415710449, "logps/rejected": -3.1244544982910156, "loss": 0.527, "rewards/accuracies": 1.0, "rewards/chosen": 0.835627019405365, "rewards/margins": 0.4177038371562958, "rewards/rejected": 0.4179231822490692, "step": 933 }, { "epoch": 2.36, "learning_rate": 1.1319681719916662e-08, "logits/chosen": -2.1669437885284424, "logits/rejected": -2.179523468017578, "logps/chosen": -8.539670944213867, "logps/rejected": -3.3190178871154785, "loss": 0.6122, "rewards/accuracies": 1.0, "rewards/chosen": 1.1159861087799072, "rewards/margins": 0.5167246460914612, "rewards/rejected": 0.599261462688446, "step": 934 }, { "epoch": 2.37, "learning_rate": 1.1233197955685409e-08, "logits/chosen": -2.36006760597229, "logits/rejected": -2.363863229751587, "logps/chosen": -3.716642379760742, "logps/rejected": -4.162032127380371, "loss": 0.598, "rewards/accuracies": 1.0, "rewards/chosen": 0.7898367047309875, "rewards/margins": 0.11920821666717529, "rewards/rejected": 0.6706284880638123, "step": 935 }, { "epoch": 2.37, "learning_rate": 1.114700400547619e-08, "logits/chosen": -2.2374091148376465, "logits/rejected": -2.2390975952148438, "logps/chosen": -2.069956064224243, "logps/rejected": -4.506118297576904, "loss": 0.5736, "rewards/accuracies": 1.0, "rewards/chosen": 0.9000030755996704, "rewards/margins": 0.33854174613952637, "rewards/rejected": 0.561461329460144, "step": 936 }, { "epoch": 2.37, "learning_rate": 1.1061100513660331e-08, "logits/chosen": -2.1853067874908447, "logits/rejected": -2.1768124103546143, "logps/chosen": -5.269832611083984, "logps/rejected": -2.9101364612579346, "loss": 0.6613, "rewards/accuracies": 0.0, "rewards/chosen": 0.8624737858772278, "rewards/margins": -0.030346214771270752, "rewards/rejected": 0.8928200006484985, "step": 937 }, { "epoch": 2.37, "learning_rate": 1.0975488122437731e-08, "logits/chosen": -2.2362804412841797, "logits/rejected": -2.2431631088256836, "logps/chosen": -1.4359691143035889, "logps/rejected": -5.545988082885742, "loss": 0.4769, "rewards/accuracies": 1.0, "rewards/chosen": 0.9256862998008728, "rewards/margins": 0.3830626606941223, "rewards/rejected": 0.5426236391067505, "step": 938 }, { "epoch": 2.38, "learning_rate": 1.0890167471832079e-08, "logits/chosen": -2.246027708053589, "logits/rejected": -2.2345662117004395, "logps/chosen": -19.781551361083984, "logps/rejected": -5.692936420440674, "loss": 0.6683, "rewards/accuracies": 0.0, "rewards/chosen": 0.6743047833442688, "rewards/margins": -0.32602888345718384, "rewards/rejected": 1.0003336668014526, "step": 939 }, { "epoch": 2.38, "learning_rate": 1.0805139199686048e-08, "logits/chosen": -2.241070032119751, "logits/rejected": -2.2308714389801025, "logps/chosen": -3.664363384246826, "logps/rejected": -3.676884174346924, "loss": 0.6506, "rewards/accuracies": 0.0, "rewards/chosen": 0.6398123502731323, "rewards/margins": -0.10399961471557617, "rewards/rejected": 0.7438119649887085, "step": 940 }, { "epoch": 2.38, "learning_rate": 1.0720403941656547e-08, "logits/chosen": -2.33453106880188, "logits/rejected": -2.342726230621338, "logps/chosen": -11.462902069091797, "logps/rejected": -1.718941569328308, "loss": 0.6133, "rewards/accuracies": 1.0, "rewards/chosen": 1.0091127157211304, "rewards/margins": 0.2669753432273865, "rewards/rejected": 0.7421373724937439, "step": 941 }, { "epoch": 2.38, "learning_rate": 1.063596233120997e-08, "logits/chosen": -2.192894697189331, "logits/rejected": -2.1805405616760254, "logps/chosen": -1.390250325202942, "logps/rejected": -2.787660598754883, "loss": 0.7734, "rewards/accuracies": 0.0, "rewards/chosen": 0.6790668368339539, "rewards/margins": -0.11396980285644531, "rewards/rejected": 0.7930366396903992, "step": 942 }, { "epoch": 2.39, "learning_rate": 1.055181499961743e-08, "logits/chosen": -2.2261459827423096, "logits/rejected": -2.2320122718811035, "logps/chosen": -5.939493179321289, "logps/rejected": -3.8434906005859375, "loss": 0.6673, "rewards/accuracies": 1.0, "rewards/chosen": 1.074951171875, "rewards/margins": 0.5818917751312256, "rewards/rejected": 0.493059366941452, "step": 943 }, { "epoch": 2.39, "learning_rate": 1.0467962575950095e-08, "logits/chosen": -2.2658841609954834, "logits/rejected": -2.307223081588745, "logps/chosen": -0.7439841032028198, "logps/rejected": -8.133081436157227, "loss": 0.6374, "rewards/accuracies": 1.0, "rewards/chosen": 0.8001956939697266, "rewards/margins": 0.18644863367080688, "rewards/rejected": 0.6137470602989197, "step": 944 }, { "epoch": 2.39, "learning_rate": 1.0384405687074399e-08, "logits/chosen": -2.199993371963501, "logits/rejected": -2.202789068222046, "logps/chosen": -1.8582513332366943, "logps/rejected": -5.393517017364502, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": 0.766592800617218, "rewards/margins": 0.0561642050743103, "rewards/rejected": 0.7104285955429077, "step": 945 }, { "epoch": 2.39, "learning_rate": 1.030114495764744e-08, "logits/chosen": -2.2692227363586426, "logits/rejected": -2.2809910774230957, "logps/chosen": -5.929428577423096, "logps/rejected": -3.702591896057129, "loss": 0.6007, "rewards/accuracies": 1.0, "rewards/chosen": 0.9934707880020142, "rewards/margins": 0.28397446870803833, "rewards/rejected": 0.7094963192939758, "step": 946 }, { "epoch": 2.4, "learning_rate": 1.021818101011232e-08, "logits/chosen": -2.1340255737304688, "logits/rejected": -2.139509916305542, "logps/chosen": -1.560450553894043, "logps/rejected": -3.8244469165802, "loss": 0.6596, "rewards/accuracies": 1.0, "rewards/chosen": 0.9353194236755371, "rewards/margins": 0.4529392123222351, "rewards/rejected": 0.482380211353302, "step": 947 }, { "epoch": 2.4, "learning_rate": 1.0135514464693369e-08, "logits/chosen": -2.1942555904388428, "logits/rejected": -2.185358762741089, "logps/chosen": -1.8630878925323486, "logps/rejected": -5.161010265350342, "loss": 0.763, "rewards/accuracies": 0.0, "rewards/chosen": 0.7054448127746582, "rewards/margins": -0.2012423872947693, "rewards/rejected": 0.9066872000694275, "step": 948 }, { "epoch": 2.4, "learning_rate": 1.0053145939391639e-08, "logits/chosen": -2.3774027824401855, "logits/rejected": -2.3741095066070557, "logps/chosen": -1.2300748825073242, "logps/rejected": -6.0403361320495605, "loss": 0.5442, "rewards/accuracies": 1.0, "rewards/chosen": 0.7647964358329773, "rewards/margins": 0.39067548513412476, "rewards/rejected": 0.37412095069885254, "step": 949 }, { "epoch": 2.41, "learning_rate": 9.971076049980221e-09, "logits/chosen": -2.2298202514648438, "logits/rejected": -2.226912021636963, "logps/chosen": -1.6604965925216675, "logps/rejected": -6.784514427185059, "loss": 0.5486, "rewards/accuracies": 0.0, "rewards/chosen": 0.6223983764648438, "rewards/margins": -0.16907453536987305, "rewards/rejected": 0.7914729118347168, "step": 950 }, { "epoch": 2.41, "learning_rate": 9.889305409999654e-09, "logits/chosen": -2.1694586277008057, "logits/rejected": -2.322640895843506, "logps/chosen": -1.5633660554885864, "logps/rejected": -14.270646095275879, "loss": 0.6061, "rewards/accuracies": 1.0, "rewards/chosen": 0.8002558946609497, "rewards/margins": 0.06826955080032349, "rewards/rejected": 0.7319863438606262, "step": 951 }, { "epoch": 2.41, "learning_rate": 9.807834630753364e-09, "logits/chosen": -2.2317121028900146, "logits/rejected": -2.2461953163146973, "logps/chosen": -5.563626766204834, "logps/rejected": -5.214356422424316, "loss": 0.4963, "rewards/accuracies": 1.0, "rewards/chosen": 1.0665358304977417, "rewards/margins": 0.48796069622039795, "rewards/rejected": 0.5785751342773438, "step": 952 }, { "epoch": 2.41, "learning_rate": 9.726664321303008e-09, "logits/chosen": -2.168039560317993, "logits/rejected": -2.1706345081329346, "logps/chosen": -6.390867710113525, "logps/rejected": -1.6300281286239624, "loss": 0.6173, "rewards/accuracies": 1.0, "rewards/chosen": 0.8818607330322266, "rewards/margins": 0.011134028434753418, "rewards/rejected": 0.8707267045974731, "step": 953 }, { "epoch": 2.42, "learning_rate": 9.645795088464048e-09, "logits/chosen": -2.232996940612793, "logits/rejected": -2.242785692214966, "logps/chosen": -5.260503768920898, "logps/rejected": -4.21711540222168, "loss": 0.4879, "rewards/accuracies": 1.0, "rewards/chosen": 1.0526796579360962, "rewards/margins": 0.49008697271347046, "rewards/rejected": 0.5625926852226257, "step": 954 }, { "epoch": 2.42, "learning_rate": 9.565227536801134e-09, "logits/chosen": -2.3001556396484375, "logits/rejected": -2.3018300533294678, "logps/chosen": -1.4780985116958618, "logps/rejected": -2.9659290313720703, "loss": 0.6246, "rewards/accuracies": 1.0, "rewards/chosen": 0.8427436947822571, "rewards/margins": 0.3312244415283203, "rewards/rejected": 0.5115192532539368, "step": 955 }, { "epoch": 2.42, "learning_rate": 9.484962268623547e-09, "logits/chosen": -2.316647529602051, "logits/rejected": -2.455185890197754, "logps/chosen": -1.581041693687439, "logps/rejected": -13.98841667175293, "loss": 0.65, "rewards/accuracies": 1.0, "rewards/chosen": 0.7851170897483826, "rewards/margins": 0.06123781204223633, "rewards/rejected": 0.7238792777061462, "step": 956 }, { "epoch": 2.42, "learning_rate": 9.404999883980819e-09, "logits/chosen": -2.29158878326416, "logits/rejected": -2.276555299758911, "logps/chosen": -2.4086194038391113, "logps/rejected": -7.445014476776123, "loss": 0.6717, "rewards/accuracies": 0.0, "rewards/chosen": 0.5321805477142334, "rewards/margins": -0.513288140296936, "rewards/rejected": 1.0454686880111694, "step": 957 }, { "epoch": 2.43, "learning_rate": 9.325340980658147e-09, "logits/chosen": -2.2385082244873047, "logits/rejected": -2.239258050918579, "logps/chosen": -4.43653678894043, "logps/rejected": -3.3363897800445557, "loss": 0.6084, "rewards/accuracies": 1.0, "rewards/chosen": 0.7946736216545105, "rewards/margins": 0.14519506692886353, "rewards/rejected": 0.649478554725647, "step": 958 }, { "epoch": 2.43, "learning_rate": 9.245986154171914e-09, "logits/chosen": -2.2287824153900146, "logits/rejected": -2.231468677520752, "logps/chosen": -0.7654460072517395, "logps/rejected": -6.50847053527832, "loss": 0.5808, "rewards/accuracies": 1.0, "rewards/chosen": 0.8002601861953735, "rewards/margins": 0.5013002157211304, "rewards/rejected": 0.2989599406719208, "step": 959 }, { "epoch": 2.43, "learning_rate": 9.166935997765363e-09, "logits/chosen": -2.219212293624878, "logits/rejected": -2.2314679622650146, "logps/chosen": -0.8288359642028809, "logps/rejected": -5.717096328735352, "loss": 0.6494, "rewards/accuracies": 1.0, "rewards/chosen": 0.820813775062561, "rewards/margins": 0.15836620330810547, "rewards/rejected": 0.6624475717544556, "step": 960 }, { "epoch": 2.43, "learning_rate": 9.088191102403992e-09, "logits/chosen": -2.263113021850586, "logits/rejected": -2.2753162384033203, "logps/chosen": -6.794457912445068, "logps/rejected": -2.3302884101867676, "loss": 0.6254, "rewards/accuracies": 1.0, "rewards/chosen": 1.1118217706680298, "rewards/margins": 0.45091503858566284, "rewards/rejected": 0.6609067320823669, "step": 961 }, { "epoch": 2.44, "learning_rate": 9.009752056771258e-09, "logits/chosen": -2.2467920780181885, "logits/rejected": -2.2515170574188232, "logps/chosen": -1.4514563083648682, "logps/rejected": -1.1346790790557861, "loss": 0.7204, "rewards/accuracies": 1.0, "rewards/chosen": 0.8711681365966797, "rewards/margins": 0.2569734454154968, "rewards/rejected": 0.6141946911811829, "step": 962 }, { "epoch": 2.44, "learning_rate": 8.931619447264139e-09, "logits/chosen": -2.292536497116089, "logits/rejected": -2.285972833633423, "logps/chosen": -1.4521870613098145, "logps/rejected": -4.500589370727539, "loss": 0.6887, "rewards/accuracies": 0.0, "rewards/chosen": 0.7216106057167053, "rewards/margins": -0.08937603235244751, "rewards/rejected": 0.8109866380691528, "step": 963 }, { "epoch": 2.44, "learning_rate": 8.853793857988734e-09, "logits/chosen": -2.27873158454895, "logits/rejected": -2.3264148235321045, "logps/chosen": -0.8054472208023071, "logps/rejected": -6.209721565246582, "loss": 0.6777, "rewards/accuracies": 1.0, "rewards/chosen": 0.8566581606864929, "rewards/margins": 0.20312267541885376, "rewards/rejected": 0.6535354852676392, "step": 964 }, { "epoch": 2.44, "learning_rate": 8.776275870755923e-09, "logits/chosen": -2.276728630065918, "logits/rejected": -2.2679643630981445, "logps/chosen": -1.7194135189056396, "logps/rejected": -5.537649631500244, "loss": 0.6917, "rewards/accuracies": 0.0, "rewards/chosen": 0.6613977551460266, "rewards/margins": -0.14508593082427979, "rewards/rejected": 0.8064836859703064, "step": 965 }, { "epoch": 2.45, "learning_rate": 8.699066065077004e-09, "logits/chosen": -2.314042091369629, "logits/rejected": -2.3146965503692627, "logps/chosen": -1.7909899950027466, "logps/rejected": -3.252124547958374, "loss": 0.5573, "rewards/accuracies": 1.0, "rewards/chosen": 0.8108565211296082, "rewards/margins": 0.39087721705436707, "rewards/rejected": 0.4199793040752411, "step": 966 }, { "epoch": 2.45, "learning_rate": 8.622165018159355e-09, "logits/chosen": -2.234696388244629, "logits/rejected": -2.235637903213501, "logps/chosen": -4.78697395324707, "logps/rejected": -4.375446319580078, "loss": 0.6783, "rewards/accuracies": 1.0, "rewards/chosen": 0.8422343134880066, "rewards/margins": 0.35981759428977966, "rewards/rejected": 0.48241671919822693, "step": 967 }, { "epoch": 2.45, "learning_rate": 8.545573304902149e-09, "logits/chosen": -2.2177486419677734, "logits/rejected": -2.2235445976257324, "logps/chosen": -8.645162582397461, "logps/rejected": -3.0766639709472656, "loss": 0.4844, "rewards/accuracies": 1.0, "rewards/chosen": 1.2161273956298828, "rewards/margins": 0.5649592280387878, "rewards/rejected": 0.651168167591095, "step": 968 }, { "epoch": 2.45, "learning_rate": 8.469291497891978e-09, "logits/chosen": -2.194626569747925, "logits/rejected": -2.20223331451416, "logps/chosen": -3.1011037826538086, "logps/rejected": -7.359055042266846, "loss": 0.6846, "rewards/accuracies": 0.0, "rewards/chosen": 0.7644912004470825, "rewards/margins": -0.33977532386779785, "rewards/rejected": 1.1042665243148804, "step": 969 }, { "epoch": 2.46, "learning_rate": 8.39332016739867e-09, "logits/chosen": -2.1591408252716064, "logits/rejected": -2.1615166664123535, "logps/chosen": -1.4227077960968018, "logps/rejected": -7.652206897735596, "loss": 0.5622, "rewards/accuracies": 0.0, "rewards/chosen": 0.7565475702285767, "rewards/margins": -0.2592576742172241, "rewards/rejected": 1.0158052444458008, "step": 970 }, { "epoch": 2.46, "learning_rate": 8.31765988137102e-09, "logits/chosen": -2.1958260536193848, "logits/rejected": -2.1859824657440186, "logps/chosen": -2.592071294784546, "logps/rejected": -5.908250331878662, "loss": 0.6336, "rewards/accuracies": 0.0, "rewards/chosen": 0.7032591104507446, "rewards/margins": -0.04156976938247681, "rewards/rejected": 0.7448288798332214, "step": 971 }, { "epoch": 2.46, "learning_rate": 8.242311205432418e-09, "logits/chosen": -2.2750587463378906, "logits/rejected": -2.293727159500122, "logps/chosen": -4.4294023513793945, "logps/rejected": -3.7334442138671875, "loss": 0.4955, "rewards/accuracies": 1.0, "rewards/chosen": 1.1753824949264526, "rewards/margins": 0.6537737250328064, "rewards/rejected": 0.5216087698936462, "step": 972 }, { "epoch": 2.46, "learning_rate": 8.167274702876765e-09, "logits/chosen": -2.1605632305145264, "logits/rejected": -2.158985137939453, "logps/chosen": -4.030506610870361, "logps/rejected": -4.686598777770996, "loss": 0.6316, "rewards/accuracies": 1.0, "rewards/chosen": 0.7827487587928772, "rewards/margins": 0.39505621790885925, "rewards/rejected": 0.38769254088401794, "step": 973 }, { "epoch": 2.47, "learning_rate": 8.092550934664227e-09, "logits/chosen": -2.2256433963775635, "logits/rejected": -2.2291648387908936, "logps/chosen": -3.921919822692871, "logps/rejected": -4.032479286193848, "loss": 0.6628, "rewards/accuracies": 1.0, "rewards/chosen": 0.9029619097709656, "rewards/margins": 0.4701082110404968, "rewards/rejected": 0.43285369873046875, "step": 974 }, { "epoch": 2.47, "learning_rate": 8.01814045941696e-09, "logits/chosen": -2.327510118484497, "logits/rejected": -2.4267451763153076, "logps/chosen": -10.378884315490723, "logps/rejected": -25.638389587402344, "loss": 0.5751, "rewards/accuracies": 1.0, "rewards/chosen": 0.895108699798584, "rewards/margins": 0.48357534408569336, "rewards/rejected": 0.4115333557128906, "step": 975 }, { "epoch": 2.47, "learning_rate": 7.944043833415043e-09, "logits/chosen": -2.0908725261688232, "logits/rejected": -2.0693347454071045, "logps/chosen": -1.821333885192871, "logps/rejected": -3.8648085594177246, "loss": 0.678, "rewards/accuracies": 0.0, "rewards/chosen": 0.7471885085105896, "rewards/margins": -0.2846531271934509, "rewards/rejected": 1.0318416357040405, "step": 976 }, { "epoch": 2.47, "learning_rate": 7.870261610592256e-09, "logits/chosen": -2.2441277503967285, "logits/rejected": -2.315542459487915, "logps/chosen": -0.7060465216636658, "logps/rejected": -28.84638214111328, "loss": 0.4897, "rewards/accuracies": 1.0, "rewards/chosen": 0.7415560483932495, "rewards/margins": 0.4522843062877655, "rewards/rejected": 0.289271742105484, "step": 977 }, { "epoch": 2.48, "learning_rate": 7.796794342531949e-09, "logits/chosen": -2.165034055709839, "logits/rejected": -2.173696279525757, "logps/chosen": -0.9383249282836914, "logps/rejected": -6.099370002746582, "loss": 0.5482, "rewards/accuracies": 1.0, "rewards/chosen": 0.989539623260498, "rewards/margins": 0.5630956888198853, "rewards/rejected": 0.4264439642429352, "step": 978 }, { "epoch": 2.48, "learning_rate": 7.723642578462947e-09, "logits/chosen": -2.2332825660705566, "logits/rejected": -2.2153847217559814, "logps/chosen": -1.3943504095077515, "logps/rejected": -3.387753963470459, "loss": 0.7272, "rewards/accuracies": 0.0, "rewards/chosen": 0.6967706084251404, "rewards/margins": -0.2171626091003418, "rewards/rejected": 0.9139332175254822, "step": 979 }, { "epoch": 2.48, "learning_rate": 7.65080686525536e-09, "logits/chosen": -2.340893507003784, "logits/rejected": -2.3338749408721924, "logps/chosen": -2.1963186264038086, "logps/rejected": -5.206376552581787, "loss": 0.6961, "rewards/accuracies": 1.0, "rewards/chosen": 0.7503089904785156, "rewards/margins": 0.014097452163696289, "rewards/rejected": 0.7362115383148193, "step": 980 }, { "epoch": 2.48, "learning_rate": 7.578287747416639e-09, "logits/chosen": -2.1706602573394775, "logits/rejected": -2.1654157638549805, "logps/chosen": -0.6198164820671082, "logps/rejected": -7.641117095947266, "loss": 0.7547, "rewards/accuracies": 0.0, "rewards/chosen": 0.6041125655174255, "rewards/margins": -0.5819794535636902, "rewards/rejected": 1.1860920190811157, "step": 981 }, { "epoch": 2.49, "learning_rate": 7.506085767087384e-09, "logits/chosen": -2.2714974880218506, "logits/rejected": -2.335819721221924, "logps/chosen": -0.7842181921005249, "logps/rejected": -29.086933135986328, "loss": 0.6304, "rewards/accuracies": 1.0, "rewards/chosen": 0.7850021123886108, "rewards/margins": 0.5640556216239929, "rewards/rejected": 0.2209465056657791, "step": 982 }, { "epoch": 2.49, "learning_rate": 7.4342014640372875e-09, "logits/chosen": -2.302907705307007, "logits/rejected": -2.286099910736084, "logps/chosen": -0.9122976064682007, "logps/rejected": -6.932967185974121, "loss": 0.5405, "rewards/accuracies": 1.0, "rewards/chosen": 0.7406827807426453, "rewards/margins": 0.01100146770477295, "rewards/rejected": 0.7296813130378723, "step": 983 }, { "epoch": 2.49, "learning_rate": 7.3626353756612245e-09, "logits/chosen": -2.2449541091918945, "logits/rejected": -2.245927572250366, "logps/chosen": -1.2218672037124634, "logps/rejected": -2.1630780696868896, "loss": 0.5475, "rewards/accuracies": 1.0, "rewards/chosen": 0.8925542235374451, "rewards/margins": 0.23362791538238525, "rewards/rejected": 0.6589263081550598, "step": 984 }, { "epoch": 2.49, "learning_rate": 7.291388036975071e-09, "logits/chosen": -2.354844331741333, "logits/rejected": -2.4205949306488037, "logps/chosen": -2.129838466644287, "logps/rejected": -25.910266876220703, "loss": 0.6163, "rewards/accuracies": 1.0, "rewards/chosen": 0.7194430828094482, "rewards/margins": 0.2827092707157135, "rewards/rejected": 0.43673381209373474, "step": 985 }, { "epoch": 2.5, "learning_rate": 7.220459980611837e-09, "logits/chosen": -2.292114734649658, "logits/rejected": -2.3200666904449463, "logps/chosen": -2.706592321395874, "logps/rejected": -5.973620891571045, "loss": 0.6044, "rewards/accuracies": 1.0, "rewards/chosen": 0.730741024017334, "rewards/margins": 0.10212576389312744, "rewards/rejected": 0.6286152601242065, "step": 986 }, { "epoch": 2.5, "learning_rate": 7.149851736817608e-09, "logits/chosen": -2.296978712081909, "logits/rejected": -2.2899913787841797, "logps/chosen": -1.636295199394226, "logps/rejected": -7.5025129318237305, "loss": 0.8444, "rewards/accuracies": 0.0, "rewards/chosen": 0.6073744893074036, "rewards/margins": -0.3978796601295471, "rewards/rejected": 1.0052541494369507, "step": 987 }, { "epoch": 2.5, "learning_rate": 7.079563833447616e-09, "logits/chosen": -2.2760093212127686, "logits/rejected": -2.266831636428833, "logps/chosen": -1.6140741109848022, "logps/rejected": -9.566278457641602, "loss": 0.675, "rewards/accuracies": 0.0, "rewards/chosen": 0.7694916129112244, "rewards/margins": -0.07889610528945923, "rewards/rejected": 0.8483877182006836, "step": 988 }, { "epoch": 2.5, "learning_rate": 7.009596795962275e-09, "logits/chosen": -2.2266690731048584, "logits/rejected": -2.232175827026367, "logps/chosen": -6.639265060424805, "logps/rejected": -3.8093838691711426, "loss": 0.5764, "rewards/accuracies": 1.0, "rewards/chosen": 1.131039023399353, "rewards/margins": 0.5088719725608826, "rewards/rejected": 0.6221670508384705, "step": 989 }, { "epoch": 2.51, "learning_rate": 6.939951147423268e-09, "logits/chosen": -2.3172507286071777, "logits/rejected": -2.3218352794647217, "logps/chosen": -0.6256335973739624, "logps/rejected": -8.358719825744629, "loss": 0.6554, "rewards/accuracies": 0.0, "rewards/chosen": 0.70262610912323, "rewards/margins": -0.3652832508087158, "rewards/rejected": 1.0679093599319458, "step": 990 }, { "epoch": 2.51, "learning_rate": 6.8706274084896155e-09, "logits/chosen": -2.2595160007476807, "logits/rejected": -2.2625210285186768, "logps/chosen": -0.8444182872772217, "logps/rejected": -2.2003016471862793, "loss": 0.5839, "rewards/accuracies": 1.0, "rewards/chosen": 0.9070292711257935, "rewards/margins": 0.4168420433998108, "rewards/rejected": 0.49018722772598267, "step": 991 }, { "epoch": 2.51, "learning_rate": 6.801626097413815e-09, "logits/chosen": -2.3131072521209717, "logits/rejected": -2.311924695968628, "logps/chosen": -1.3334414958953857, "logps/rejected": -4.404955863952637, "loss": 0.6937, "rewards/accuracies": 0.0, "rewards/chosen": 0.6296387910842896, "rewards/margins": -0.30288809537887573, "rewards/rejected": 0.9325268864631653, "step": 992 }, { "epoch": 2.51, "learning_rate": 6.732947730037935e-09, "logits/chosen": -2.241238832473755, "logits/rejected": -2.2357425689697266, "logps/chosen": -2.3773233890533447, "logps/rejected": -3.00187611579895, "loss": 0.6988, "rewards/accuracies": 0.0, "rewards/chosen": 0.8257603049278259, "rewards/margins": -0.14897513389587402, "rewards/rejected": 0.9747354388237, "step": 993 }, { "epoch": 2.52, "learning_rate": 6.664592819789777e-09, "logits/chosen": -2.263425827026367, "logits/rejected": -2.2573277950286865, "logps/chosen": -6.007384777069092, "logps/rejected": -2.986701250076294, "loss": 0.5497, "rewards/accuracies": 1.0, "rewards/chosen": 0.821519672870636, "rewards/margins": 0.34514304995536804, "rewards/rejected": 0.47637662291526794, "step": 994 }, { "epoch": 2.52, "learning_rate": 6.596561877679035e-09, "logits/chosen": -2.300645589828491, "logits/rejected": -2.3263304233551025, "logps/chosen": -1.3911826610565186, "logps/rejected": -7.937463283538818, "loss": 0.7156, "rewards/accuracies": 1.0, "rewards/chosen": 0.9595452547073364, "rewards/margins": 0.5403754711151123, "rewards/rejected": 0.41916975378990173, "step": 995 }, { "epoch": 2.52, "learning_rate": 6.528855412293449e-09, "logits/chosen": -2.3292291164398193, "logits/rejected": -2.310650110244751, "logps/chosen": -7.206446170806885, "logps/rejected": -5.700892925262451, "loss": 0.5956, "rewards/accuracies": 1.0, "rewards/chosen": 0.8559461832046509, "rewards/margins": 0.010587334632873535, "rewards/rejected": 0.8453588485717773, "step": 996 }, { "epoch": 2.52, "learning_rate": 6.461473929795053e-09, "logits/chosen": -2.1834497451782227, "logits/rejected": -2.1819348335266113, "logps/chosen": -1.4716675281524658, "logps/rejected": -6.475998401641846, "loss": 0.6969, "rewards/accuracies": 0.0, "rewards/chosen": 0.6555556654930115, "rewards/margins": -0.25350672006607056, "rewards/rejected": 0.909062385559082, "step": 997 }, { "epoch": 2.53, "learning_rate": 6.394417933916374e-09, "logits/chosen": -2.193411111831665, "logits/rejected": -2.2418696880340576, "logps/chosen": -1.59909188747406, "logps/rejected": -13.864431381225586, "loss": 0.7171, "rewards/accuracies": 0.0, "rewards/chosen": 0.6588600873947144, "rewards/margins": -0.2872641682624817, "rewards/rejected": 0.946124255657196, "step": 998 }, { "epoch": 2.53, "learning_rate": 6.327687925956615e-09, "logits/chosen": -2.21540904045105, "logits/rejected": -2.2108943462371826, "logps/chosen": -1.7119669914245605, "logps/rejected": -5.7693867683410645, "loss": 0.6417, "rewards/accuracies": 0.0, "rewards/chosen": 0.6941158175468445, "rewards/margins": -0.062413573265075684, "rewards/rejected": 0.7565293908119202, "step": 999 }, { "epoch": 2.53, "learning_rate": 6.2612844047779775e-09, "logits/chosen": -2.264315366744995, "logits/rejected": -2.258441209793091, "logps/chosen": -3.3244435787200928, "logps/rejected": -4.788002967834473, "loss": 0.7079, "rewards/accuracies": 1.0, "rewards/chosen": 0.6874887347221375, "rewards/margins": 0.20930659770965576, "rewards/rejected": 0.4781821370124817, "step": 1000 }, { "epoch": 2.53, "learning_rate": 6.195207866801899e-09, "logits/chosen": -2.2621116638183594, "logits/rejected": -2.25476336479187, "logps/chosen": -1.5338077545166016, "logps/rejected": -4.510954856872559, "loss": 0.762, "rewards/accuracies": 0.0, "rewards/chosen": 0.7508653998374939, "rewards/margins": -0.2647170424461365, "rewards/rejected": 1.0155824422836304, "step": 1001 }, { "epoch": 2.54, "learning_rate": 6.12945880600535e-09, "logits/chosen": -2.2517805099487305, "logits/rejected": -2.255138397216797, "logps/chosen": -18.49143409729004, "logps/rejected": -12.136792182922363, "loss": 0.6837, "rewards/accuracies": 0.0, "rewards/chosen": 0.6307659149169922, "rewards/margins": -0.3667284846305847, "rewards/rejected": 0.9974943995475769, "step": 1002 }, { "epoch": 2.54, "learning_rate": 6.06403771391713e-09, "logits/chosen": -2.2214772701263428, "logits/rejected": -2.207829236984253, "logps/chosen": -1.0877199172973633, "logps/rejected": -6.622659206390381, "loss": 0.737, "rewards/accuracies": 0.0, "rewards/chosen": 0.7061384320259094, "rewards/margins": -0.15965592861175537, "rewards/rejected": 0.8657943606376648, "step": 1003 }, { "epoch": 2.54, "learning_rate": 5.998945079614199e-09, "logits/chosen": -2.2357890605926514, "logits/rejected": -2.246567726135254, "logps/chosen": -1.8109253644943237, "logps/rejected": -3.9823660850524902, "loss": 0.6746, "rewards/accuracies": 1.0, "rewards/chosen": 0.8034787178039551, "rewards/margins": 0.3412412106990814, "rewards/rejected": 0.46223750710487366, "step": 1004 }, { "epoch": 2.54, "learning_rate": 5.9341813897180295e-09, "logits/chosen": -2.273313283920288, "logits/rejected": -2.26743745803833, "logps/chosen": -2.6931259632110596, "logps/rejected": -4.900378704071045, "loss": 0.6576, "rewards/accuracies": 0.0, "rewards/chosen": 0.7226996421813965, "rewards/margins": -0.10455429553985596, "rewards/rejected": 0.8272539377212524, "step": 1005 }, { "epoch": 2.55, "learning_rate": 5.869747128390961e-09, "logits/chosen": -2.1792235374450684, "logits/rejected": -2.1730146408081055, "logps/chosen": -2.1184699535369873, "logps/rejected": -7.4009270668029785, "loss": 0.6695, "rewards/accuracies": 0.0, "rewards/chosen": 0.5703205466270447, "rewards/margins": -0.23959922790527344, "rewards/rejected": 0.8099197745323181, "step": 1006 }, { "epoch": 2.55, "learning_rate": 5.805642777332559e-09, "logits/chosen": -2.259801149368286, "logits/rejected": -2.3183412551879883, "logps/chosen": -1.1283164024353027, "logps/rejected": -6.790138244628906, "loss": 0.5824, "rewards/accuracies": 1.0, "rewards/chosen": 0.8292347192764282, "rewards/margins": 0.3421896994113922, "rewards/rejected": 0.487045019865036, "step": 1007 }, { "epoch": 2.55, "learning_rate": 5.741868815776079e-09, "logits/chosen": -2.18332839012146, "logits/rejected": -2.175600290298462, "logps/chosen": -2.181260108947754, "logps/rejected": -3.0247793197631836, "loss": 0.6708, "rewards/accuracies": 0.0, "rewards/chosen": 0.7882691621780396, "rewards/margins": -0.17480188608169556, "rewards/rejected": 0.9630710482597351, "step": 1008 }, { "epoch": 2.55, "learning_rate": 5.678425720484814e-09, "logits/chosen": -2.2441468238830566, "logits/rejected": -2.238487958908081, "logps/chosen": -1.6458126306533813, "logps/rejected": -4.946414947509766, "loss": 0.5872, "rewards/accuracies": 0.0, "rewards/chosen": 0.7699817419052124, "rewards/margins": -0.12260496616363525, "rewards/rejected": 0.8925867080688477, "step": 1009 }, { "epoch": 2.56, "learning_rate": 5.6153139657485305e-09, "logits/chosen": -2.295301675796509, "logits/rejected": -2.301226854324341, "logps/chosen": -1.6731997728347778, "logps/rejected": -3.665823459625244, "loss": 0.5993, "rewards/accuracies": 1.0, "rewards/chosen": 0.9586867690086365, "rewards/margins": 0.4068378210067749, "rewards/rejected": 0.5518489480018616, "step": 1010 }, { "epoch": 2.56, "learning_rate": 5.552534023380023e-09, "logits/chosen": -2.255643367767334, "logits/rejected": -2.286583185195923, "logps/chosen": -21.24981689453125, "logps/rejected": -15.552163124084473, "loss": 0.5767, "rewards/accuracies": 1.0, "rewards/chosen": 0.7166553735733032, "rewards/margins": 0.05193185806274414, "rewards/rejected": 0.6647235155105591, "step": 1011 }, { "epoch": 2.56, "learning_rate": 5.490086362711433e-09, "logits/chosen": -2.210249423980713, "logits/rejected": -2.2269952297210693, "logps/chosen": -5.225412845611572, "logps/rejected": -3.7991080284118652, "loss": 0.5844, "rewards/accuracies": 1.0, "rewards/chosen": 1.0747722387313843, "rewards/margins": 0.4024495482444763, "rewards/rejected": 0.672322690486908, "step": 1012 }, { "epoch": 2.56, "learning_rate": 5.427971450590868e-09, "logits/chosen": -2.2314183712005615, "logits/rejected": -2.214553117752075, "logps/chosen": -4.857758045196533, "logps/rejected": -5.022180080413818, "loss": 0.76, "rewards/accuracies": 0.0, "rewards/chosen": 0.5085108280181885, "rewards/margins": -0.4751407504081726, "rewards/rejected": 0.9836515784263611, "step": 1013 }, { "epoch": 2.57, "learning_rate": 5.366189751378858e-09, "logits/chosen": -2.20629620552063, "logits/rejected": -2.1977880001068115, "logps/chosen": -2.3848283290863037, "logps/rejected": -8.523614883422852, "loss": 0.622, "rewards/accuracies": 0.0, "rewards/chosen": 0.597952663898468, "rewards/margins": -0.2442251443862915, "rewards/rejected": 0.8421778082847595, "step": 1014 }, { "epoch": 2.57, "learning_rate": 5.304741726944872e-09, "logits/chosen": -2.2501959800720215, "logits/rejected": -2.2475602626800537, "logps/chosen": -2.5101449489593506, "logps/rejected": -13.508455276489258, "loss": 0.5807, "rewards/accuracies": 0.0, "rewards/chosen": 0.8718706369400024, "rewards/margins": -0.0544513463973999, "rewards/rejected": 0.9263219833374023, "step": 1015 }, { "epoch": 2.57, "learning_rate": 5.243627836663906e-09, "logits/chosen": -2.230863571166992, "logits/rejected": -2.295661211013794, "logps/chosen": -3.0934441089630127, "logps/rejected": -23.203136444091797, "loss": 0.5214, "rewards/accuracies": 1.0, "rewards/chosen": 0.8002228736877441, "rewards/margins": 0.8891991376876831, "rewards/rejected": -0.08897628635168076, "step": 1016 }, { "epoch": 2.57, "learning_rate": 5.182848537413009e-09, "logits/chosen": -2.138970375061035, "logits/rejected": -2.15055775642395, "logps/chosen": -5.619922637939453, "logps/rejected": -4.309525012969971, "loss": 0.6269, "rewards/accuracies": 1.0, "rewards/chosen": 1.0674152374267578, "rewards/margins": 0.44469618797302246, "rewards/rejected": 0.6227190494537354, "step": 1017 }, { "epoch": 2.58, "learning_rate": 5.1224042835678885e-09, "logits/chosen": -2.2172341346740723, "logits/rejected": -2.2218384742736816, "logps/chosen": -1.9655081033706665, "logps/rejected": -5.247321128845215, "loss": 0.5528, "rewards/accuracies": 1.0, "rewards/chosen": 0.9499692320823669, "rewards/margins": 0.5206705331802368, "rewards/rejected": 0.4292986989021301, "step": 1018 }, { "epoch": 2.58, "learning_rate": 5.062295526999522e-09, "logits/chosen": -2.242703676223755, "logits/rejected": -2.360018253326416, "logps/chosen": -0.8972992300987244, "logps/rejected": -17.854488372802734, "loss": 0.6166, "rewards/accuracies": 1.0, "rewards/chosen": 0.7154859304428101, "rewards/margins": 0.30864980816841125, "rewards/rejected": 0.4068361222743988, "step": 1019 }, { "epoch": 2.58, "learning_rate": 5.002522717070751e-09, "logits/chosen": -2.2711410522460938, "logits/rejected": -2.263277292251587, "logps/chosen": -6.067018508911133, "logps/rejected": -10.721774101257324, "loss": 0.6217, "rewards/accuracies": 1.0, "rewards/chosen": 0.8142135739326477, "rewards/margins": 0.43927499651908875, "rewards/rejected": 0.37493857741355896, "step": 1020 }, { "epoch": 2.58, "learning_rate": 4.943086300632921e-09, "logits/chosen": -2.186368942260742, "logits/rejected": -2.190232038497925, "logps/chosen": -4.649319648742676, "logps/rejected": -1.9743592739105225, "loss": 0.6717, "rewards/accuracies": 1.0, "rewards/chosen": 0.718334972858429, "rewards/margins": 0.03418320417404175, "rewards/rejected": 0.6841517686843872, "step": 1021 }, { "epoch": 2.59, "learning_rate": 4.883986722022609e-09, "logits/chosen": -2.2558038234710693, "logits/rejected": -2.2518362998962402, "logps/chosen": -1.400003433227539, "logps/rejected": -3.2240586280822754, "loss": 0.5798, "rewards/accuracies": 1.0, "rewards/chosen": 0.7345497012138367, "rewards/margins": 0.22802257537841797, "rewards/rejected": 0.5065271258354187, "step": 1022 }, { "epoch": 2.59, "learning_rate": 4.8252244230581995e-09, "logits/chosen": -2.253950834274292, "logits/rejected": -2.258204221725464, "logps/chosen": -2.3124144077301025, "logps/rejected": -3.8955843448638916, "loss": 0.6286, "rewards/accuracies": 0.0, "rewards/chosen": 0.6624904870986938, "rewards/margins": -0.1537519097328186, "rewards/rejected": 0.8162423968315125, "step": 1023 }, { "epoch": 2.59, "learning_rate": 4.766799843036651e-09, "logits/chosen": -2.287260055541992, "logits/rejected": -2.297297716140747, "logps/chosen": -5.186746120452881, "logps/rejected": -3.5218634605407715, "loss": 0.6889, "rewards/accuracies": 0.0, "rewards/chosen": 0.753389298915863, "rewards/margins": -0.027840912342071533, "rewards/rejected": 0.7812302112579346, "step": 1024 }, { "epoch": 2.59, "learning_rate": 4.7087134187302094e-09, "logits/chosen": -2.276024580001831, "logits/rejected": -2.2719767093658447, "logps/chosen": -1.1305162906646729, "logps/rejected": -4.708954334259033, "loss": 0.5926, "rewards/accuracies": 1.0, "rewards/chosen": 0.813755989074707, "rewards/margins": 0.3772863745689392, "rewards/rejected": 0.4364696145057678, "step": 1025 }, { "epoch": 2.6, "learning_rate": 4.650965584383082e-09, "logits/chosen": -2.142771005630493, "logits/rejected": -2.1224870681762695, "logps/chosen": -4.390654563903809, "logps/rejected": -2.482484817504883, "loss": 0.6508, "rewards/accuracies": 1.0, "rewards/chosen": 0.8836144804954529, "rewards/margins": 0.22295105457305908, "rewards/rejected": 0.6606634259223938, "step": 1026 }, { "epoch": 2.6, "learning_rate": 4.593556771708279e-09, "logits/chosen": -2.2339794635772705, "logits/rejected": -2.3844504356384277, "logps/chosen": -1.7439887523651123, "logps/rejected": -35.229736328125, "loss": 0.5155, "rewards/accuracies": 1.0, "rewards/chosen": 0.8305705189704895, "rewards/margins": 0.469187468290329, "rewards/rejected": 0.3613830506801605, "step": 1027 }, { "epoch": 2.6, "learning_rate": 4.536487409884327e-09, "logits/chosen": -2.25423526763916, "logits/rejected": -2.234968662261963, "logps/chosen": -2.791952133178711, "logps/rejected": -11.253215789794922, "loss": 0.6241, "rewards/accuracies": 0.0, "rewards/chosen": 0.631955623626709, "rewards/margins": -0.3492676615715027, "rewards/rejected": 0.9812232851982117, "step": 1028 }, { "epoch": 2.61, "learning_rate": 4.479757925552058e-09, "logits/chosen": -2.259329080581665, "logits/rejected": -2.2557148933410645, "logps/chosen": -1.625824213027954, "logps/rejected": -5.127805233001709, "loss": 0.5546, "rewards/accuracies": 1.0, "rewards/chosen": 0.9418631792068481, "rewards/margins": 0.16173595190048218, "rewards/rejected": 0.780127227306366, "step": 1029 }, { "epoch": 2.61, "learning_rate": 4.423368742811467e-09, "logits/chosen": -2.2632861137390137, "logits/rejected": -2.2620584964752197, "logps/chosen": -3.980764150619507, "logps/rejected": -4.567049980163574, "loss": 0.6601, "rewards/accuracies": 0.0, "rewards/chosen": 0.7043933868408203, "rewards/margins": -0.3551534414291382, "rewards/rejected": 1.0595468282699585, "step": 1030 }, { "epoch": 2.61, "learning_rate": 4.3673202832184954e-09, "logits/chosen": -2.3106303215026855, "logits/rejected": -2.3350110054016113, "logps/chosen": -5.838846206665039, "logps/rejected": -6.989467144012451, "loss": 0.7094, "rewards/accuracies": 0.0, "rewards/chosen": 0.6605499386787415, "rewards/margins": -0.3195171356201172, "rewards/rejected": 0.9800670742988586, "step": 1031 }, { "epoch": 2.61, "learning_rate": 4.311612965781902e-09, "logits/chosen": -2.2371466159820557, "logits/rejected": -2.2301297187805176, "logps/chosen": -1.0596197843551636, "logps/rejected": -11.051597595214844, "loss": 0.5253, "rewards/accuracies": 1.0, "rewards/chosen": 0.7377830743789673, "rewards/margins": 0.20804041624069214, "rewards/rejected": 0.5297426581382751, "step": 1032 }, { "epoch": 2.62, "learning_rate": 4.256247206960123e-09, "logits/chosen": -2.1989293098449707, "logits/rejected": -2.192840337753296, "logps/chosen": -7.27534294128418, "logps/rejected": -5.717670917510986, "loss": 0.6118, "rewards/accuracies": 1.0, "rewards/chosen": 0.678862988948822, "rewards/margins": 0.33239027857780457, "rewards/rejected": 0.34647271037101746, "step": 1033 }, { "epoch": 2.62, "learning_rate": 4.201223420658134e-09, "logits/chosen": -2.3361523151397705, "logits/rejected": -2.3288590908050537, "logps/chosen": -2.852961540222168, "logps/rejected": -8.736576080322266, "loss": 0.6535, "rewards/accuracies": 0.0, "rewards/chosen": 0.5198332071304321, "rewards/margins": -0.1756874918937683, "rewards/rejected": 0.6955206990242004, "step": 1034 }, { "epoch": 2.62, "learning_rate": 4.146542018224447e-09, "logits/chosen": -2.289262533187866, "logits/rejected": -2.3040857315063477, "logps/chosen": -0.5384100675582886, "logps/rejected": -13.5481595993042, "loss": 0.6754, "rewards/accuracies": 0.0, "rewards/chosen": 0.6259229779243469, "rewards/margins": -0.21201545000076294, "rewards/rejected": 0.8379384279251099, "step": 1035 }, { "epoch": 2.62, "learning_rate": 4.092203408447914e-09, "logits/chosen": -2.2515869140625, "logits/rejected": -2.255581855773926, "logps/chosen": -2.612818956375122, "logps/rejected": -5.035745143890381, "loss": 0.7536, "rewards/accuracies": 0.0, "rewards/chosen": 0.7063077092170715, "rewards/margins": -0.22700011730194092, "rewards/rejected": 0.9333078265190125, "step": 1036 }, { "epoch": 2.63, "learning_rate": 4.038207997554738e-09, "logits/chosen": -2.2240641117095947, "logits/rejected": -2.2248423099517822, "logps/chosen": -1.1393296718597412, "logps/rejected": -3.5643844604492188, "loss": 0.6861, "rewards/accuracies": 1.0, "rewards/chosen": 0.7699010968208313, "rewards/margins": 0.29219111800193787, "rewards/rejected": 0.47770997881889343, "step": 1037 }, { "epoch": 2.63, "learning_rate": 3.98455618920544e-09, "logits/chosen": -2.3114895820617676, "logits/rejected": -2.3209753036499023, "logps/chosen": -1.4597830772399902, "logps/rejected": -5.564828872680664, "loss": 0.509, "rewards/accuracies": 1.0, "rewards/chosen": 0.7835986018180847, "rewards/margins": 0.4078218340873718, "rewards/rejected": 0.3757767677307129, "step": 1038 }, { "epoch": 2.63, "learning_rate": 3.931248384491814e-09, "logits/chosen": -2.201791524887085, "logits/rejected": -2.201159954071045, "logps/chosen": -1.1004045009613037, "logps/rejected": -12.528162002563477, "loss": 0.6607, "rewards/accuracies": 0.0, "rewards/chosen": 0.6339018940925598, "rewards/margins": -0.2970539927482605, "rewards/rejected": 0.9309558868408203, "step": 1039 }, { "epoch": 2.63, "learning_rate": 3.878284981933949e-09, "logits/chosen": -2.29612398147583, "logits/rejected": -2.3284871578216553, "logps/chosen": -4.179084777832031, "logps/rejected": -9.355860710144043, "loss": 0.7261, "rewards/accuracies": 1.0, "rewards/chosen": 0.9815740585327148, "rewards/margins": 0.11058032512664795, "rewards/rejected": 0.8709937334060669, "step": 1040 }, { "epoch": 2.64, "learning_rate": 3.825666377477238e-09, "logits/chosen": -2.216209888458252, "logits/rejected": -2.213120460510254, "logps/chosen": -2.466022253036499, "logps/rejected": -7.4763078689575195, "loss": 0.5816, "rewards/accuracies": 1.0, "rewards/chosen": 0.9693626761436462, "rewards/margins": 0.10340547561645508, "rewards/rejected": 0.8659572005271912, "step": 1041 }, { "epoch": 2.64, "learning_rate": 3.773392964489425e-09, "logits/chosen": -2.173217296600342, "logits/rejected": -2.177381753921509, "logps/chosen": -1.120866060256958, "logps/rejected": -3.861410617828369, "loss": 0.6522, "rewards/accuracies": 1.0, "rewards/chosen": 0.7690275311470032, "rewards/margins": 0.3268356919288635, "rewards/rejected": 0.44219183921813965, "step": 1042 }, { "epoch": 2.64, "learning_rate": 3.721465133757662e-09, "logits/chosen": -2.092402696609497, "logits/rejected": -2.101708173751831, "logps/chosen": -2.830930709838867, "logps/rejected": -6.9749250411987305, "loss": 0.6112, "rewards/accuracies": 0.0, "rewards/chosen": 0.43866705894470215, "rewards/margins": -0.43050897121429443, "rewards/rejected": 0.8691760301589966, "step": 1043 }, { "epoch": 2.64, "learning_rate": 3.6698832734855745e-09, "logits/chosen": -2.249032735824585, "logits/rejected": -2.2472236156463623, "logps/chosen": -1.931663990020752, "logps/rejected": -6.33540153503418, "loss": 0.6939, "rewards/accuracies": 0.0, "rewards/chosen": 0.645791232585907, "rewards/margins": -0.2831520438194275, "rewards/rejected": 0.9289432764053345, "step": 1044 }, { "epoch": 2.65, "learning_rate": 3.618647769290395e-09, "logits/chosen": -2.2272300720214844, "logits/rejected": -2.210552453994751, "logps/chosen": -1.6775885820388794, "logps/rejected": -6.770359992980957, "loss": 0.6622, "rewards/accuracies": 0.0, "rewards/chosen": 0.6794226765632629, "rewards/margins": -0.21846020221710205, "rewards/rejected": 0.897882878780365, "step": 1045 }, { "epoch": 2.65, "learning_rate": 3.567759004200027e-09, "logits/chosen": -2.286591053009033, "logits/rejected": -2.293807029724121, "logps/chosen": -2.025826930999756, "logps/rejected": -20.38433074951172, "loss": 0.6189, "rewards/accuracies": 0.0, "rewards/chosen": 0.6684513688087463, "rewards/margins": -0.0863141417503357, "rewards/rejected": 0.754765510559082, "step": 1046 }, { "epoch": 2.65, "learning_rate": 3.517217358650254e-09, "logits/chosen": -2.257040500640869, "logits/rejected": -2.2518951892852783, "logps/chosen": -3.4161276817321777, "logps/rejected": -3.446870803833008, "loss": 0.7251, "rewards/accuracies": 0.0, "rewards/chosen": 0.8006700873374939, "rewards/margins": -0.14464974403381348, "rewards/rejected": 0.9453198313713074, "step": 1047 }, { "epoch": 2.65, "learning_rate": 3.4670232104817896e-09, "logits/chosen": -2.3164355754852295, "logits/rejected": -2.322834014892578, "logps/chosen": -1.8476457595825195, "logps/rejected": -2.0507009029388428, "loss": 0.64, "rewards/accuracies": 1.0, "rewards/chosen": 0.9438688158988953, "rewards/margins": 0.40060585737228394, "rewards/rejected": 0.5432629585266113, "step": 1048 }, { "epoch": 2.66, "learning_rate": 3.4171769349375876e-09, "logits/chosen": -2.2261099815368652, "logits/rejected": -2.225865364074707, "logps/chosen": -7.220394134521484, "logps/rejected": -3.208721160888672, "loss": 0.5083, "rewards/accuracies": 1.0, "rewards/chosen": 1.0629745721817017, "rewards/margins": 0.4105701446533203, "rewards/rejected": 0.6524044275283813, "step": 1049 }, { "epoch": 2.66, "learning_rate": 3.367678904659904e-09, "logits/chosen": -2.353691339492798, "logits/rejected": -2.352841854095459, "logps/chosen": -0.6349361538887024, "logps/rejected": -9.106497764587402, "loss": 0.6342, "rewards/accuracies": 0.0, "rewards/chosen": 0.7038758397102356, "rewards/margins": -0.13452798128128052, "rewards/rejected": 0.8384038209915161, "step": 1050 }, { "epoch": 2.66, "learning_rate": 3.318529489687605e-09, "logits/chosen": -2.189073085784912, "logits/rejected": -2.3455097675323486, "logps/chosen": -1.7261013984680176, "logps/rejected": -15.547879219055176, "loss": 0.6941, "rewards/accuracies": 1.0, "rewards/chosen": 0.8265752792358398, "rewards/margins": 0.19028043746948242, "rewards/rejected": 0.6362948417663574, "step": 1051 }, { "epoch": 2.66, "learning_rate": 3.2697290574533855e-09, "logits/chosen": -2.207228660583496, "logits/rejected": -2.233941078186035, "logps/chosen": -2.7114076614379883, "logps/rejected": -12.499275207519531, "loss": 0.6241, "rewards/accuracies": 1.0, "rewards/chosen": 0.8702051043510437, "rewards/margins": 0.6293258666992188, "rewards/rejected": 0.24087925255298615, "step": 1052 }, { "epoch": 2.67, "learning_rate": 3.2212779727809503e-09, "logits/chosen": -2.187870979309082, "logits/rejected": -2.2020862102508545, "logps/chosen": -0.7330828905105591, "logps/rejected": -18.000341415405273, "loss": 0.6719, "rewards/accuracies": 1.0, "rewards/chosen": 0.7919250726699829, "rewards/margins": 0.038193702697753906, "rewards/rejected": 0.753731369972229, "step": 1053 }, { "epoch": 2.67, "learning_rate": 3.1731765978823876e-09, "logits/chosen": -2.286020517349243, "logits/rejected": -2.285202980041504, "logps/chosen": -2.350956678390503, "logps/rejected": -2.9727017879486084, "loss": 0.7301, "rewards/accuracies": 0.0, "rewards/chosen": 0.6311101317405701, "rewards/margins": -0.1848992109298706, "rewards/rejected": 0.8160093426704407, "step": 1054 }, { "epoch": 2.67, "learning_rate": 3.1254252923553992e-09, "logits/chosen": -2.1990561485290527, "logits/rejected": -2.204160690307617, "logps/chosen": -3.296710729598999, "logps/rejected": -5.3487653732299805, "loss": 0.6128, "rewards/accuracies": 1.0, "rewards/chosen": 0.9325447082519531, "rewards/margins": 0.28157681226730347, "rewards/rejected": 0.6509678959846497, "step": 1055 }, { "epoch": 2.67, "learning_rate": 3.0780244131806187e-09, "logits/chosen": -2.1811985969543457, "logits/rejected": -2.1833739280700684, "logps/chosen": -3.3649659156799316, "logps/rejected": -4.211127281188965, "loss": 0.6281, "rewards/accuracies": 1.0, "rewards/chosen": 0.7861748337745667, "rewards/margins": 0.26985639333724976, "rewards/rejected": 0.5163184404373169, "step": 1056 }, { "epoch": 2.68, "learning_rate": 3.03097431471897e-09, "logits/chosen": -2.2686946392059326, "logits/rejected": -2.273437976837158, "logps/chosen": -7.459412574768066, "logps/rejected": -4.94359016418457, "loss": 0.6576, "rewards/accuracies": 1.0, "rewards/chosen": 1.2671908140182495, "rewards/margins": 0.6007440090179443, "rewards/rejected": 0.6664468050003052, "step": 1057 }, { "epoch": 2.68, "learning_rate": 2.984275348708992e-09, "logits/chosen": -2.3168675899505615, "logits/rejected": -2.4313881397247314, "logps/chosen": -3.411985397338867, "logps/rejected": -18.16241455078125, "loss": 0.6446, "rewards/accuracies": 1.0, "rewards/chosen": 0.694909393787384, "rewards/margins": 0.25383082032203674, "rewards/rejected": 0.4410785734653473, "step": 1058 }, { "epoch": 2.68, "learning_rate": 2.937927864264206e-09, "logits/chosen": -2.2326087951660156, "logits/rejected": -2.4106831550598145, "logps/chosen": -1.558834433555603, "logps/rejected": -22.950897216796875, "loss": 0.5794, "rewards/accuracies": 1.0, "rewards/chosen": 0.7869580984115601, "rewards/margins": 0.593602180480957, "rewards/rejected": 0.19335594773292542, "step": 1059 }, { "epoch": 2.68, "learning_rate": 2.8919322078705456e-09, "logits/chosen": -2.341738224029541, "logits/rejected": -2.358199119567871, "logps/chosen": -6.882248878479004, "logps/rejected": -3.9704535007476807, "loss": 0.8206, "rewards/accuracies": 0.0, "rewards/chosen": 0.707176685333252, "rewards/margins": -0.006982743740081787, "rewards/rejected": 0.7141594290733337, "step": 1060 }, { "epoch": 2.69, "learning_rate": 2.846288723383694e-09, "logits/chosen": -2.2138092517852783, "logits/rejected": -2.221980333328247, "logps/chosen": -5.092818260192871, "logps/rejected": -5.391007423400879, "loss": 0.6459, "rewards/accuracies": 1.0, "rewards/chosen": 1.2145352363586426, "rewards/margins": 0.760568380355835, "rewards/rejected": 0.45396682620048523, "step": 1061 }, { "epoch": 2.69, "learning_rate": 2.800997752026596e-09, "logits/chosen": -2.2432382106781006, "logits/rejected": -2.245892286300659, "logps/chosen": -0.9355766177177429, "logps/rejected": -3.6652581691741943, "loss": 0.6345, "rewards/accuracies": 1.0, "rewards/chosen": 0.8201517462730408, "rewards/margins": 0.39112094044685364, "rewards/rejected": 0.42903080582618713, "step": 1062 }, { "epoch": 2.69, "learning_rate": 2.7560596323868647e-09, "logits/chosen": -2.2431960105895996, "logits/rejected": -2.316150188446045, "logps/chosen": -2.1642446517944336, "logps/rejected": -24.168365478515625, "loss": 0.6433, "rewards/accuracies": 1.0, "rewards/chosen": 0.8183565139770508, "rewards/margins": 0.014632582664489746, "rewards/rejected": 0.803723931312561, "step": 1063 }, { "epoch": 2.69, "learning_rate": 2.7114747004142237e-09, "logits/chosen": -2.2443437576293945, "logits/rejected": -2.254077196121216, "logps/chosen": -1.963338851928711, "logps/rejected": -3.9089653491973877, "loss": 0.6099, "rewards/accuracies": 1.0, "rewards/chosen": 0.8388769030570984, "rewards/margins": 0.24645757675170898, "rewards/rejected": 0.5924193263053894, "step": 1064 }, { "epoch": 2.7, "learning_rate": 2.6672432894180585e-09, "logits/chosen": -2.244685411453247, "logits/rejected": -2.24981689453125, "logps/chosen": -5.069785118103027, "logps/rejected": -5.420602798461914, "loss": 0.513, "rewards/accuracies": 1.0, "rewards/chosen": 1.034666895866394, "rewards/margins": 0.6185585260391235, "rewards/rejected": 0.4161083400249481, "step": 1065 }, { "epoch": 2.7, "learning_rate": 2.623365730064869e-09, "logits/chosen": -2.197307825088501, "logits/rejected": -2.198810338973999, "logps/chosen": -1.22565495967865, "logps/rejected": -2.894665479660034, "loss": 0.6118, "rewards/accuracies": 1.0, "rewards/chosen": 0.7945056557655334, "rewards/margins": 0.3117577135562897, "rewards/rejected": 0.4827479422092438, "step": 1066 }, { "epoch": 2.7, "learning_rate": 2.579842350375838e-09, "logits/chosen": -2.221299648284912, "logits/rejected": -2.227573871612549, "logps/chosen": -1.860198974609375, "logps/rejected": -3.525787830352783, "loss": 0.6294, "rewards/accuracies": 1.0, "rewards/chosen": 0.9017065167427063, "rewards/margins": 0.33178699016571045, "rewards/rejected": 0.5699195265769958, "step": 1067 }, { "epoch": 2.7, "learning_rate": 2.536673475724349e-09, "logits/chosen": -2.3128466606140137, "logits/rejected": -2.3234264850616455, "logps/chosen": -11.544952392578125, "logps/rejected": -2.8976666927337646, "loss": 0.5327, "rewards/accuracies": 1.0, "rewards/chosen": 0.8438013195991516, "rewards/margins": 0.23640888929367065, "rewards/rejected": 0.607392430305481, "step": 1068 }, { "epoch": 2.71, "learning_rate": 2.4938594288335724e-09, "logits/chosen": -2.2275447845458984, "logits/rejected": -2.291869640350342, "logps/chosen": -6.1075758934021, "logps/rejected": -14.296764373779297, "loss": 0.5666, "rewards/accuracies": 1.0, "rewards/chosen": 0.8304216265678406, "rewards/margins": 0.5916831493377686, "rewards/rejected": 0.23873844742774963, "step": 1069 }, { "epoch": 2.71, "learning_rate": 2.4514005297740446e-09, "logits/chosen": -2.2966558933258057, "logits/rejected": -2.296983242034912, "logps/chosen": -1.5962797403335571, "logps/rejected": -12.142393112182617, "loss": 0.5785, "rewards/accuracies": 1.0, "rewards/chosen": 0.719418466091156, "rewards/margins": 0.34958985447883606, "rewards/rejected": 0.36982861161231995, "step": 1070 }, { "epoch": 2.71, "learning_rate": 2.409297095961288e-09, "logits/chosen": -2.2364542484283447, "logits/rejected": -2.2424123287200928, "logps/chosen": -3.059439182281494, "logps/rejected": -4.108352184295654, "loss": 0.5672, "rewards/accuracies": 1.0, "rewards/chosen": 0.8292377591133118, "rewards/margins": 0.3540360927581787, "rewards/rejected": 0.47520166635513306, "step": 1071 }, { "epoch": 2.71, "learning_rate": 2.3675494421533883e-09, "logits/chosen": -2.274636745452881, "logits/rejected": -2.336313486099243, "logps/chosen": -0.8266770839691162, "logps/rejected": -30.0804386138916, "loss": 0.5915, "rewards/accuracies": 1.0, "rewards/chosen": 0.7042301297187805, "rewards/margins": 0.3036420941352844, "rewards/rejected": 0.4005880355834961, "step": 1072 }, { "epoch": 2.72, "learning_rate": 2.3261578804487312e-09, "logits/chosen": -2.3174643516540527, "logits/rejected": -2.3118677139282227, "logps/chosen": -1.498014211654663, "logps/rejected": -2.0736753940582275, "loss": 0.7437, "rewards/accuracies": 0.0, "rewards/chosen": 0.6968507766723633, "rewards/margins": -0.29239845275878906, "rewards/rejected": 0.9892492294311523, "step": 1073 }, { "epoch": 2.72, "learning_rate": 2.2851227202836e-09, "logits/chosen": -2.2255983352661133, "logits/rejected": -2.198326826095581, "logps/chosen": -3.5545153617858887, "logps/rejected": -7.542411804199219, "loss": 0.6834, "rewards/accuracies": 0.0, "rewards/chosen": 0.6426794528961182, "rewards/margins": -0.2083989977836609, "rewards/rejected": 0.851078450679779, "step": 1074 }, { "epoch": 2.72, "learning_rate": 2.244444268429857e-09, "logits/chosen": -2.336630344390869, "logits/rejected": -2.353591203689575, "logps/chosen": -0.7967427968978882, "logps/rejected": -7.932551860809326, "loss": 0.6031, "rewards/accuracies": 1.0, "rewards/chosen": 0.7621904015541077, "rewards/margins": 0.34676799178123474, "rewards/rejected": 0.4154224097728729, "step": 1075 }, { "epoch": 2.72, "learning_rate": 2.2041228289927103e-09, "logits/chosen": -2.3489925861358643, "logits/rejected": -2.339294672012329, "logps/chosen": -1.4285507202148438, "logps/rejected": -5.909711837768555, "loss": 0.6171, "rewards/accuracies": 1.0, "rewards/chosen": 0.6847942471504211, "rewards/margins": 0.04282200336456299, "rewards/rejected": 0.6419722437858582, "step": 1076 }, { "epoch": 2.73, "learning_rate": 2.164158703408375e-09, "logits/chosen": -2.1617350578308105, "logits/rejected": -2.1646032333374023, "logps/chosen": -3.9429397583007812, "logps/rejected": -2.6194589138031006, "loss": 0.588, "rewards/accuracies": 0.0, "rewards/chosen": 0.6134451031684875, "rewards/margins": -0.18531817197799683, "rewards/rejected": 0.7987632751464844, "step": 1077 }, { "epoch": 2.73, "learning_rate": 2.124552190441864e-09, "logits/chosen": -2.2683145999908447, "logits/rejected": -2.27215313911438, "logps/chosen": -1.61848783493042, "logps/rejected": -2.9279978275299072, "loss": 0.6551, "rewards/accuracies": 0.0, "rewards/chosen": 0.5579506158828735, "rewards/margins": -0.35732555389404297, "rewards/rejected": 0.9152761697769165, "step": 1078 }, { "epoch": 2.73, "learning_rate": 2.0853035861847447e-09, "logits/chosen": -2.270390033721924, "logits/rejected": -2.2649803161621094, "logps/chosen": -2.4288623332977295, "logps/rejected": -4.961497783660889, "loss": 0.6966, "rewards/accuracies": 0.0, "rewards/chosen": 0.6145891547203064, "rewards/margins": -0.361550509929657, "rewards/rejected": 0.9761396646499634, "step": 1079 }, { "epoch": 2.73, "learning_rate": 2.0464131840528974e-09, "logits/chosen": -2.272178888320923, "logits/rejected": -2.2625532150268555, "logps/chosen": -2.060668706893921, "logps/rejected": -8.746248245239258, "loss": 0.676, "rewards/accuracies": 0.0, "rewards/chosen": 0.6229214072227478, "rewards/margins": -0.0775688886642456, "rewards/rejected": 0.7004902958869934, "step": 1080 }, { "epoch": 2.74, "learning_rate": 2.007881274784362e-09, "logits/chosen": -2.3130526542663574, "logits/rejected": -2.3094828128814697, "logps/chosen": -2.0391764640808105, "logps/rejected": -6.462927341461182, "loss": 0.7705, "rewards/accuracies": 0.0, "rewards/chosen": 0.7259962558746338, "rewards/margins": -0.006915748119354248, "rewards/rejected": 0.732912003993988, "step": 1081 }, { "epoch": 2.74, "learning_rate": 1.9697081464371433e-09, "logits/chosen": -2.241743564605713, "logits/rejected": -2.262324333190918, "logps/chosen": -5.012842655181885, "logps/rejected": -15.762222290039062, "loss": 0.7563, "rewards/accuracies": 1.0, "rewards/chosen": 0.8553417325019836, "rewards/margins": 0.2803550958633423, "rewards/rejected": 0.5749866366386414, "step": 1082 }, { "epoch": 2.74, "learning_rate": 1.931894084387059e-09, "logits/chosen": -2.3517887592315674, "logits/rejected": -2.3495380878448486, "logps/chosen": -1.5765435695648193, "logps/rejected": -9.471779823303223, "loss": 0.6456, "rewards/accuracies": 1.0, "rewards/chosen": 0.792953610420227, "rewards/margins": 0.015276908874511719, "rewards/rejected": 0.7776767015457153, "step": 1083 }, { "epoch": 2.74, "learning_rate": 1.8944393713256068e-09, "logits/chosen": -2.243943452835083, "logits/rejected": -2.2518067359924316, "logps/chosen": -6.018061637878418, "logps/rejected": -4.169035911560059, "loss": 0.617, "rewards/accuracies": 1.0, "rewards/chosen": 1.0885316133499146, "rewards/margins": 0.5413907170295715, "rewards/rejected": 0.547140896320343, "step": 1084 }, { "epoch": 2.75, "learning_rate": 1.8573442872578615e-09, "logits/chosen": -2.280479907989502, "logits/rejected": -2.284148693084717, "logps/chosen": -0.9363372325897217, "logps/rejected": -8.765116691589355, "loss": 0.7582, "rewards/accuracies": 0.0, "rewards/chosen": 0.6551639437675476, "rewards/margins": -0.3349893093109131, "rewards/rejected": 0.9901532530784607, "step": 1085 }, { "epoch": 2.75, "learning_rate": 1.820609109500354e-09, "logits/chosen": -2.2679009437561035, "logits/rejected": -2.298830986022949, "logps/chosen": -0.9607863426208496, "logps/rejected": -14.76610279083252, "loss": 0.6443, "rewards/accuracies": 1.0, "rewards/chosen": 0.7561253309249878, "rewards/margins": 0.0076253414154052734, "rewards/rejected": 0.7484999895095825, "step": 1086 }, { "epoch": 2.75, "learning_rate": 1.7842341126790505e-09, "logits/chosen": -2.1947364807128906, "logits/rejected": -2.175377368927002, "logps/chosen": -1.1455113887786865, "logps/rejected": -7.117659568786621, "loss": 0.799, "rewards/accuracies": 0.0, "rewards/chosen": 0.7258858680725098, "rewards/margins": -0.02804088592529297, "rewards/rejected": 0.7539267539978027, "step": 1087 }, { "epoch": 2.75, "learning_rate": 1.748219568727216e-09, "logits/chosen": -2.1721580028533936, "logits/rejected": -2.1650216579437256, "logps/chosen": -1.700951099395752, "logps/rejected": -4.762135028839111, "loss": 0.6587, "rewards/accuracies": 0.0, "rewards/chosen": 0.6292683482170105, "rewards/margins": -0.2053138017654419, "rewards/rejected": 0.8345821499824524, "step": 1088 }, { "epoch": 2.76, "learning_rate": 1.7125657468834654e-09, "logits/chosen": -2.244941234588623, "logits/rejected": -2.244396924972534, "logps/chosen": -1.3742964267730713, "logps/rejected": -5.497496128082275, "loss": 0.7369, "rewards/accuracies": 1.0, "rewards/chosen": 0.7782179713249207, "rewards/margins": 0.31289419531822205, "rewards/rejected": 0.4653237760066986, "step": 1089 }, { "epoch": 2.76, "learning_rate": 1.6772729136897312e-09, "logits/chosen": -2.2037160396575928, "logits/rejected": -2.1999597549438477, "logps/chosen": -1.5898996591567993, "logps/rejected": -3.4907782077789307, "loss": 0.6494, "rewards/accuracies": 1.0, "rewards/chosen": 0.9058657884597778, "rewards/margins": 0.4619016945362091, "rewards/rejected": 0.4439640939235687, "step": 1090 }, { "epoch": 2.76, "learning_rate": 1.6423413329892166e-09, "logits/chosen": -2.142049551010132, "logits/rejected": -2.1584434509277344, "logps/chosen": -5.023122310638428, "logps/rejected": -13.792101860046387, "loss": 0.7466, "rewards/accuracies": 1.0, "rewards/chosen": 0.6937119364738464, "rewards/margins": 0.02086120843887329, "rewards/rejected": 0.6728507280349731, "step": 1091 }, { "epoch": 2.76, "learning_rate": 1.607771265924479e-09, "logits/chosen": -2.1796224117279053, "logits/rejected": -2.1903698444366455, "logps/chosen": -2.5404789447784424, "logps/rejected": -4.353466033935547, "loss": 0.578, "rewards/accuracies": 1.0, "rewards/chosen": 0.8772332072257996, "rewards/margins": 0.3889985978603363, "rewards/rejected": 0.48823460936546326, "step": 1092 }, { "epoch": 2.77, "learning_rate": 1.5735629709354659e-09, "logits/chosen": -2.2353644371032715, "logits/rejected": -2.2283785343170166, "logps/chosen": -1.1732577085494995, "logps/rejected": -5.164798736572266, "loss": 0.7343, "rewards/accuracies": 0.0, "rewards/chosen": 0.7532209753990173, "rewards/margins": -0.04291701316833496, "rewards/rejected": 0.7961379885673523, "step": 1093 }, { "epoch": 2.77, "learning_rate": 1.539716703757582e-09, "logits/chosen": -2.094369649887085, "logits/rejected": -2.1079909801483154, "logps/chosen": -2.8317465782165527, "logps/rejected": -9.401522636413574, "loss": 0.5868, "rewards/accuracies": 1.0, "rewards/chosen": 0.839146077632904, "rewards/margins": 0.0007190108299255371, "rewards/rejected": 0.8384270668029785, "step": 1094 }, { "epoch": 2.77, "learning_rate": 1.5062327174197641e-09, "logits/chosen": -2.2166779041290283, "logits/rejected": -2.2145156860351562, "logps/chosen": -2.106562852859497, "logps/rejected": -7.4547224044799805, "loss": 0.7339, "rewards/accuracies": 0.0, "rewards/chosen": 0.6450852751731873, "rewards/margins": -0.2952674627304077, "rewards/rejected": 0.940352737903595, "step": 1095 }, { "epoch": 2.77, "learning_rate": 1.473111262242599e-09, "logits/chosen": -2.2182698249816895, "logits/rejected": -2.2554712295532227, "logps/chosen": -11.259719848632812, "logps/rejected": -3.193711042404175, "loss": 0.5361, "rewards/accuracies": 1.0, "rewards/chosen": 0.963805615901947, "rewards/margins": 0.2607060670852661, "rewards/rejected": 0.7030995488166809, "step": 1096 }, { "epoch": 2.78, "learning_rate": 1.4403525858364574e-09, "logits/chosen": -2.1755404472351074, "logits/rejected": -2.172498941421509, "logps/chosen": -1.1413284540176392, "logps/rejected": -10.095059394836426, "loss": 0.7927, "rewards/accuracies": 0.0, "rewards/chosen": 0.6888123154640198, "rewards/margins": -0.08361780643463135, "rewards/rejected": 0.7724301218986511, "step": 1097 }, { "epoch": 2.78, "learning_rate": 1.4079569330996411e-09, "logits/chosen": -2.2618441581726074, "logits/rejected": -2.262528657913208, "logps/chosen": -2.6406126022338867, "logps/rejected": -5.6645708084106445, "loss": 0.6108, "rewards/accuracies": 1.0, "rewards/chosen": 0.8047559857368469, "rewards/margins": 0.40440893173217773, "rewards/rejected": 0.4003470540046692, "step": 1098 }, { "epoch": 2.78, "learning_rate": 1.3759245462165281e-09, "logits/chosen": -2.18601655960083, "logits/rejected": -2.195967435836792, "logps/chosen": -5.714016437530518, "logps/rejected": -4.361438751220703, "loss": 0.6237, "rewards/accuracies": 1.0, "rewards/chosen": 1.09454345703125, "rewards/margins": 0.5914767980575562, "rewards/rejected": 0.5030666589736938, "step": 1099 }, { "epoch": 2.78, "learning_rate": 1.3442556646558078e-09, "logits/chosen": -2.345531463623047, "logits/rejected": -2.3351330757141113, "logps/chosen": -1.2982347011566162, "logps/rejected": -5.795958518981934, "loss": 0.6197, "rewards/accuracies": 1.0, "rewards/chosen": 0.7963008880615234, "rewards/margins": 0.009119391441345215, "rewards/rejected": 0.7871814966201782, "step": 1100 }, { "epoch": 2.79, "learning_rate": 1.3129505251686601e-09, "logits/chosen": -1.9880610704421997, "logits/rejected": -2.0069286823272705, "logps/chosen": -3.682192087173462, "logps/rejected": -6.010267734527588, "loss": 0.5886, "rewards/accuracies": 1.0, "rewards/chosen": 0.6858001947402954, "rewards/margins": 0.15506571531295776, "rewards/rejected": 0.5307344794273376, "step": 1101 }, { "epoch": 2.79, "learning_rate": 1.2820093617869732e-09, "logits/chosen": -2.203070878982544, "logits/rejected": -2.202834367752075, "logps/chosen": -1.9866907596588135, "logps/rejected": -5.0847578048706055, "loss": 0.7506, "rewards/accuracies": 0.0, "rewards/chosen": 0.8101033568382263, "rewards/margins": -0.21083039045333862, "rewards/rejected": 1.020933747291565, "step": 1102 }, { "epoch": 2.79, "learning_rate": 1.2514324058216397e-09, "logits/chosen": -2.1805167198181152, "logits/rejected": -2.3590619564056396, "logps/chosen": -1.7514572143554688, "logps/rejected": -34.308937072753906, "loss": 0.65, "rewards/accuracies": 1.0, "rewards/chosen": 0.7116943597793579, "rewards/margins": 0.48125916719436646, "rewards/rejected": 0.23043517768383026, "step": 1103 }, { "epoch": 2.79, "learning_rate": 1.2212198858607692e-09, "logits/chosen": -2.279294967651367, "logits/rejected": -2.2786524295806885, "logps/chosen": -12.771692276000977, "logps/rejected": -3.687329053878784, "loss": 0.6719, "rewards/accuracies": 1.0, "rewards/chosen": 1.016875147819519, "rewards/margins": 0.15694880485534668, "rewards/rejected": 0.8599263429641724, "step": 1104 }, { "epoch": 2.8, "learning_rate": 1.191372027768034e-09, "logits/chosen": -2.3076117038726807, "logits/rejected": -2.3060619831085205, "logps/chosen": -2.764918327331543, "logps/rejected": -4.199870586395264, "loss": 0.6637, "rewards/accuracies": 0.0, "rewards/chosen": 0.5948131680488586, "rewards/margins": -0.14723724126815796, "rewards/rejected": 0.7420504093170166, "step": 1105 }, { "epoch": 2.8, "learning_rate": 1.1618890546809424e-09, "logits/chosen": -2.3453853130340576, "logits/rejected": -2.5023770332336426, "logps/chosen": -0.994515061378479, "logps/rejected": -47.46773910522461, "loss": 0.7003, "rewards/accuracies": 1.0, "rewards/chosen": 0.7275446653366089, "rewards/margins": 0.5473246574401855, "rewards/rejected": 0.18022003769874573, "step": 1106 }, { "epoch": 2.8, "learning_rate": 1.132771187009196e-09, "logits/chosen": -2.118675947189331, "logits/rejected": -2.1282246112823486, "logps/chosen": -3.316110849380493, "logps/rejected": -3.1546201705932617, "loss": 0.6893, "rewards/accuracies": 1.0, "rewards/chosen": 0.8960304260253906, "rewards/margins": 0.31564241647720337, "rewards/rejected": 0.5803880095481873, "step": 1107 }, { "epoch": 2.81, "learning_rate": 1.1040186424330188e-09, "logits/chosen": -2.2192912101745605, "logits/rejected": -2.2215349674224854, "logps/chosen": -1.577492594718933, "logps/rejected": -2.671261787414551, "loss": 0.5636, "rewards/accuracies": 1.0, "rewards/chosen": 0.9640906453132629, "rewards/margins": 0.3727531433105469, "rewards/rejected": 0.5913375020027161, "step": 1108 }, { "epoch": 2.81, "learning_rate": 1.0756316359015527e-09, "logits/chosen": -2.31978440284729, "logits/rejected": -2.313882827758789, "logps/chosen": -3.8079819679260254, "logps/rejected": -2.7375028133392334, "loss": 0.639, "rewards/accuracies": 1.0, "rewards/chosen": 0.7447656989097595, "rewards/margins": 0.20269137620925903, "rewards/rejected": 0.5420743227005005, "step": 1109 }, { "epoch": 2.81, "learning_rate": 1.0476103796312252e-09, "logits/chosen": -2.2007558345794678, "logits/rejected": -2.2059452533721924, "logps/chosen": -4.707880973815918, "logps/rejected": -2.962825059890747, "loss": 0.6414, "rewards/accuracies": 0.0, "rewards/chosen": 0.4485676884651184, "rewards/margins": -0.35249459743499756, "rewards/rejected": 0.801062285900116, "step": 1110 }, { "epoch": 2.81, "learning_rate": 1.0199550831041903e-09, "logits/chosen": -2.3283157348632812, "logits/rejected": -2.3164308071136475, "logps/chosen": -1.4461820125579834, "logps/rejected": -2.5939247608184814, "loss": 0.6624, "rewards/accuracies": 0.0, "rewards/chosen": 0.6352662444114685, "rewards/margins": -0.1323995590209961, "rewards/rejected": 0.7676658034324646, "step": 1111 }, { "epoch": 2.82, "learning_rate": 9.926659530667291e-10, "logits/chosen": -2.1733851432800293, "logits/rejected": -2.174586296081543, "logps/chosen": -5.1953020095825195, "logps/rejected": -1.023668885231018, "loss": 0.6991, "rewards/accuracies": 0.0, "rewards/chosen": 0.6550328135490417, "rewards/margins": -0.14665883779525757, "rewards/rejected": 0.8016916513442993, "step": 1112 }, { "epoch": 2.82, "learning_rate": 9.657431935277627e-10, "logits/chosen": -2.205064058303833, "logits/rejected": -2.1945226192474365, "logps/chosen": -1.932742953300476, "logps/rejected": -5.131620407104492, "loss": 0.6021, "rewards/accuracies": 0.0, "rewards/chosen": 0.6767815947532654, "rewards/margins": -0.04702901840209961, "rewards/rejected": 0.723810613155365, "step": 1113 }, { "epoch": 2.82, "learning_rate": 9.391870057572526e-10, "logits/chosen": -2.1703028678894043, "logits/rejected": -2.178044557571411, "logps/chosen": -1.1678695678710938, "logps/rejected": -2.3707125186920166, "loss": 0.6299, "rewards/accuracies": 1.0, "rewards/chosen": 0.8271347284317017, "rewards/margins": 0.24240267276763916, "rewards/rejected": 0.5847320556640625, "step": 1114 }, { "epoch": 2.82, "learning_rate": 9.129975882847362e-10, "logits/chosen": -2.064178228378296, "logits/rejected": -2.069819927215576, "logps/chosen": -1.5548012256622314, "logps/rejected": -1.8079487085342407, "loss": 0.6086, "rewards/accuracies": 1.0, "rewards/chosen": 0.8721325993537903, "rewards/margins": 0.2103481888771057, "rewards/rejected": 0.6617844104766846, "step": 1115 }, { "epoch": 2.83, "learning_rate": 8.871751368978553e-10, "logits/chosen": -2.271397590637207, "logits/rejected": -2.323723316192627, "logps/chosen": -0.9727879166603088, "logps/rejected": -7.028111457824707, "loss": 0.6903, "rewards/accuracies": 1.0, "rewards/chosen": 0.8866176009178162, "rewards/margins": 0.24276351928710938, "rewards/rejected": 0.6438540816307068, "step": 1116 }, { "epoch": 2.83, "learning_rate": 8.617198446408736e-10, "logits/chosen": -2.2584738731384277, "logits/rejected": -2.2632148265838623, "logps/chosen": -1.5670408010482788, "logps/rejected": -6.31701135635376, "loss": 0.5779, "rewards/accuracies": 1.0, "rewards/chosen": 0.889445424079895, "rewards/margins": 0.39671313762664795, "rewards/rejected": 0.49273228645324707, "step": 1117 }, { "epoch": 2.83, "learning_rate": 8.366319018132228e-10, "logits/chosen": -2.152894973754883, "logits/rejected": -2.151226758956909, "logps/chosen": -0.30272024869918823, "logps/rejected": -7.307084560394287, "loss": 0.662, "rewards/accuracies": 0.0, "rewards/chosen": 0.6683433055877686, "rewards/margins": -0.11075109243392944, "rewards/rejected": 0.779094398021698, "step": 1118 }, { "epoch": 2.83, "learning_rate": 8.119114959680929e-10, "logits/chosen": -2.2424144744873047, "logits/rejected": -2.2451727390289307, "logps/chosen": -2.1372478008270264, "logps/rejected": -4.708148002624512, "loss": 0.7778, "rewards/accuracies": 0.0, "rewards/chosen": 0.49298402667045593, "rewards/margins": -0.4565812051296234, "rewards/rejected": 0.9495652318000793, "step": 1119 }, { "epoch": 2.84, "learning_rate": 7.875588119110376e-10, "logits/chosen": -2.2363884449005127, "logits/rejected": -2.241482734680176, "logps/chosen": -1.9679208993911743, "logps/rejected": -4.863725662231445, "loss": 0.6517, "rewards/accuracies": 1.0, "rewards/chosen": 0.8262185454368591, "rewards/margins": 0.4081730246543884, "rewards/rejected": 0.4180455207824707, "step": 1120 }, { "epoch": 2.84, "learning_rate": 7.635740316985883e-10, "logits/chosen": -2.245450973510742, "logits/rejected": -2.2367732524871826, "logps/chosen": -2.893899917602539, "logps/rejected": -7.540263652801514, "loss": 0.6662, "rewards/accuracies": 0.0, "rewards/chosen": 0.6354535222053528, "rewards/margins": -0.304476261138916, "rewards/rejected": 0.9399297833442688, "step": 1121 }, { "epoch": 2.84, "learning_rate": 7.399573346368871e-10, "logits/chosen": -2.2330684661865234, "logits/rejected": -2.223869562149048, "logps/chosen": -0.9453620314598083, "logps/rejected": -8.973155975341797, "loss": 0.7533, "rewards/accuracies": 0.0, "rewards/chosen": 0.7523229718208313, "rewards/margins": -0.1797422170639038, "rewards/rejected": 0.9320651888847351, "step": 1122 }, { "epoch": 2.84, "learning_rate": 7.167088972803326e-10, "logits/chosen": -2.2172513008117676, "logits/rejected": -2.200871229171753, "logps/chosen": -1.4181116819381714, "logps/rejected": -5.247791290283203, "loss": 0.6395, "rewards/accuracies": 0.0, "rewards/chosen": 0.6826763153076172, "rewards/margins": -0.16511613130569458, "rewards/rejected": 0.8477924466133118, "step": 1123 }, { "epoch": 2.85, "learning_rate": 6.938288934303038e-10, "logits/chosen": -2.197258472442627, "logits/rejected": -2.203890800476074, "logps/chosen": -4.788078784942627, "logps/rejected": -4.1596221923828125, "loss": 0.64, "rewards/accuracies": 1.0, "rewards/chosen": 1.069933295249939, "rewards/margins": 0.47131383419036865, "rewards/rejected": 0.5986194610595703, "step": 1124 }, { "epoch": 2.85, "learning_rate": 6.713174941338162e-10, "logits/chosen": -2.2436304092407227, "logits/rejected": -2.2326161861419678, "logps/chosen": -2.8550055027008057, "logps/rejected": -4.895914554595947, "loss": 0.8176, "rewards/accuracies": 0.0, "rewards/chosen": 0.7200587391853333, "rewards/margins": -0.28173989057540894, "rewards/rejected": 1.0017986297607422, "step": 1125 }, { "epoch": 2.85, "learning_rate": 6.491748676822617e-10, "logits/chosen": -2.1604115962982178, "logits/rejected": -2.1572959423065186, "logps/chosen": -1.3939309120178223, "logps/rejected": -7.5201849937438965, "loss": 0.7053, "rewards/accuracies": 1.0, "rewards/chosen": 1.0581117868423462, "rewards/margins": 0.5464057326316833, "rewards/rejected": 0.5117060542106628, "step": 1126 }, { "epoch": 2.85, "learning_rate": 6.274011796101597e-10, "logits/chosen": -2.289722442626953, "logits/rejected": -2.2864160537719727, "logps/chosen": -0.8494035005569458, "logps/rejected": -3.3775484561920166, "loss": 0.6475, "rewards/accuracies": 1.0, "rewards/chosen": 0.9060907363891602, "rewards/margins": 0.2816610336303711, "rewards/rejected": 0.6244297027587891, "step": 1127 }, { "epoch": 2.86, "learning_rate": 6.059965926938859e-10, "logits/chosen": -2.243760347366333, "logits/rejected": -2.2496018409729004, "logps/chosen": -5.904455184936523, "logps/rejected": -2.6517415046691895, "loss": 0.7577, "rewards/accuracies": 1.0, "rewards/chosen": 1.0084112882614136, "rewards/margins": 0.41776400804519653, "rewards/rejected": 0.590647280216217, "step": 1128 }, { "epoch": 2.86, "learning_rate": 5.849612669505066e-10, "logits/chosen": -2.2785120010375977, "logits/rejected": -2.2829244136810303, "logps/chosen": -1.8809388875961304, "logps/rejected": -5.7076005935668945, "loss": 0.5938, "rewards/accuracies": 1.0, "rewards/chosen": 0.8080667853355408, "rewards/margins": 0.49758026003837585, "rewards/rejected": 0.3104865252971649, "step": 1129 }, { "epoch": 2.86, "learning_rate": 5.642953596365408e-10, "logits/chosen": -2.214221715927124, "logits/rejected": -2.2358992099761963, "logps/chosen": -4.630316734313965, "logps/rejected": -10.725855827331543, "loss": 0.5381, "rewards/accuracies": 1.0, "rewards/chosen": 1.0621014833450317, "rewards/margins": 0.7085545659065247, "rewards/rejected": 0.3535469174385071, "step": 1130 }, { "epoch": 2.86, "learning_rate": 5.439990252467885e-10, "logits/chosen": -2.359792947769165, "logits/rejected": -2.364193916320801, "logps/chosen": -0.9102458357810974, "logps/rejected": -4.986111640930176, "loss": 0.5693, "rewards/accuracies": 1.0, "rewards/chosen": 0.9256329536437988, "rewards/margins": 0.5243188738822937, "rewards/rejected": 0.4013140797615051, "step": 1131 }, { "epoch": 2.87, "learning_rate": 5.240724155132048e-10, "logits/chosen": -2.280982494354248, "logits/rejected": -2.287576675415039, "logps/chosen": -3.811455011367798, "logps/rejected": -10.524768829345703, "loss": 0.6171, "rewards/accuracies": 1.0, "rewards/chosen": 0.9943044781684875, "rewards/margins": 0.5180556774139404, "rewards/rejected": 0.4762488305568695, "step": 1132 }, { "epoch": 2.87, "learning_rate": 5.045156794037331e-10, "logits/chosen": -2.207909107208252, "logits/rejected": -2.2225728034973145, "logps/chosen": -4.25404691696167, "logps/rejected": -3.9359049797058105, "loss": 0.6337, "rewards/accuracies": 1.0, "rewards/chosen": 1.2003986835479736, "rewards/margins": 0.6249812841415405, "rewards/rejected": 0.5754173994064331, "step": 1133 }, { "epoch": 2.87, "learning_rate": 4.853289631212065e-10, "logits/chosen": -2.2664692401885986, "logits/rejected": -2.2612199783325195, "logps/chosen": -2.9723570346832275, "logps/rejected": -4.282920837402344, "loss": 0.7415, "rewards/accuracies": 1.0, "rewards/chosen": 0.7982555627822876, "rewards/margins": 0.32985231280326843, "rewards/rejected": 0.46840324997901917, "step": 1134 }, { "epoch": 2.87, "learning_rate": 4.6651241010226e-10, "logits/chosen": -2.288606643676758, "logits/rejected": -2.289095878601074, "logps/chosen": -4.032794952392578, "logps/rejected": -5.578210830688477, "loss": 0.5683, "rewards/accuracies": 1.0, "rewards/chosen": 0.8023940920829773, "rewards/margins": 0.3432479798793793, "rewards/rejected": 0.459146112203598, "step": 1135 }, { "epoch": 2.88, "learning_rate": 4.4806616101624173e-10, "logits/chosen": -2.1274657249450684, "logits/rejected": -2.1184864044189453, "logps/chosen": -5.312065601348877, "logps/rejected": -7.9329047203063965, "loss": 0.7191, "rewards/accuracies": 0.0, "rewards/chosen": 0.6611930727958679, "rewards/margins": -0.25870460271835327, "rewards/rejected": 0.9198976755142212, "step": 1136 }, { "epoch": 2.88, "learning_rate": 4.2999035376417026e-10, "logits/chosen": -2.2472786903381348, "logits/rejected": -2.248446226119995, "logps/chosen": -1.0398774147033691, "logps/rejected": -4.869087219238281, "loss": 0.5733, "rewards/accuracies": 1.0, "rewards/chosen": 0.7658340930938721, "rewards/margins": 0.36785072088241577, "rewards/rejected": 0.3979833722114563, "step": 1137 }, { "epoch": 2.88, "learning_rate": 4.1228512347771804e-10, "logits/chosen": -2.156674385070801, "logits/rejected": -2.141442060470581, "logps/chosen": -9.718673706054688, "logps/rejected": -2.4784297943115234, "loss": 0.6721, "rewards/accuracies": 0.0, "rewards/chosen": 0.49202796816825867, "rewards/margins": -0.40332505106925964, "rewards/rejected": 0.8953530192375183, "step": 1138 }, { "epoch": 2.88, "learning_rate": 3.9495060251816257e-10, "logits/chosen": -2.2903404235839844, "logits/rejected": -2.2826976776123047, "logps/chosen": -2.2134816646575928, "logps/rejected": -2.96712064743042, "loss": 0.7097, "rewards/accuracies": 0.0, "rewards/chosen": 0.8533077239990234, "rewards/margins": -0.14396154880523682, "rewards/rejected": 0.9972692728042603, "step": 1139 }, { "epoch": 2.89, "learning_rate": 3.7798692047544267e-10, "logits/chosen": -2.3206946849823, "logits/rejected": -2.324964761734009, "logps/chosen": -2.719512939453125, "logps/rejected": -8.636296272277832, "loss": 0.7453, "rewards/accuracies": 0.0, "rewards/chosen": 0.6503205299377441, "rewards/margins": -0.08074533939361572, "rewards/rejected": 0.7310658693313599, "step": 1140 }, { "epoch": 2.89, "learning_rate": 3.6139420416717026e-10, "logits/chosen": -2.10353684425354, "logits/rejected": -2.1049654483795166, "logps/chosen": -1.046645164489746, "logps/rejected": -4.466464996337891, "loss": 0.5401, "rewards/accuracies": 1.0, "rewards/chosen": 0.9322764277458191, "rewards/margins": 0.44492873549461365, "rewards/rejected": 0.48734769225120544, "step": 1141 }, { "epoch": 2.89, "learning_rate": 3.4517257763766463e-10, "logits/chosen": -2.239948272705078, "logits/rejected": -2.272848606109619, "logps/chosen": -4.8803391456604, "logps/rejected": -10.5418119430542, "loss": 0.628, "rewards/accuracies": 1.0, "rewards/chosen": 1.0369551181793213, "rewards/margins": 0.6064595580101013, "rewards/rejected": 0.43049556016921997, "step": 1142 }, { "epoch": 2.89, "learning_rate": 3.293221621570419e-10, "logits/chosen": -2.2326297760009766, "logits/rejected": -2.2209184169769287, "logps/chosen": -0.8232015371322632, "logps/rejected": -11.736515998840332, "loss": 0.5832, "rewards/accuracies": 0.0, "rewards/chosen": 0.6594919562339783, "rewards/margins": -0.13463199138641357, "rewards/rejected": 0.7941239476203918, "step": 1143 }, { "epoch": 2.9, "learning_rate": 3.138430762203215e-10, "logits/chosen": -2.2688324451446533, "logits/rejected": -2.2592666149139404, "logps/chosen": -1.5618149042129517, "logps/rejected": -6.208406448364258, "loss": 0.6554, "rewards/accuracies": 0.0, "rewards/chosen": 0.7361063361167908, "rewards/margins": -0.00956493616104126, "rewards/rejected": 0.745671272277832, "step": 1144 }, { "epoch": 2.9, "learning_rate": 2.9873543554652105e-10, "logits/chosen": -2.2414817810058594, "logits/rejected": -2.245436906814575, "logps/chosen": -2.2183520793914795, "logps/rejected": -7.766147136688232, "loss": 0.7693, "rewards/accuracies": 0.0, "rewards/chosen": 0.6535496115684509, "rewards/margins": -0.23955953121185303, "rewards/rejected": 0.893109142780304, "step": 1145 }, { "epoch": 2.9, "learning_rate": 2.839993530777851e-10, "logits/chosen": -2.164661169052124, "logits/rejected": -2.1785290241241455, "logps/chosen": -1.800355076789856, "logps/rejected": -12.150328636169434, "loss": 0.682, "rewards/accuracies": 1.0, "rewards/chosen": 0.7161563038825989, "rewards/margins": 0.0015844106674194336, "rewards/rejected": 0.7145718932151794, "step": 1146 }, { "epoch": 2.9, "learning_rate": 2.6963493897856906e-10, "logits/chosen": -2.3754453659057617, "logits/rejected": -2.3735525608062744, "logps/chosen": -2.718494176864624, "logps/rejected": -4.238607406616211, "loss": 0.7033, "rewards/accuracies": 0.0, "rewards/chosen": 0.5884838104248047, "rewards/margins": -0.44547104835510254, "rewards/rejected": 1.0339548587799072, "step": 1147 }, { "epoch": 2.91, "learning_rate": 2.556423006347841e-10, "logits/chosen": -2.2152116298675537, "logits/rejected": -2.222358465194702, "logps/chosen": -1.955432415008545, "logps/rejected": -7.233419895172119, "loss": 0.6537, "rewards/accuracies": 0.0, "rewards/chosen": 0.715216875076294, "rewards/margins": -0.14059752225875854, "rewards/rejected": 0.8558143973350525, "step": 1148 }, { "epoch": 2.91, "learning_rate": 2.420215426530259e-10, "logits/chosen": -2.2542972564697266, "logits/rejected": -2.248732566833496, "logps/chosen": -3.737969398498535, "logps/rejected": -1.2352640628814697, "loss": 0.6359, "rewards/accuracies": 0.0, "rewards/chosen": 0.66429203748703, "rewards/margins": -0.2700514793395996, "rewards/rejected": 0.9343435168266296, "step": 1149 }, { "epoch": 2.91, "learning_rate": 2.2877276685975277e-10, "logits/chosen": -2.3111684322357178, "logits/rejected": -2.292297124862671, "logps/chosen": -1.1580369472503662, "logps/rejected": -8.884220123291016, "loss": 0.8178, "rewards/accuracies": 0.0, "rewards/chosen": 0.5879308581352234, "rewards/margins": -0.09337234497070312, "rewards/rejected": 0.6813032031059265, "step": 1150 }, { "epoch": 2.91, "learning_rate": 2.1589607230056428e-10, "logits/chosen": -2.180516004562378, "logits/rejected": -2.1740922927856445, "logps/chosen": -2.3379411697387695, "logps/rejected": -3.977979898452759, "loss": 0.6845, "rewards/accuracies": 0.0, "rewards/chosen": 0.7788570523262024, "rewards/margins": -0.15125322341918945, "rewards/rejected": 0.9301102757453918, "step": 1151 }, { "epoch": 2.92, "learning_rate": 2.033915552394516e-10, "logits/chosen": -2.223832607269287, "logits/rejected": -2.2317612171173096, "logps/chosen": -1.5120428800582886, "logps/rejected": -2.294517755508423, "loss": 0.5812, "rewards/accuracies": 1.0, "rewards/chosen": 0.959503173828125, "rewards/margins": 0.4105757474899292, "rewards/rejected": 0.5489274263381958, "step": 1152 }, { "epoch": 2.92, "learning_rate": 1.9125930915804278e-10, "logits/chosen": -2.330749988555908, "logits/rejected": -2.334743022918701, "logps/chosen": -3.307760238647461, "logps/rejected": -3.335080623626709, "loss": 0.7096, "rewards/accuracies": 1.0, "rewards/chosen": 1.0008758306503296, "rewards/margins": 0.3814445734024048, "rewards/rejected": 0.6194312572479248, "step": 1153 }, { "epoch": 2.92, "learning_rate": 1.794994247549586e-10, "logits/chosen": -2.208594799041748, "logits/rejected": -2.210240125656128, "logps/chosen": -2.3859615325927734, "logps/rejected": -6.958126544952393, "loss": 0.7346, "rewards/accuracies": 0.0, "rewards/chosen": 0.5814536213874817, "rewards/margins": -0.10426545143127441, "rewards/rejected": 0.6857190728187561, "step": 1154 }, { "epoch": 2.92, "learning_rate": 1.6811198994508557e-10, "logits/chosen": -2.2121341228485107, "logits/rejected": -2.2065699100494385, "logps/chosen": -1.6852607727050781, "logps/rejected": -3.351095199584961, "loss": 0.656, "rewards/accuracies": 0.0, "rewards/chosen": 0.5775315761566162, "rewards/margins": -0.3830755949020386, "rewards/rejected": 0.9606071710586548, "step": 1155 }, { "epoch": 2.93, "learning_rate": 1.5709708985895965e-10, "logits/chosen": -2.1563830375671387, "logits/rejected": -2.143455743789673, "logps/chosen": -2.3531341552734375, "logps/rejected": -4.537245273590088, "loss": 0.6198, "rewards/accuracies": 0.0, "rewards/chosen": 0.5527236461639404, "rewards/margins": -0.24255317449569702, "rewards/rejected": 0.7952768206596375, "step": 1156 }, { "epoch": 2.93, "learning_rate": 1.464548068421001e-10, "logits/chosen": -2.2554593086242676, "logits/rejected": -2.253307342529297, "logps/chosen": -1.7192463874816895, "logps/rejected": -2.508629560470581, "loss": 0.7409, "rewards/accuracies": 1.0, "rewards/chosen": 1.0148457288742065, "rewards/margins": 0.26572126150131226, "rewards/rejected": 0.7491244673728943, "step": 1157 }, { "epoch": 2.93, "learning_rate": 1.3618522045439896e-10, "logits/chosen": -2.1783692836761475, "logits/rejected": -2.199359655380249, "logps/chosen": -7.621493339538574, "logps/rejected": -4.316083908081055, "loss": 0.5752, "rewards/accuracies": 1.0, "rewards/chosen": 1.149626612663269, "rewards/margins": 0.5937944054603577, "rewards/rejected": 0.5558322072029114, "step": 1158 }, { "epoch": 2.93, "learning_rate": 1.2628840746954362e-10, "logits/chosen": -2.2872254848480225, "logits/rejected": -2.284127712249756, "logps/chosen": -3.4713315963745117, "logps/rejected": -14.87551498413086, "loss": 0.7283, "rewards/accuracies": 0.0, "rewards/chosen": 0.7987591624259949, "rewards/margins": -0.24118608236312866, "rewards/rejected": 1.0399452447891235, "step": 1159 }, { "epoch": 2.94, "learning_rate": 1.1676444187442846e-10, "logits/chosen": -2.335689067840576, "logits/rejected": -2.3337557315826416, "logps/chosen": -1.9635825157165527, "logps/rejected": -5.0108137130737305, "loss": 0.6636, "rewards/accuracies": 0.0, "rewards/chosen": 0.6633169054985046, "rewards/margins": -0.1524403691291809, "rewards/rejected": 0.8157572746276855, "step": 1160 }, { "epoch": 2.94, "learning_rate": 1.0761339486859422e-10, "logits/chosen": -2.2644078731536865, "logits/rejected": -2.2689576148986816, "logps/chosen": -3.881354331970215, "logps/rejected": -9.294124603271484, "loss": 0.6268, "rewards/accuracies": 1.0, "rewards/chosen": 0.7519049048423767, "rewards/margins": 0.004114747047424316, "rewards/rejected": 0.7477901577949524, "step": 1161 }, { "epoch": 2.94, "learning_rate": 9.88353348637172e-11, "logits/chosen": -2.2205753326416016, "logits/rejected": -2.2339251041412354, "logps/chosen": -6.262067794799805, "logps/rejected": -2.351482391357422, "loss": 0.7186, "rewards/accuracies": 1.0, "rewards/chosen": 1.072596549987793, "rewards/margins": 0.5348320603370667, "rewards/rejected": 0.5377644896507263, "step": 1162 }, { "epoch": 2.94, "learning_rate": 9.043032748308199e-11, "logits/chosen": -2.197882652282715, "logits/rejected": -2.203380823135376, "logps/chosen": -0.7194627523422241, "logps/rejected": -5.1305718421936035, "loss": 0.6924, "rewards/accuracies": 1.0, "rewards/chosen": 0.8631073832511902, "rewards/margins": 0.45196041464805603, "rewards/rejected": 0.41114696860313416, "step": 1163 }, { "epoch": 2.95, "learning_rate": 8.239843556108739e-11, "logits/chosen": -2.366992712020874, "logits/rejected": -2.3641273975372314, "logps/chosen": -1.8930736780166626, "logps/rejected": -4.02199649810791, "loss": 0.5603, "rewards/accuracies": 1.0, "rewards/chosen": 0.9271196722984314, "rewards/margins": 0.47870248556137085, "rewards/rejected": 0.44841718673706055, "step": 1164 }, { "epoch": 2.95, "learning_rate": 7.473971914280785e-11, "logits/chosen": -2.17584228515625, "logits/rejected": -2.1777877807617188, "logps/chosen": -1.807422161102295, "logps/rejected": -3.0561585426330566, "loss": 0.5556, "rewards/accuracies": 1.0, "rewards/chosen": 0.7141417860984802, "rewards/margins": 0.18826013803482056, "rewards/rejected": 0.5258816480636597, "step": 1165 }, { "epoch": 2.95, "learning_rate": 6.745423548348839e-11, "logits/chosen": -2.1682546138763428, "logits/rejected": -2.1665291786193848, "logps/chosen": -5.192906856536865, "logps/rejected": -4.151454925537109, "loss": 0.5938, "rewards/accuracies": 1.0, "rewards/chosen": 0.9132310748100281, "rewards/margins": 0.40613842010498047, "rewards/rejected": 0.5070926547050476, "step": 1166 }, { "epoch": 2.95, "learning_rate": 6.054203904817811e-11, "logits/chosen": -2.302891492843628, "logits/rejected": -2.34122371673584, "logps/chosen": -0.7066506147384644, "logps/rejected": -7.726829528808594, "loss": 0.6205, "rewards/accuracies": 1.0, "rewards/chosen": 0.8383752107620239, "rewards/margins": 0.00971287488937378, "rewards/rejected": 0.8286623358726501, "step": 1167 }, { "epoch": 2.96, "learning_rate": 5.400318151127514e-11, "logits/chosen": -1.983476161956787, "logits/rejected": -2.02718186378479, "logps/chosen": -0.7357057332992554, "logps/rejected": -9.722628593444824, "loss": 0.6436, "rewards/accuracies": 1.0, "rewards/chosen": 0.8157927393913269, "rewards/margins": 0.23389405012130737, "rewards/rejected": 0.5818986892700195, "step": 1168 }, { "epoch": 2.96, "learning_rate": 4.783771175617124e-11, "logits/chosen": -2.2644922733306885, "logits/rejected": -2.2697532176971436, "logps/chosen": -3.1856470108032227, "logps/rejected": -2.726543426513672, "loss": 0.5743, "rewards/accuracies": 1.0, "rewards/chosen": 0.8812916874885559, "rewards/margins": 0.2358860969543457, "rewards/rejected": 0.6454055905342102, "step": 1169 }, { "epoch": 2.96, "learning_rate": 4.2045675874868845e-11, "logits/chosen": -2.2497918605804443, "logits/rejected": -2.2393300533294678, "logps/chosen": -3.650273323059082, "logps/rejected": -7.120148181915283, "loss": 0.6366, "rewards/accuracies": 0.0, "rewards/chosen": 0.661292552947998, "rewards/margins": -0.2303749918937683, "rewards/rejected": 0.8916675448417664, "step": 1170 }, { "epoch": 2.96, "learning_rate": 3.662711716764244e-11, "logits/chosen": -2.1804933547973633, "logits/rejected": -2.175485849380493, "logps/chosen": -2.7507693767547607, "logps/rejected": -5.082398414611816, "loss": 0.6108, "rewards/accuracies": 1.0, "rewards/chosen": 0.8133161664009094, "rewards/margins": 0.395501047372818, "rewards/rejected": 0.41781511902809143, "step": 1171 }, { "epoch": 2.97, "learning_rate": 3.158207614272212e-11, "logits/chosen": -2.2148489952087402, "logits/rejected": -2.234933614730835, "logps/chosen": -1.3558824062347412, "logps/rejected": -5.711618423461914, "loss": 0.605, "rewards/accuracies": 1.0, "rewards/chosen": 0.8865521550178528, "rewards/margins": 0.2889227867126465, "rewards/rejected": 0.5976293683052063, "step": 1172 }, { "epoch": 2.97, "learning_rate": 2.6910590515966113e-11, "logits/chosen": -2.374119281768799, "logits/rejected": -2.3643381595611572, "logps/chosen": -1.0393273830413818, "logps/rejected": -7.810070037841797, "loss": 0.6042, "rewards/accuracies": 0.0, "rewards/chosen": 0.6487811207771301, "rewards/margins": -0.30980384349823, "rewards/rejected": 0.9585849642753601, "step": 1173 }, { "epoch": 2.97, "learning_rate": 2.2612695210616484e-11, "logits/chosen": -2.2361392974853516, "logits/rejected": -2.246166944503784, "logps/chosen": -4.8691558837890625, "logps/rejected": -2.010368824005127, "loss": 0.7489, "rewards/accuracies": 0.0, "rewards/chosen": 0.6407226920127869, "rewards/margins": -0.0221555233001709, "rewards/rejected": 0.6628782153129578, "step": 1174 }, { "epoch": 2.97, "learning_rate": 1.8688422357004964e-11, "logits/chosen": -2.143578290939331, "logits/rejected": -2.1844186782836914, "logps/chosen": -1.6004480123519897, "logps/rejected": -6.633966445922852, "loss": 0.6453, "rewards/accuracies": 0.0, "rewards/chosen": 0.5750126242637634, "rewards/margins": -0.3056129217147827, "rewards/rejected": 0.8806255459785461, "step": 1175 }, { "epoch": 2.98, "learning_rate": 1.5137801292325337e-11, "logits/chosen": -2.2237768173217773, "logits/rejected": -2.2297542095184326, "logps/chosen": -2.317781448364258, "logps/rejected": -4.433343410491943, "loss": 0.6642, "rewards/accuracies": 0.0, "rewards/chosen": 0.7349929809570312, "rewards/margins": -0.08709907531738281, "rewards/rejected": 0.8220920562744141, "step": 1176 }, { "epoch": 2.98, "learning_rate": 1.1960858560416953e-11, "logits/chosen": -2.109102964401245, "logits/rejected": -2.1290652751922607, "logps/chosen": -2.498528242111206, "logps/rejected": -4.866086483001709, "loss": 0.5737, "rewards/accuracies": 1.0, "rewards/chosen": 0.9115883111953735, "rewards/margins": 0.3501957654953003, "rewards/rejected": 0.5613925457000732, "step": 1177 }, { "epoch": 2.98, "learning_rate": 9.157617911570436e-12, "logits/chosen": -2.2354509830474854, "logits/rejected": -2.3071858882904053, "logps/chosen": -3.242278814315796, "logps/rejected": -20.12676239013672, "loss": 0.5752, "rewards/accuracies": 1.0, "rewards/chosen": 0.681683361530304, "rewards/margins": 0.07322371006011963, "rewards/rejected": 0.6084596514701843, "step": 1178 }, { "epoch": 2.98, "learning_rate": 6.728100302327844e-12, "logits/chosen": -2.067981481552124, "logits/rejected": -2.067305326461792, "logps/chosen": -2.7043747901916504, "logps/rejected": -4.663154125213623, "loss": 0.6163, "rewards/accuracies": 1.0, "rewards/chosen": 0.9125539660453796, "rewards/margins": 0.3397047519683838, "rewards/rejected": 0.5728492140769958, "step": 1179 }, { "epoch": 2.99, "learning_rate": 4.6723238953549906e-12, "logits/chosen": -2.2443294525146484, "logits/rejected": -2.237039804458618, "logps/chosen": -2.9079535007476807, "logps/rejected": -2.473510265350342, "loss": 0.7265, "rewards/accuracies": 0.0, "rewards/chosen": 0.6246404051780701, "rewards/margins": -0.2883518934249878, "rewards/rejected": 0.9129922986030579, "step": 1180 }, { "epoch": 2.99, "learning_rate": 2.9903040592860194e-12, "logits/chosen": -2.179682493209839, "logits/rejected": -2.1770095825195312, "logps/chosen": -1.9611966609954834, "logps/rejected": -2.8985209465026855, "loss": 0.5933, "rewards/accuracies": 1.0, "rewards/chosen": 0.9173359274864197, "rewards/margins": 0.33141231536865234, "rewards/rejected": 0.5859236121177673, "step": 1181 }, { "epoch": 2.99, "learning_rate": 1.6820533686179306e-12, "logits/chosen": -2.097874879837036, "logits/rejected": -2.090695381164551, "logps/chosen": -1.6098887920379639, "logps/rejected": -2.895155191421509, "loss": 0.6838, "rewards/accuracies": 0.0, "rewards/chosen": 0.6401827931404114, "rewards/margins": -0.400221049785614, "rewards/rejected": 1.0404038429260254, "step": 1182 }, { "epoch": 2.99, "learning_rate": 7.475816036051075e-13, "logits/chosen": -2.3657631874084473, "logits/rejected": -2.366037607192993, "logps/chosen": -3.4146664142608643, "logps/rejected": -9.560955047607422, "loss": 0.5957, "rewards/accuracies": 0.0, "rewards/chosen": 0.5638518333435059, "rewards/margins": -0.24418634176254272, "rewards/rejected": 0.8080381751060486, "step": 1183 }, { "epoch": 3.0, "learning_rate": 1.8689575020380822e-13, "logits/chosen": -2.2760543823242188, "logits/rejected": -2.2880561351776123, "logps/chosen": -8.10428237915039, "logps/rejected": -3.056144952774048, "loss": 0.5972, "rewards/accuracies": 1.0, "rewards/chosen": 1.2010780572891235, "rewards/margins": 0.6407520174980164, "rewards/rejected": 0.5603260397911072, "step": 1184 }, { "epoch": 3.0, "learning_rate": 0.0, "logits/chosen": -2.2641549110412598, "logits/rejected": -2.2684202194213867, "logps/chosen": -2.353339433670044, "logps/rejected": -16.921279907226562, "loss": 0.6827, "rewards/accuracies": 0.0, "rewards/chosen": 0.7029343843460083, "rewards/margins": -0.08756780624389648, "rewards/rejected": 0.7905021905899048, "step": 1185 }, { "epoch": 3.0, "step": 1185, "total_flos": 0.0, "train_loss": 0.6561455908706922, "train_runtime": 19149.504, "train_samples_per_second": 0.247, "train_steps_per_second": 0.062 } ], "logging_steps": 1.0, "max_steps": 1185, "num_train_epochs": 3, "save_steps": 200, "total_flos": 0.0, "trial_name": null, "trial_params": null }